Fix the timing of read and write events under kqueue (#9416)
Normally we execute the read event first and then the write event; when the write barrier is set, we do it in reverse. However, under `kqueue`, an `fd` that is both readable and writable is reported by `kevent` as two separate events, one per filter, so the relative timing of its read and write handling cannot be controlled. This also means that without this fix the guarantees of AOF `appendfsync = always` are not met on macOS.

The main change in this PR is to cache events as they are read, so that if the same `fd` occurs again, only its mask in the cache is updated rather than a new fired event being generated.

This was exposed by the following test failure on macOS:

```
*** [err]: AOF fsync always barrier issue in tests/integration/aof.tcl
Expected 544 != 544 (context: type eval line 26 cmd {assert {$size1 != $size2}} proc ::test)
```

(cherry picked from commit 306a5ccd2d053ff653988b61a779e3cbce408874)
This commit is contained in: parent a2e8a3a241, commit 694a869e78
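For context, the underlying kqueue behavior can be reproduced with a minimal standalone sketch (not part of this patch; macOS/BSD only, error handling omitted): register both filters on one end of a socketpair, make it readable, and `kevent` typically returns two events with the same `ident`, one per filter.

```c
#include <sys/types.h>
#include <sys/time.h>
#include <sys/event.h>
#include <sys/socket.h>
#include <stdio.h>
#include <unistd.h>

int main(void) {
    int sv[2];
    socketpair(AF_UNIX, SOCK_STREAM, 0, sv);

    int kq = kqueue();
    struct kevent chg[2];
    EV_SET(&chg[0], sv[0], EVFILT_READ,  EV_ADD, 0, 0, NULL);
    EV_SET(&chg[1], sv[0], EVFILT_WRITE, EV_ADD, 0, 0, NULL);
    kevent(kq, chg, 2, NULL, 0, NULL); /* register only */

    write(sv[1], "x", 1); /* make sv[0] readable; it is already writable */

    struct kevent ev[8];
    int n = kevent(kq, NULL, 0, ev, 8, NULL);
    for (int i = 0; i < n; i++)
        printf("event %d: ident=%d filter=%s\n", i, (int)ev[i].ident,
               ev[i].filter == EVFILT_READ ? "READ" : "WRITE");
    /* Typically prints two events for the same ident: one READ, one WRITE.
     * Compare epoll, which would report one event with both bits set. */
    return 0;
}
```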
```diff
@@ -36,8 +36,29 @@
 typedef struct aeApiState {
     int kqfd;
     struct kevent *events;
+
+    /* Event masks for merging read and write events.
+     * To reduce memory consumption, we use 2 bits to store the mask
+     * of an event, so that 1 byte stores the masks of 4 events. */
+    char *eventsMask;
 } aeApiState;
 
+#define EVENT_MASK_MALLOC_SIZE(sz) (((sz) + 3) / 4)
+#define EVENT_MASK_OFFSET(fd) ((fd) % 4 * 2)
+#define EVENT_MASK_ENCODE(fd, mask) (((mask) & 0x3) << EVENT_MASK_OFFSET(fd))
+
+static inline int getEventMask(const char *eventsMask, int fd) {
+    return (eventsMask[fd/4] >> EVENT_MASK_OFFSET(fd)) & 0x3;
+}
+
+static inline void addEventMask(char *eventsMask, int fd, int mask) {
+    eventsMask[fd/4] |= EVENT_MASK_ENCODE(fd, mask);
+}
+
+static inline void resetEventMask(char *eventsMask, int fd) {
+    eventsMask[fd/4] &= ~EVENT_MASK_ENCODE(fd, 0x3);
+}
+
 static int aeApiCreate(aeEventLoop *eventLoop) {
     aeApiState *state = zmalloc(sizeof(aeApiState));
 
```
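As a worked example of the packing introduced above, here is a standalone sketch (not part of the patch) that assumes `AE_READABLE == 1` and `AE_WRITABLE == 2`, their values in `ae.h`. fd 6 lives in byte `6/4 == 1` at bit offset `6%4*2 == 4`, and resetting it leaves neighbouring fds in the same byte untouched:

```c
#include <assert.h>
#include <string.h>

#define AE_READABLE 1
#define AE_WRITABLE 2

#define EVENT_MASK_MALLOC_SIZE(sz) (((sz) + 3) / 4)
#define EVENT_MASK_OFFSET(fd) ((fd) % 4 * 2)
#define EVENT_MASK_ENCODE(fd, mask) (((mask) & 0x3) << EVENT_MASK_OFFSET(fd))

int main(void) {
    /* 16 fds fit in 4 bytes: 2 bits per fd, 4 fds per byte. */
    char eventsMask[EVENT_MASK_MALLOC_SIZE(16)];
    memset(eventsMask, 0, sizeof(eventsMask));

    /* Accumulate both masks for fd 6. */
    eventsMask[6/4] |= EVENT_MASK_ENCODE(6, AE_READABLE);
    eventsMask[6/4] |= EVENT_MASK_ENCODE(6, AE_WRITABLE);
    assert(((eventsMask[6/4] >> EVENT_MASK_OFFSET(6)) & 0x3) ==
           (AE_READABLE|AE_WRITABLE));

    /* fd 5 shares the same byte, at bit offset 2. */
    eventsMask[5/4] |= EVENT_MASK_ENCODE(5, AE_READABLE);

    /* Resetting fd 6 clears only its own 2 bits; fd 5 is untouched. */
    eventsMask[6/4] &= ~EVENT_MASK_ENCODE(6, 0x3);
    assert(((eventsMask[6/4] >> EVENT_MASK_OFFSET(6)) & 0x3) == 0);
    assert(((eventsMask[5/4] >> EVENT_MASK_OFFSET(5)) & 0x3) == AE_READABLE);
    return 0;
}
```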
```diff
@@ -54,6 +75,8 @@ static int aeApiCreate(aeEventLoop *eventLoop) {
         return -1;
     }
     anetCloexec(state->kqfd);
+    state->eventsMask = zmalloc(EVENT_MASK_MALLOC_SIZE(eventLoop->setsize));
+    memset(state->eventsMask, 0, EVENT_MASK_MALLOC_SIZE(eventLoop->setsize));
     eventLoop->apidata = state;
     return 0;
 }
```
```diff
@@ -62,6 +85,8 @@ static int aeApiResize(aeEventLoop *eventLoop, int setsize) {
     aeApiState *state = eventLoop->apidata;
 
     state->events = zrealloc(state->events, sizeof(struct kevent)*setsize);
+    state->eventsMask = zrealloc(state->eventsMask, EVENT_MASK_MALLOC_SIZE(setsize));
+    memset(state->eventsMask, 0, EVENT_MASK_MALLOC_SIZE(setsize));
     return 0;
 }
 
```
```diff
@@ -70,6 +95,7 @@ static void aeApiFree(aeEventLoop *eventLoop) {
 
     close(state->kqfd);
     zfree(state->events);
+    zfree(state->eventsMask);
     zfree(state);
 }
 
```
```diff
@@ -120,15 +146,37 @@ static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
     if (retval > 0) {
         int j;
 
-        numevents = retval;
-        for(j = 0; j < numevents; j++) {
-            int mask = 0;
+        /* Normally we execute the read event first and then the write event.
+         * When the barrier is set, we will do it in reverse.
+         *
+         * However, under kqueue, read and write events would be separate
+         * events, which would make it impossible to control the order of
+         * reads and writes. So we store the mask we've got for each fd and
+         * merge events on the same fd later. */
+        for (j = 0; j < retval; j++) {
             struct kevent *e = state->events+j;
+            int fd = e->ident;
+            int mask = 0;
 
-            if (e->filter == EVFILT_READ) mask |= AE_READABLE;
-            if (e->filter == EVFILT_WRITE) mask |= AE_WRITABLE;
-            eventLoop->fired[j].fd = e->ident;
-            eventLoop->fired[j].mask = mask;
+            if (e->filter == EVFILT_READ) mask = AE_READABLE;
+            else if (e->filter == EVFILT_WRITE) mask = AE_WRITABLE;
+            addEventMask(state->eventsMask, fd, mask);
+        }
+
+        /* Traverse again to merge read and write events, resetting each fd's
+         * mask to 0 so that an fd is not fired again when it is encountered
+         * a second time. */
+        numevents = 0;
+        for (j = 0; j < retval; j++) {
+            struct kevent *e = state->events+j;
+            int fd = e->ident;
+            int mask = getEventMask(state->eventsMask, fd);
+
+            if (mask) {
+                eventLoop->fired[numevents].fd = fd;
+                eventLoop->fired[numevents].mask = mask;
+                resetEventMask(state->eventsMask, fd);
+                numevents++;
+            }
+        }
     }
     return numevents;
```
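To see the two-pass merge in isolation, here is a toy standalone simulation (hypothetical, not part of the patch) that feeds in the duplicate events kqueue would report for a readable+writable fd and prints the merged fired list:

```c
#include <stdio.h>
#include <string.h>

#define AE_READABLE 1
#define AE_WRITABLE 2
#define EVENT_MASK_OFFSET(fd) ((fd) % 4 * 2)
#define EVENT_MASK_ENCODE(fd, mask) (((mask) & 0x3) << EVENT_MASK_OFFSET(fd))

int main(void) {
    /* Simulated kevent results: fd 5 readable, fd 6 writable, fd 5 writable.
     * Note fd 5 appears twice, as kqueue would report it. */
    int fds[]   = {5, 6, 5};
    int masks[] = {AE_READABLE, AE_WRITABLE, AE_WRITABLE};
    int retval  = 3;

    char eventsMask[4] = {0};

    /* Pass 1: accumulate each fd's mask. */
    for (int j = 0; j < retval; j++)
        eventsMask[fds[j]/4] |= EVENT_MASK_ENCODE(fds[j], masks[j]);

    /* Pass 2: emit one merged event per fd, clearing the mask so the
     * duplicate occurrence of fd 5 is skipped. */
    int numevents = 0;
    for (int j = 0; j < retval; j++) {
        int fd = fds[j];
        int mask = (eventsMask[fd/4] >> EVENT_MASK_OFFSET(fd)) & 0x3;
        if (mask) {
            printf("fired[%d]: fd=%d mask=%d\n", numevents++, fd, mask);
            eventsMask[fd/4] &= ~EVENT_MASK_ENCODE(fd, 0x3);
        }
    }
    /* Prints: fired[0]: fd=5 mask=3
     *         fired[1]: fd=6 mask=2 */
    return 0;
}
```

The second pass preserves the kernel's reporting order while firing each fd exactly once with its combined mask, which is what lets ae apply the barrier's read/write ordering to a single fired event.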