mirror of
https://github.com/bol-van/zapret.git
synced 2024-11-27 04:30:53 +03:00
1387 lines
31 KiB
C
1387 lines
31 KiB
C
|
#include "epollfd_ctx.h"
|
||
|
|
||
|
#include <sys/types.h>
|
||
|
|
||
|
#if defined(__FreeBSD__)
|
||
|
#include <sys/capsicum.h>
|
||
|
#endif
|
||
|
#include <sys/event.h>
|
||
|
#include <sys/ioctl.h>
|
||
|
#include <sys/queue.h>
|
||
|
#include <sys/socket.h>
|
||
|
#include <sys/stat.h>
|
||
|
|
||
|
#if defined(__DragonFly__)
|
||
|
/* For TAILQ_FOREACH_SAFE. */
|
||
|
#include <netproto/802_11/ieee80211_dragonfly.h>
|
||
|
#endif
|
||
|
|
||
|
#include <assert.h>
|
||
|
#include <errno.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
#include <limits.h>
|
||
|
#include <poll.h>
|
||
|
#include <unistd.h>
|
||
|
|
||
|
static RegisteredFDsNode *
|
||
|
registered_fds_node_create(int fd)
|
||
|
{
|
||
|
RegisteredFDsNode *node;
|
||
|
|
||
|
node = malloc(sizeof(*node));
|
||
|
if (!node) {
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
*node = (RegisteredFDsNode){.fd = fd, .self_pipe = {-1, -1}};
|
||
|
|
||
|
return node;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
registered_fds_node_destroy(RegisteredFDsNode *node)
|
||
|
{
|
||
|
if (node->self_pipe[0] >= 0 && node->self_pipe[1] >= 0) {
|
||
|
(void)close(node->self_pipe[0]);
|
||
|
(void)close(node->self_pipe[1]);
|
||
|
}
|
||
|
|
||
|
free(node);
|
||
|
}
|
||
|
|
||
|
typedef struct {
|
||
|
int evfilt_read;
|
||
|
int evfilt_write;
|
||
|
int evfilt_except;
|
||
|
} NeededFilters;
|
||
|
|
||
|
static NeededFilters
|
||
|
get_needed_filters(RegisteredFDsNode *fd2_node)
|
||
|
{
|
||
|
NeededFilters needed_filters;
|
||
|
|
||
|
needed_filters.evfilt_except = 0;
|
||
|
|
||
|
if (fd2_node->node_type == NODE_TYPE_FIFO) {
|
||
|
if (fd2_node->node_data.fifo.readable &&
|
||
|
fd2_node->node_data.fifo.writable) {
|
||
|
needed_filters.evfilt_read = !!(
|
||
|
fd2_node->events & EPOLLIN);
|
||
|
needed_filters.evfilt_write = !!(
|
||
|
fd2_node->events & EPOLLOUT);
|
||
|
|
||
|
if (fd2_node->events == 0) {
|
||
|
needed_filters.evfilt_read =
|
||
|
fd2_node->eof_state ? 1 : EV_CLEAR;
|
||
|
}
|
||
|
|
||
|
} else if (fd2_node->node_data.fifo.readable) {
|
||
|
needed_filters.evfilt_read = !!(
|
||
|
fd2_node->events & EPOLLIN);
|
||
|
needed_filters.evfilt_write = 0;
|
||
|
|
||
|
if (needed_filters.evfilt_read == 0) {
|
||
|
needed_filters.evfilt_read =
|
||
|
fd2_node->eof_state ? 1 : EV_CLEAR;
|
||
|
}
|
||
|
} else if (fd2_node->node_data.fifo.writable) {
|
||
|
needed_filters.evfilt_read = 0;
|
||
|
needed_filters.evfilt_write = !!(
|
||
|
fd2_node->events & EPOLLOUT);
|
||
|
|
||
|
if (needed_filters.evfilt_write == 0) {
|
||
|
needed_filters.evfilt_write =
|
||
|
fd2_node->eof_state ? 1 : EV_CLEAR;
|
||
|
}
|
||
|
} else {
|
||
|
__builtin_unreachable();
|
||
|
}
|
||
|
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
if (fd2_node->node_type == NODE_TYPE_KQUEUE) {
|
||
|
needed_filters.evfilt_read = !!(fd2_node->events & EPOLLIN);
|
||
|
needed_filters.evfilt_write = 0;
|
||
|
|
||
|
assert(fd2_node->eof_state == 0);
|
||
|
|
||
|
if (needed_filters.evfilt_read == 0) {
|
||
|
needed_filters.evfilt_read = EV_CLEAR;
|
||
|
}
|
||
|
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
if (fd2_node->node_type == NODE_TYPE_SOCKET) {
|
||
|
needed_filters.evfilt_read = !!(fd2_node->events & EPOLLIN);
|
||
|
|
||
|
if (needed_filters.evfilt_read == 0 &&
|
||
|
(fd2_node->events & EPOLLRDHUP)) {
|
||
|
needed_filters.evfilt_read = (fd2_node->eof_state &
|
||
|
EOF_STATE_READ_EOF)
|
||
|
? 1
|
||
|
: EV_CLEAR;
|
||
|
}
|
||
|
|
||
|
#ifdef EVFILT_EXCEPT
|
||
|
needed_filters.evfilt_except = !!(fd2_node->events & EPOLLPRI);
|
||
|
#else
|
||
|
if (needed_filters.evfilt_read == 0 &&
|
||
|
(fd2_node->events & EPOLLPRI)) {
|
||
|
needed_filters.evfilt_read = fd2_node->pollpri_active
|
||
|
? 1
|
||
|
: EV_CLEAR;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
needed_filters.evfilt_write = !!(fd2_node->events & EPOLLOUT);
|
||
|
|
||
|
/* Let's use EVFILT_READ to drive the POLLHUP. */
|
||
|
if (fd2_node->eof_state ==
|
||
|
(EOF_STATE_READ_EOF | EOF_STATE_WRITE_EOF)) {
|
||
|
if (needed_filters.evfilt_read != 1 &&
|
||
|
needed_filters.evfilt_write != 1) {
|
||
|
needed_filters.evfilt_read = 1;
|
||
|
}
|
||
|
|
||
|
if (needed_filters.evfilt_read) {
|
||
|
needed_filters.evfilt_write = 0;
|
||
|
} else {
|
||
|
needed_filters.evfilt_read = 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* We need something to detect POLLHUP. */
|
||
|
if (fd2_node->eof_state == 0 &&
|
||
|
needed_filters.evfilt_read == 0 &&
|
||
|
needed_filters.evfilt_write == 0) {
|
||
|
needed_filters.evfilt_read = EV_CLEAR;
|
||
|
}
|
||
|
|
||
|
if (fd2_node->eof_state == EOF_STATE_READ_EOF) {
|
||
|
if (needed_filters.evfilt_write == 0) {
|
||
|
needed_filters.evfilt_write = EV_CLEAR;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (fd2_node->eof_state == EOF_STATE_WRITE_EOF) {
|
||
|
if (needed_filters.evfilt_read == 0) {
|
||
|
needed_filters.evfilt_read = EV_CLEAR;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
needed_filters.evfilt_read = !!(fd2_node->events & EPOLLIN);
|
||
|
needed_filters.evfilt_write = !!(fd2_node->events & EPOLLOUT);
|
||
|
|
||
|
if (fd2_node->events == 0) {
|
||
|
needed_filters.evfilt_read = fd2_node->eof_state ? 1
|
||
|
: EV_CLEAR;
|
||
|
}
|
||
|
|
||
|
out:
|
||
|
if (fd2_node->is_edge_triggered) {
|
||
|
if (needed_filters.evfilt_read) {
|
||
|
needed_filters.evfilt_read = EV_CLEAR;
|
||
|
}
|
||
|
if (needed_filters.evfilt_write) {
|
||
|
needed_filters.evfilt_write = EV_CLEAR;
|
||
|
}
|
||
|
if (needed_filters.evfilt_except) {
|
||
|
needed_filters.evfilt_except = EV_CLEAR;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
assert(needed_filters.evfilt_read || needed_filters.evfilt_write);
|
||
|
assert(needed_filters.evfilt_read == 0 ||
|
||
|
needed_filters.evfilt_read == 1 ||
|
||
|
needed_filters.evfilt_read == EV_CLEAR);
|
||
|
assert(needed_filters.evfilt_write == 0 ||
|
||
|
needed_filters.evfilt_write == 1 ||
|
||
|
needed_filters.evfilt_write == EV_CLEAR);
|
||
|
assert(needed_filters.evfilt_except == 0 ||
|
||
|
needed_filters.evfilt_except == 1 ||
|
||
|
needed_filters.evfilt_except == EV_CLEAR);
|
||
|
|
||
|
return needed_filters;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
registered_fds_node_update_flags_from_epoll_event(RegisteredFDsNode *fd2_node,
|
||
|
struct epoll_event *ev)
|
||
|
{
|
||
|
fd2_node->events = ev->events &
|
||
|
(EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLOUT);
|
||
|
fd2_node->data = ev->data;
|
||
|
fd2_node->is_edge_triggered = ev->events & EPOLLET;
|
||
|
fd2_node->is_oneshot = ev->events & EPOLLONESHOT;
|
||
|
|
||
|
if (fd2_node->is_oneshot) {
|
||
|
fd2_node->is_edge_triggered = true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static errno_t
|
||
|
registered_fds_node_add_self_trigger(RegisteredFDsNode *fd2_node,
|
||
|
EpollFDCtx *epollfd)
|
||
|
{
|
||
|
struct kevent kevs[1];
|
||
|
|
||
|
#ifdef EVFILT_USER
|
||
|
EV_SET(&kevs[0], (uintptr_t)fd2_node, EVFILT_USER, /**/
|
||
|
EV_ADD | EV_CLEAR, 0, 0, fd2_node);
|
||
|
#else
|
||
|
if (fd2_node->self_pipe[0] < 0 && fd2_node->self_pipe[1] < 0) {
|
||
|
if (pipe2(fd2_node->self_pipe, O_NONBLOCK | O_CLOEXEC) < 0) {
|
||
|
errno_t ec = errno;
|
||
|
fd2_node->self_pipe[0] = fd2_node->self_pipe[1] = -1;
|
||
|
return ec;
|
||
|
}
|
||
|
|
||
|
assert(fd2_node->self_pipe[0] >= 0);
|
||
|
assert(fd2_node->self_pipe[1] >= 0);
|
||
|
}
|
||
|
|
||
|
EV_SET(&kevs[0], fd2_node->self_pipe[0], EVFILT_READ, /**/
|
||
|
EV_ADD | EV_CLEAR, 0, 0, fd2_node);
|
||
|
#endif
|
||
|
|
||
|
if (kevent(epollfd->kq, kevs, 1, NULL, 0, NULL) < 0) {
|
||
|
return errno;
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
registered_fds_node_trigger_self(RegisteredFDsNode *fd2_node,
|
||
|
EpollFDCtx *epollfd)
|
||
|
{
|
||
|
#ifdef EVFILT_USER
|
||
|
struct kevent kevs[1];
|
||
|
EV_SET(&kevs[0], (uintptr_t)fd2_node, EVFILT_USER, /**/
|
||
|
0, NOTE_TRIGGER, 0, fd2_node);
|
||
|
(void)kevent(epollfd->kq, kevs, 1, NULL, 0, NULL);
|
||
|
#else
|
||
|
(void)epollfd;
|
||
|
assert(fd2_node->self_pipe[1] >= 0);
|
||
|
|
||
|
char c = 0;
|
||
|
(void)write(fd2_node->self_pipe[1], &c, 1);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
registered_fds_node_feed_event(RegisteredFDsNode *fd2_node,
|
||
|
EpollFDCtx *epollfd, struct kevent const *kev)
|
||
|
{
|
||
|
int revents = 0;
|
||
|
|
||
|
if (fd2_node->node_type == NODE_TYPE_POLL) {
|
||
|
assert(fd2_node->revents == 0);
|
||
|
|
||
|
#ifdef EVFILT_USER
|
||
|
assert(kev->filter == EVFILT_USER);
|
||
|
#else
|
||
|
char c[32];
|
||
|
while (read(fd2_node->self_pipe[0], c, sizeof(c)) >= 0) {
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
struct pollfd pfd = {
|
||
|
.fd = fd2_node->fd,
|
||
|
.events = (short)fd2_node->events,
|
||
|
};
|
||
|
|
||
|
revents = poll(&pfd, 1, 0) < 0 ? EPOLLERR : pfd.revents;
|
||
|
|
||
|
fd2_node->revents = revents & POLLNVAL ? 0 : (uint32_t)revents;
|
||
|
assert(!(fd2_node->revents &
|
||
|
~(uint32_t)(POLLIN | POLLOUT | POLLERR | POLLHUP)));
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if (fd2_node->node_type == NODE_TYPE_FIFO &&
|
||
|
#ifdef EVFILT_USER
|
||
|
kev->filter == EVFILT_USER
|
||
|
#else
|
||
|
(fd2_node->self_pipe[0] >= 0 &&
|
||
|
kev->ident == (uintptr_t)fd2_node->self_pipe[0])
|
||
|
#endif
|
||
|
) {
|
||
|
assert(fd2_node->revents == 0);
|
||
|
|
||
|
assert(!fd2_node->has_evfilt_read);
|
||
|
assert(!fd2_node->has_evfilt_write);
|
||
|
assert(!fd2_node->has_evfilt_except);
|
||
|
|
||
|
NeededFilters needed_filters = get_needed_filters(fd2_node);
|
||
|
assert(needed_filters.evfilt_write);
|
||
|
|
||
|
struct kevent nkev[1];
|
||
|
EV_SET(&nkev[0], fd2_node->fd, EVFILT_WRITE,
|
||
|
EV_ADD | (needed_filters.evfilt_write & EV_CLEAR) |
|
||
|
EV_RECEIPT,
|
||
|
0, 0, fd2_node);
|
||
|
|
||
|
if (kevent(epollfd->kq, nkev, 1, nkev, 1, NULL) != 1 ||
|
||
|
nkev[0].data != 0) {
|
||
|
revents = EPOLLERR | EPOLLOUT;
|
||
|
|
||
|
if (!fd2_node->is_edge_triggered) {
|
||
|
registered_fds_node_trigger_self(fd2_node,
|
||
|
epollfd);
|
||
|
}
|
||
|
|
||
|
goto out;
|
||
|
} else {
|
||
|
fd2_node->has_evfilt_write = true;
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#ifdef EVFILT_EXCEPT
|
||
|
assert(kev->filter == EVFILT_READ || kev->filter == EVFILT_WRITE ||
|
||
|
kev->filter == EVFILT_EXCEPT);
|
||
|
#else
|
||
|
assert(kev->filter == EVFILT_READ || kev->filter == EVFILT_WRITE);
|
||
|
#endif
|
||
|
assert((int)kev->ident == fd2_node->fd);
|
||
|
|
||
|
if (kev->filter == EVFILT_READ) {
|
||
|
revents |= EPOLLIN;
|
||
|
#ifndef EVFILT_EXCEPT
|
||
|
if (fd2_node->events & EPOLLPRI) {
|
||
|
struct pollfd pfd = {
|
||
|
.fd = fd2_node->fd,
|
||
|
.events = POLLPRI,
|
||
|
};
|
||
|
|
||
|
if ((poll(&pfd, 1, 0) == 1) &&
|
||
|
(pfd.revents & POLLPRI)) {
|
||
|
revents |= EPOLLPRI;
|
||
|
fd2_node->pollpri_active = true;
|
||
|
} else {
|
||
|
fd2_node->pollpri_active = false;
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
} else if (kev->filter == EVFILT_WRITE) {
|
||
|
revents |= EPOLLOUT;
|
||
|
}
|
||
|
#ifdef EVFILT_EXCEPT
|
||
|
else if (kev->filter == EVFILT_EXCEPT) {
|
||
|
assert((kev->fflags & NOTE_OOB) != 0);
|
||
|
|
||
|
revents |= EPOLLPRI;
|
||
|
goto out;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
if (fd2_node->node_type == NODE_TYPE_SOCKET) {
|
||
|
if (kev->filter == EVFILT_READ) {
|
||
|
if (kev->flags & EV_EOF) {
|
||
|
fd2_node->eof_state |= EOF_STATE_READ_EOF;
|
||
|
} else {
|
||
|
fd2_node->eof_state &= ~EOF_STATE_READ_EOF;
|
||
|
}
|
||
|
} else if (kev->filter == EVFILT_WRITE) {
|
||
|
if (kev->flags & EV_EOF) {
|
||
|
fd2_node->eof_state |= EOF_STATE_WRITE_EOF;
|
||
|
} else {
|
||
|
fd2_node->eof_state &= ~EOF_STATE_WRITE_EOF;
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
if (kev->filter == EVFILT_READ) {
|
||
|
if (kev->flags & EV_EOF) {
|
||
|
fd2_node->eof_state = EOF_STATE_READ_EOF |
|
||
|
EOF_STATE_WRITE_EOF;
|
||
|
} else {
|
||
|
fd2_node->eof_state = 0;
|
||
|
}
|
||
|
} else if (kev->filter == EVFILT_WRITE) {
|
||
|
if (kev->flags & EV_EOF) {
|
||
|
fd2_node->eof_state = EOF_STATE_READ_EOF |
|
||
|
EOF_STATE_WRITE_EOF;
|
||
|
} else {
|
||
|
fd2_node->eof_state = 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (kev->flags & EV_ERROR) {
|
||
|
revents |= EPOLLERR;
|
||
|
}
|
||
|
|
||
|
if (kev->flags & EV_EOF) {
|
||
|
if (kev->fflags) {
|
||
|
revents |= EPOLLERR;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (fd2_node->eof_state) {
|
||
|
int epoll_event;
|
||
|
|
||
|
if (fd2_node->node_type == NODE_TYPE_FIFO) {
|
||
|
if (kev->filter == EVFILT_READ) {
|
||
|
epoll_event = EPOLLHUP;
|
||
|
if (kev->data == 0) {
|
||
|
revents &= ~EPOLLIN;
|
||
|
}
|
||
|
} else if (kev->filter == EVFILT_WRITE) {
|
||
|
if (fd2_node->has_evfilt_read) {
|
||
|
assert(
|
||
|
fd2_node->node_data.fifo.readable);
|
||
|
assert(
|
||
|
fd2_node->node_data.fifo.writable);
|
||
|
|
||
|
/*
|
||
|
* Any non-zero revents must have come
|
||
|
* from the EVFILT_READ filter. It
|
||
|
* could either be "POLLIN",
|
||
|
* "POLLIN | POLLHUP" or "POLLHUP", so
|
||
|
* we know if there is data to read.
|
||
|
* But we also know that the FIFO is
|
||
|
* done, so set POLLHUP because it
|
||
|
* would be set anyway.
|
||
|
*
|
||
|
* If revents is zero, not setting it
|
||
|
* will simply ignore this EVFILT_WRITE
|
||
|
* and wait for the next EVFILT_READ
|
||
|
* (which will be EOF).
|
||
|
*/
|
||
|
|
||
|
if (fd2_node->revents != 0) {
|
||
|
fd2_node->revents |= POLLHUP;
|
||
|
}
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
epoll_event = EPOLLERR;
|
||
|
if (kev->data < PIPE_BUF) {
|
||
|
revents &= ~EPOLLOUT;
|
||
|
}
|
||
|
} else {
|
||
|
__builtin_unreachable();
|
||
|
}
|
||
|
} else if (fd2_node->node_type == NODE_TYPE_SOCKET) {
|
||
|
epoll_event = 0;
|
||
|
|
||
|
if (fd2_node->eof_state & EOF_STATE_READ_EOF) {
|
||
|
epoll_event |= EPOLLIN | EPOLLRDHUP;
|
||
|
}
|
||
|
|
||
|
if (fd2_node->eof_state & EOF_STATE_WRITE_EOF) {
|
||
|
epoll_event |= EPOLLOUT;
|
||
|
}
|
||
|
|
||
|
if (fd2_node->eof_state ==
|
||
|
(EOF_STATE_READ_EOF | EOF_STATE_WRITE_EOF)) {
|
||
|
epoll_event |= EPOLLHUP;
|
||
|
}
|
||
|
} else {
|
||
|
epoll_event = EPOLLHUP;
|
||
|
}
|
||
|
|
||
|
revents |= epoll_event;
|
||
|
}
|
||
|
|
||
|
out:
|
||
|
fd2_node->revents |= (uint32_t)revents;
|
||
|
fd2_node->revents &= (fd2_node->events | EPOLLHUP | EPOLLERR);
|
||
|
|
||
|
if (fd2_node->revents && (uintptr_t)fd2_node->fd == kev->ident) {
|
||
|
if (kev->filter == EVFILT_READ) {
|
||
|
fd2_node->got_evfilt_read = true;
|
||
|
} else if (kev->filter == EVFILT_WRITE) {
|
||
|
fd2_node->got_evfilt_write = true;
|
||
|
}
|
||
|
#ifdef EVFILT_EXCEPT
|
||
|
else if (kev->filter == EVFILT_EXCEPT) {
|
||
|
fd2_node->got_evfilt_except = true;
|
||
|
}
|
||
|
#endif
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
registered_fds_node_register_for_completion(int *kq,
|
||
|
RegisteredFDsNode *fd2_node)
|
||
|
{
|
||
|
struct kevent kev[3];
|
||
|
int n = 0;
|
||
|
|
||
|
if (fd2_node->has_evfilt_read && !fd2_node->got_evfilt_read) {
|
||
|
EV_SET(&kev[n++], fd2_node->fd, EVFILT_READ,
|
||
|
EV_ADD | EV_ONESHOT | EV_RECEIPT, 0, 0, fd2_node);
|
||
|
}
|
||
|
if (fd2_node->has_evfilt_write && !fd2_node->got_evfilt_write) {
|
||
|
EV_SET(&kev[n++], fd2_node->fd, EVFILT_WRITE,
|
||
|
EV_ADD | EV_ONESHOT | EV_RECEIPT, 0, 0, fd2_node);
|
||
|
}
|
||
|
if (fd2_node->has_evfilt_except && !fd2_node->got_evfilt_except) {
|
||
|
#ifdef EVFILT_EXCEPT
|
||
|
EV_SET(&kev[n++], fd2_node->fd, EVFILT_EXCEPT,
|
||
|
EV_ADD | EV_ONESHOT | EV_RECEIPT, NOTE_OOB, 0, fd2_node);
|
||
|
#else
|
||
|
assert(0);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
if (n == 0) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if (*kq < 0) {
|
||
|
*kq = kqueue();
|
||
|
}
|
||
|
|
||
|
if (*kq >= 0) {
|
||
|
(void)kevent(*kq, kev, n, kev, n, NULL);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
registered_fds_node_complete(int kq)
|
||
|
{
|
||
|
if (kq < 0) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
struct kevent kevs[32];
|
||
|
int n;
|
||
|
|
||
|
while ((n = kevent(kq, /**/
|
||
|
NULL, 0, kevs, 32, &(struct timespec){0, 0})) > 0) {
|
||
|
for (int i = 0; i < n; ++i) {
|
||
|
RegisteredFDsNode *fd2_node =
|
||
|
(RegisteredFDsNode *)kevs[i].udata;
|
||
|
|
||
|
registered_fds_node_feed_event(fd2_node, NULL,
|
||
|
&kevs[i]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
(void)close(kq);
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
fd_cmp(RegisteredFDsNode *e1, RegisteredFDsNode *e2)
|
||
|
{
|
||
|
return (e1->fd < e2->fd) ? -1 : (e1->fd > e2->fd);
|
||
|
}
|
||
|
|
||
|
RB_PROTOTYPE_STATIC(registered_fds_set_, registered_fds_node_, entry, fd_cmp);
|
||
|
RB_GENERATE_STATIC(registered_fds_set_, registered_fds_node_, entry, fd_cmp);
|
||
|
|
||
|
errno_t
|
||
|
epollfd_ctx_init(EpollFDCtx *epollfd, int kq)
|
||
|
{
|
||
|
errno_t ec;
|
||
|
|
||
|
*epollfd = (EpollFDCtx){
|
||
|
.kq = kq,
|
||
|
.registered_fds = RB_INITIALIZER(®istered_fds),
|
||
|
.self_pipe = {-1, -1},
|
||
|
};
|
||
|
|
||
|
TAILQ_INIT(&epollfd->poll_fds);
|
||
|
|
||
|
if ((ec = pthread_mutex_init(&epollfd->mutex, NULL)) != 0) {
|
||
|
return ec;
|
||
|
}
|
||
|
|
||
|
if ((ec = pthread_mutex_init(&epollfd->nr_polling_threads_mutex,
|
||
|
NULL)) != 0) {
|
||
|
pthread_mutex_destroy(&epollfd->mutex);
|
||
|
return ec;
|
||
|
}
|
||
|
|
||
|
if ((ec = pthread_cond_init(&epollfd->nr_polling_threads_cond,
|
||
|
NULL)) != 0) {
|
||
|
pthread_mutex_destroy(&epollfd->nr_polling_threads_mutex);
|
||
|
pthread_mutex_destroy(&epollfd->mutex);
|
||
|
return ec;
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
errno_t
|
||
|
epollfd_ctx_terminate(EpollFDCtx *epollfd)
|
||
|
{
|
||
|
errno_t ec = 0;
|
||
|
errno_t ec_local;
|
||
|
|
||
|
ec_local = pthread_cond_destroy(&epollfd->nr_polling_threads_cond);
|
||
|
ec = ec ? ec : ec_local;
|
||
|
ec_local = pthread_mutex_destroy(&epollfd->nr_polling_threads_mutex);
|
||
|
ec = ec ? ec : ec_local;
|
||
|
ec_local = pthread_mutex_destroy(&epollfd->mutex);
|
||
|
ec = ec ? ec : ec_local;
|
||
|
|
||
|
RegisteredFDsNode *np;
|
||
|
RegisteredFDsNode *np_temp;
|
||
|
RB_FOREACH_SAFE(np, registered_fds_set_, &epollfd->registered_fds,
|
||
|
np_temp)
|
||
|
{
|
||
|
RB_REMOVE(registered_fds_set_, &epollfd->registered_fds, np);
|
||
|
registered_fds_node_destroy(np);
|
||
|
}
|
||
|
|
||
|
free(epollfd->kevs);
|
||
|
free(epollfd->pfds);
|
||
|
if (epollfd->self_pipe[0] >= 0 && epollfd->self_pipe[1] >= 0) {
|
||
|
(void)close(epollfd->self_pipe[0]);
|
||
|
(void)close(epollfd->self_pipe[1]);
|
||
|
}
|
||
|
|
||
|
return ec;
|
||
|
}
|
||
|
|
||
|
static errno_t
|
||
|
epollfd_ctx_make_kevs_space(EpollFDCtx *epollfd, size_t cnt)
|
||
|
{
|
||
|
assert(cnt > 0);
|
||
|
|
||
|
if (cnt <= epollfd->kevs_length) {
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
size_t size;
|
||
|
if (__builtin_mul_overflow(cnt, sizeof(struct kevent), &size)) {
|
||
|
return ENOMEM;
|
||
|
}
|
||
|
|
||
|
struct kevent *new_kevs = realloc(epollfd->kevs, size);
|
||
|
if (!new_kevs) {
|
||
|
return errno;
|
||
|
}
|
||
|
|
||
|
epollfd->kevs = new_kevs;
|
||
|
epollfd->kevs_length = cnt;
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static errno_t
|
||
|
epollfd_ctx_make_pfds_space(EpollFDCtx *epollfd)
|
||
|
{
|
||
|
size_t cnt = 1 + epollfd->poll_fds_size;
|
||
|
|
||
|
if (cnt <= epollfd->pfds_length) {
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
size_t size;
|
||
|
if (__builtin_mul_overflow(cnt, sizeof(struct pollfd), &size)) {
|
||
|
return ENOMEM;
|
||
|
}
|
||
|
|
||
|
struct pollfd *new_pfds = realloc(epollfd->pfds, size);
|
||
|
if (!new_pfds) {
|
||
|
return errno;
|
||
|
}
|
||
|
|
||
|
epollfd->pfds = new_pfds;
|
||
|
epollfd->pfds_length = cnt;
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static errno_t
|
||
|
epollfd_ctx__add_self_trigger(EpollFDCtx *epollfd)
|
||
|
{
|
||
|
struct kevent kevs[1];
|
||
|
|
||
|
#ifdef EVFILT_USER
|
||
|
EV_SET(&kevs[0], 0, EVFILT_USER, EV_ADD | EV_CLEAR, 0, 0, 0);
|
||
|
#else
|
||
|
if (epollfd->self_pipe[0] < 0 && epollfd->self_pipe[1] < 0) {
|
||
|
if (pipe2(epollfd->self_pipe, O_NONBLOCK | O_CLOEXEC) < 0) {
|
||
|
errno_t ec = errno;
|
||
|
epollfd->self_pipe[0] = epollfd->self_pipe[1] = -1;
|
||
|
return ec;
|
||
|
}
|
||
|
|
||
|
assert(epollfd->self_pipe[0] >= 0);
|
||
|
assert(epollfd->self_pipe[1] >= 0);
|
||
|
}
|
||
|
|
||
|
EV_SET(&kevs[0], epollfd->self_pipe[0], EVFILT_READ, /**/
|
||
|
EV_ADD | EV_CLEAR, 0, 0, 0);
|
||
|
#endif
|
||
|
|
||
|
if (kevent(epollfd->kq, kevs, 1, NULL, 0, NULL) < 0) {
|
||
|
return errno;
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
epollfd_ctx__trigger_self(EpollFDCtx *epollfd)
|
||
|
{
|
||
|
#ifdef EVFILT_USER
|
||
|
struct kevent kevs[1];
|
||
|
EV_SET(&kevs[0], 0, EVFILT_USER, 0, NOTE_TRIGGER, 0, 0);
|
||
|
(void)kevent(epollfd->kq, kevs, 1, NULL, 0, NULL);
|
||
|
#else
|
||
|
assert(epollfd->self_pipe[0] >= 0);
|
||
|
assert(epollfd->self_pipe[1] >= 0);
|
||
|
|
||
|
char c = 0;
|
||
|
(void)write(epollfd->self_pipe[1], &c, 1);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
epollfd_ctx__trigger_repoll(EpollFDCtx *epollfd)
|
||
|
{
|
||
|
(void)pthread_mutex_lock(&epollfd->nr_polling_threads_mutex);
|
||
|
unsigned long nr_polling_threads = epollfd->nr_polling_threads;
|
||
|
(void)pthread_mutex_unlock(&epollfd->nr_polling_threads_mutex);
|
||
|
|
||
|
if (nr_polling_threads == 0) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
epollfd_ctx__trigger_self(epollfd);
|
||
|
|
||
|
(void)pthread_mutex_lock(&epollfd->nr_polling_threads_mutex);
|
||
|
while (epollfd->nr_polling_threads != 0) {
|
||
|
pthread_cond_wait(&epollfd->nr_polling_threads_cond,
|
||
|
&epollfd->nr_polling_threads_mutex);
|
||
|
}
|
||
|
(void)pthread_mutex_unlock(&epollfd->nr_polling_threads_mutex);
|
||
|
|
||
|
#ifndef EVFILT_USER
|
||
|
char c[32];
|
||
|
while (read(epollfd->self_pipe[0], c, sizeof(c)) >= 0) {
|
||
|
}
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
epollfd_ctx__remove_node_from_kq(EpollFDCtx *epollfd,
|
||
|
RegisteredFDsNode *fd2_node)
|
||
|
{
|
||
|
if (fd2_node->is_on_pollfd_list) {
|
||
|
TAILQ_REMOVE(&epollfd->poll_fds, fd2_node, pollfd_list_entry);
|
||
|
fd2_node->is_on_pollfd_list = false;
|
||
|
assert(epollfd->poll_fds_size != 0);
|
||
|
--epollfd->poll_fds_size;
|
||
|
|
||
|
epollfd_ctx__trigger_repoll(epollfd);
|
||
|
}
|
||
|
|
||
|
if (fd2_node->self_pipe[0] >= 0) {
|
||
|
struct kevent kevs[1];
|
||
|
EV_SET(&kevs[0], fd2_node->self_pipe[0], EVFILT_READ, /**/
|
||
|
EV_DELETE, 0, 0, 0);
|
||
|
(void)kevent(epollfd->kq, kevs, 1, NULL, 0, NULL);
|
||
|
|
||
|
char c[32];
|
||
|
while (read(fd2_node->self_pipe[0], c, sizeof(c)) >= 0) {
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (fd2_node->node_type == NODE_TYPE_POLL) {
|
||
|
#ifdef EVFILT_USER
|
||
|
struct kevent kevs[1];
|
||
|
EV_SET(&kevs[0], (uintptr_t)fd2_node, EVFILT_USER, /**/
|
||
|
EV_DELETE, 0, 0, 0);
|
||
|
(void)kevent(epollfd->kq, kevs, 1, NULL, 0, NULL);
|
||
|
#endif
|
||
|
} else {
|
||
|
struct kevent kevs[3];
|
||
|
int fd2 = fd2_node->fd;
|
||
|
|
||
|
EV_SET(&kevs[0], fd2, EVFILT_READ, /**/
|
||
|
EV_DELETE | EV_RECEIPT, 0, 0, 0);
|
||
|
EV_SET(&kevs[1], fd2, EVFILT_WRITE, /**/
|
||
|
EV_DELETE | EV_RECEIPT, 0, 0, 0);
|
||
|
#ifdef EVFILT_USER
|
||
|
EV_SET(&kevs[2], (uintptr_t)fd2_node, EVFILT_USER, /**/
|
||
|
EV_DELETE | EV_RECEIPT, 0, 0, 0);
|
||
|
#endif
|
||
|
(void)kevent(epollfd->kq, kevs, 3, kevs, 3, NULL);
|
||
|
|
||
|
fd2_node->has_evfilt_read = false;
|
||
|
fd2_node->has_evfilt_write = false;
|
||
|
fd2_node->has_evfilt_except = false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static errno_t
|
||
|
epollfd_ctx__register_events(EpollFDCtx *epollfd, RegisteredFDsNode *fd2_node)
|
||
|
{
|
||
|
errno_t ec = 0;
|
||
|
|
||
|
/* Only sockets support EPOLLRDHUP and EPOLLPRI. */
|
||
|
if (fd2_node->node_type != NODE_TYPE_SOCKET) {
|
||
|
fd2_node->events &= ~(uint32_t)EPOLLRDHUP;
|
||
|
fd2_node->events &= ~(uint32_t)EPOLLPRI;
|
||
|
}
|
||
|
|
||
|
int const fd2 = fd2_node->fd;
|
||
|
struct kevent kev[4] = {
|
||
|
{.data = 0},
|
||
|
{.data = 0},
|
||
|
{.data = 0},
|
||
|
{.data = 0},
|
||
|
};
|
||
|
|
||
|
assert(fd2 >= 0);
|
||
|
|
||
|
int evfilt_read_index = -1;
|
||
|
int evfilt_write_index = -1;
|
||
|
|
||
|
if (fd2_node->node_type != NODE_TYPE_POLL) {
|
||
|
if (fd2_node->is_registered) {
|
||
|
epollfd_ctx__remove_node_from_kq(epollfd, fd2_node);
|
||
|
}
|
||
|
|
||
|
int n = 0;
|
||
|
|
||
|
assert(!fd2_node->has_evfilt_read);
|
||
|
assert(!fd2_node->has_evfilt_write);
|
||
|
assert(!fd2_node->has_evfilt_except);
|
||
|
|
||
|
NeededFilters needed_filters = get_needed_filters(fd2_node);
|
||
|
|
||
|
if (needed_filters.evfilt_read) {
|
||
|
fd2_node->has_evfilt_read = true;
|
||
|
evfilt_read_index = n;
|
||
|
EV_SET(&kev[n++], fd2, EVFILT_READ,
|
||
|
EV_ADD | (needed_filters.evfilt_read & EV_CLEAR),
|
||
|
0, 0, fd2_node);
|
||
|
}
|
||
|
if (needed_filters.evfilt_write) {
|
||
|
fd2_node->has_evfilt_write = true;
|
||
|
evfilt_write_index = n;
|
||
|
EV_SET(&kev[n++], fd2, EVFILT_WRITE,
|
||
|
EV_ADD | (needed_filters.evfilt_write & EV_CLEAR),
|
||
|
0, 0, fd2_node);
|
||
|
}
|
||
|
|
||
|
assert(n != 0);
|
||
|
|
||
|
if (needed_filters.evfilt_except) {
|
||
|
#ifdef EVFILT_EXCEPT
|
||
|
fd2_node->has_evfilt_except = true;
|
||
|
EV_SET(&kev[n++], fd2, EVFILT_EXCEPT,
|
||
|
EV_ADD | (needed_filters.evfilt_except & EV_CLEAR),
|
||
|
NOTE_OOB, 0, fd2_node);
|
||
|
#else
|
||
|
assert(0);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
for (int i = 0; i < n; ++i) {
|
||
|
kev[i].flags |= EV_RECEIPT;
|
||
|
}
|
||
|
|
||
|
int ret = kevent(epollfd->kq, kev, n, kev, n, NULL);
|
||
|
if (ret < 0) {
|
||
|
ec = errno;
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
assert(ret == n);
|
||
|
|
||
|
for (int i = 0; i < n; ++i) {
|
||
|
assert((kev[i].flags & EV_ERROR) != 0);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Check for fds that only support poll. */
|
||
|
if (((fd2_node->node_type == NODE_TYPE_OTHER &&
|
||
|
kev[0].data == ENODEV) ||
|
||
|
fd2_node->node_type == NODE_TYPE_POLL)) {
|
||
|
|
||
|
assert((fd2_node->events & /**/
|
||
|
~(uint32_t)(EPOLLIN | EPOLLOUT)) == 0);
|
||
|
assert(fd2_node->is_registered ||
|
||
|
fd2_node->node_type == NODE_TYPE_OTHER);
|
||
|
|
||
|
fd2_node->has_evfilt_read = false;
|
||
|
fd2_node->has_evfilt_write = false;
|
||
|
fd2_node->has_evfilt_except = false;
|
||
|
|
||
|
fd2_node->node_type = NODE_TYPE_POLL;
|
||
|
|
||
|
if ((ec = registered_fds_node_add_self_trigger(fd2_node,
|
||
|
epollfd)) != 0) {
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
if (!fd2_node->is_on_pollfd_list) {
|
||
|
if ((ec = /**/
|
||
|
epollfd_ctx__add_self_trigger(epollfd)) != 0) {
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
TAILQ_INSERT_TAIL(&epollfd->poll_fds, fd2_node,
|
||
|
pollfd_list_entry);
|
||
|
fd2_node->is_on_pollfd_list = true;
|
||
|
++epollfd->poll_fds_size;
|
||
|
}
|
||
|
|
||
|
/* This is outside the above if because poll ".events" might
|
||
|
* have changed which needs a retriggering. */
|
||
|
epollfd_ctx__trigger_repoll(epollfd);
|
||
|
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
for (int i = 0; i < 4; ++i) {
|
||
|
if (kev[i].data != 0) {
|
||
|
if ((kev[i].data == EPIPE
|
||
|
#ifdef __NetBSD__
|
||
|
|| kev[i].data == EBADF
|
||
|
#endif
|
||
|
) &&
|
||
|
i == evfilt_write_index &&
|
||
|
fd2_node->node_type == NODE_TYPE_FIFO) {
|
||
|
|
||
|
fd2_node->eof_state = EOF_STATE_READ_EOF |
|
||
|
EOF_STATE_WRITE_EOF;
|
||
|
fd2_node->has_evfilt_write = false;
|
||
|
|
||
|
if (evfilt_read_index < 0) {
|
||
|
if ((ec = registered_fds_node_add_self_trigger(
|
||
|
fd2_node, epollfd)) != 0) {
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
registered_fds_node_trigger_self(
|
||
|
fd2_node, epollfd);
|
||
|
}
|
||
|
} else {
|
||
|
ec = (int)kev[i].data;
|
||
|
goto out;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
ec = 0;
|
||
|
|
||
|
out:
|
||
|
return ec;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
epollfd_ctx_remove_node(EpollFDCtx *epollfd, RegisteredFDsNode *fd2_node)
|
||
|
{
|
||
|
epollfd_ctx__remove_node_from_kq(epollfd, fd2_node);
|
||
|
|
||
|
RB_REMOVE(registered_fds_set_, &epollfd->registered_fds, fd2_node);
|
||
|
assert(epollfd->registered_fds_size > 0);
|
||
|
--epollfd->registered_fds_size;
|
||
|
|
||
|
registered_fds_node_destroy(fd2_node);
|
||
|
}
|
||
|
|
||
|
#if defined(__FreeBSD__)
|
||
|
static void
|
||
|
modify_fifo_rights_from_capabilities(RegisteredFDsNode *fd2_node)
|
||
|
{
|
||
|
assert(fd2_node->node_data.fifo.readable);
|
||
|
assert(fd2_node->node_data.fifo.writable);
|
||
|
|
||
|
cap_rights_t rights;
|
||
|
memset(&rights, 0, sizeof(rights));
|
||
|
|
||
|
if (cap_rights_get(fd2_node->fd, &rights) == 0) {
|
||
|
cap_rights_t test_rights;
|
||
|
|
||
|
cap_rights_init(&test_rights, CAP_READ);
|
||
|
bool has_read_rights = cap_rights_contains(&rights,
|
||
|
&test_rights);
|
||
|
|
||
|
cap_rights_init(&test_rights, CAP_WRITE);
|
||
|
bool has_write_rights = cap_rights_contains(&rights,
|
||
|
&test_rights);
|
||
|
|
||
|
if (has_read_rights != has_write_rights) {
|
||
|
fd2_node->node_data.fifo.readable = has_read_rights;
|
||
|
fd2_node->node_data.fifo.writable = has_write_rights;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
static errno_t
|
||
|
epollfd_ctx_add_node(EpollFDCtx *epollfd, int fd2, struct epoll_event *ev,
|
||
|
struct stat const *statbuf)
|
||
|
{
|
||
|
RegisteredFDsNode *fd2_node = registered_fds_node_create(fd2);
|
||
|
if (!fd2_node) {
|
||
|
return ENOMEM;
|
||
|
}
|
||
|
|
||
|
if (S_ISFIFO(statbuf->st_mode)) {
|
||
|
int tmp;
|
||
|
|
||
|
if (ioctl(fd2_node->fd, FIONREAD, &tmp) < 0 &&
|
||
|
errno == ENOTTY) {
|
||
|
#ifdef __FreeBSD__
|
||
|
/*
|
||
|
* On FreeBSD we need to distinguish between kqueues
|
||
|
* and native eventfds.
|
||
|
*/
|
||
|
if (ioctl(fd2_node->fd, FIONBIO, &tmp) < 0 &&
|
||
|
errno == ENOTTY) {
|
||
|
fd2_node->node_type = NODE_TYPE_KQUEUE;
|
||
|
} else {
|
||
|
fd2_node->node_type = NODE_TYPE_OTHER;
|
||
|
}
|
||
|
#else
|
||
|
fd2_node->node_type = NODE_TYPE_KQUEUE;
|
||
|
#endif
|
||
|
} else {
|
||
|
fd2_node->node_type = NODE_TYPE_FIFO;
|
||
|
|
||
|
int fl = fcntl(fd2, F_GETFL, 0);
|
||
|
if (fl < 0) {
|
||
|
errno_t ec = errno;
|
||
|
registered_fds_node_destroy(fd2_node);
|
||
|
return ec;
|
||
|
}
|
||
|
|
||
|
fl &= O_ACCMODE;
|
||
|
|
||
|
if (fl == O_RDWR) {
|
||
|
fd2_node->node_data.fifo.readable = true;
|
||
|
fd2_node->node_data.fifo.writable = true;
|
||
|
#if defined(__FreeBSD__)
|
||
|
modify_fifo_rights_from_capabilities(fd2_node);
|
||
|
#endif
|
||
|
} else if (fl == O_WRONLY) {
|
||
|
fd2_node->node_data.fifo.writable = true;
|
||
|
} else if (fl == O_RDONLY) {
|
||
|
fd2_node->node_data.fifo.readable = true;
|
||
|
} else {
|
||
|
registered_fds_node_destroy(fd2_node);
|
||
|
return EINVAL;
|
||
|
}
|
||
|
}
|
||
|
} else if (S_ISSOCK(statbuf->st_mode)) {
|
||
|
fd2_node->node_type = NODE_TYPE_SOCKET;
|
||
|
} else {
|
||
|
/* May also be NODE_TYPE_POLL,
|
||
|
will be checked when registering. */
|
||
|
fd2_node->node_type = NODE_TYPE_OTHER;
|
||
|
}
|
||
|
|
||
|
registered_fds_node_update_flags_from_epoll_event(fd2_node, ev);
|
||
|
|
||
|
void *colliding_node = RB_INSERT(registered_fds_set_,
|
||
|
&epollfd->registered_fds, fd2_node);
|
||
|
(void)colliding_node;
|
||
|
assert(colliding_node == NULL);
|
||
|
++epollfd->registered_fds_size;
|
||
|
|
||
|
errno_t ec = epollfd_ctx__register_events(epollfd, fd2_node);
|
||
|
if (ec != 0) {
|
||
|
epollfd_ctx_remove_node(epollfd, fd2_node);
|
||
|
return ec;
|
||
|
}
|
||
|
|
||
|
fd2_node->is_registered = true;
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static errno_t
|
||
|
epollfd_ctx_modify_node(EpollFDCtx *epollfd, RegisteredFDsNode *fd2_node,
|
||
|
struct epoll_event *ev)
|
||
|
{
|
||
|
registered_fds_node_update_flags_from_epoll_event(fd2_node, ev);
|
||
|
|
||
|
assert(fd2_node->is_registered);
|
||
|
|
||
|
errno_t ec = epollfd_ctx__register_events(epollfd, fd2_node);
|
||
|
if (ec != 0) {
|
||
|
epollfd_ctx_remove_node(epollfd, fd2_node);
|
||
|
return ec;
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static errno_t
|
||
|
epollfd_ctx_ctl_impl(EpollFDCtx *epollfd, int op, int fd2,
|
||
|
struct epoll_event *ev)
|
||
|
{
|
||
|
assert(op == EPOLL_CTL_DEL || ev != NULL);
|
||
|
|
||
|
if (epollfd->kq == fd2) {
|
||
|
return EINVAL;
|
||
|
}
|
||
|
|
||
|
if (op != EPOLL_CTL_DEL &&
|
||
|
((ev->events &
|
||
|
~(uint32_t)(EPOLLIN | EPOLLOUT | EPOLLRDHUP | /**/
|
||
|
EPOLLPRI | /* unsupported by FreeBSD's kqueue! */
|
||
|
EPOLLHUP | EPOLLERR | /**/
|
||
|
EPOLLET | EPOLLONESHOT)))) {
|
||
|
return EINVAL;
|
||
|
}
|
||
|
|
||
|
RegisteredFDsNode *fd2_node;
|
||
|
{
|
||
|
RegisteredFDsNode find;
|
||
|
find.fd = fd2;
|
||
|
|
||
|
fd2_node = RB_FIND(registered_fds_set_, /**/
|
||
|
&epollfd->registered_fds, &find);
|
||
|
}
|
||
|
|
||
|
struct stat statbuf;
|
||
|
if (fstat(fd2, &statbuf) < 0) {
|
||
|
errno_t ec = errno;
|
||
|
|
||
|
/* If the fstat fails for any reason we must clear
|
||
|
* internal state to avoid EEXIST errors in future
|
||
|
* calls to epoll_ctl. */
|
||
|
if (fd2_node) {
|
||
|
epollfd_ctx_remove_node(epollfd, fd2_node);
|
||
|
}
|
||
|
|
||
|
return ec;
|
||
|
}
|
||
|
|
||
|
errno_t ec;
|
||
|
|
||
|
if (op == EPOLL_CTL_ADD) {
|
||
|
ec = fd2_node
|
||
|
? EEXIST
|
||
|
: epollfd_ctx_add_node(epollfd, fd2, ev, &statbuf);
|
||
|
} else if (op == EPOLL_CTL_DEL) {
|
||
|
ec = !fd2_node
|
||
|
? ENOENT
|
||
|
: (epollfd_ctx_remove_node(epollfd, fd2_node), 0);
|
||
|
} else if (op == EPOLL_CTL_MOD) {
|
||
|
ec = !fd2_node
|
||
|
? ENOENT
|
||
|
: epollfd_ctx_modify_node(epollfd, fd2_node, ev);
|
||
|
} else {
|
||
|
ec = EINVAL;
|
||
|
}
|
||
|
|
||
|
return ec;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
epollfd_ctx_fill_pollfds(EpollFDCtx *epollfd, struct pollfd *pfds)
|
||
|
{
|
||
|
pfds[0] = (struct pollfd){.fd = epollfd->kq, .events = POLLIN};
|
||
|
|
||
|
RegisteredFDsNode *poll_node;
|
||
|
size_t i = 1;
|
||
|
TAILQ_FOREACH(poll_node, &epollfd->poll_fds, pollfd_list_entry)
|
||
|
{
|
||
|
pfds[i++] = (struct pollfd){
|
||
|
.fd = poll_node->fd,
|
||
|
.events = poll_node->node_type == NODE_TYPE_POLL
|
||
|
? (short)poll_node->events
|
||
|
: POLLPRI,
|
||
|
};
|
||
|
}
|
||
|
}
|
||
|
|
||
|
errno_t
|
||
|
epollfd_ctx_ctl(EpollFDCtx *epollfd, int op, int fd2, struct epoll_event *ev)
|
||
|
{
|
||
|
errno_t ec;
|
||
|
|
||
|
(void)pthread_mutex_lock(&epollfd->mutex);
|
||
|
ec = epollfd_ctx_ctl_impl(epollfd, op, fd2, ev);
|
||
|
(void)pthread_mutex_unlock(&epollfd->mutex);
|
||
|
|
||
|
return ec;
|
||
|
}
|
||
|
|
||
|
static errno_t
|
||
|
epollfd_ctx_wait_impl(EpollFDCtx *epollfd, struct epoll_event *ev, int cnt,
|
||
|
int *actual_cnt)
|
||
|
{
|
||
|
errno_t ec;
|
||
|
|
||
|
assert(cnt >= 1);
|
||
|
|
||
|
ec = epollfd_ctx_make_pfds_space(epollfd);
|
||
|
if (ec != 0) {
|
||
|
return ec;
|
||
|
}
|
||
|
|
||
|
epollfd_ctx_fill_pollfds(epollfd, epollfd->pfds);
|
||
|
|
||
|
int n = poll(epollfd->pfds, (nfds_t)(1 + epollfd->poll_fds_size), 0);
|
||
|
if (n < 0) {
|
||
|
return errno;
|
||
|
}
|
||
|
if (n == 0) {
|
||
|
*actual_cnt = 0;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
{
|
||
|
RegisteredFDsNode *poll_node, *tmp_poll_node;
|
||
|
size_t i = 1;
|
||
|
TAILQ_FOREACH_SAFE(poll_node, &epollfd->poll_fds,
|
||
|
pollfd_list_entry, tmp_poll_node)
|
||
|
{
|
||
|
struct pollfd *pfd = &epollfd->pfds[i++];
|
||
|
|
||
|
if (pfd->revents & POLLNVAL) {
|
||
|
epollfd_ctx_remove_node(epollfd, poll_node);
|
||
|
} else if (pfd->revents) {
|
||
|
registered_fds_node_trigger_self(poll_node,
|
||
|
epollfd);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
again:;
|
||
|
|
||
|
/*
|
||
|
* Each registered fd can produce a maximum of 3 kevents. If
|
||
|
* the provided space in 'ev' is large enough to hold results
|
||
|
* for all registered fds, provide enough space for the kevent
|
||
|
* call as well. Add some wiggle room for the 'poll only fd'
|
||
|
* notification mechanism.
|
||
|
*/
|
||
|
if ((size_t)cnt >= epollfd->registered_fds_size) {
|
||
|
if (__builtin_add_overflow(cnt, 1, &cnt)) {
|
||
|
return ENOMEM;
|
||
|
}
|
||
|
if (__builtin_mul_overflow(cnt, 3, &cnt)) {
|
||
|
return ENOMEM;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
ec = epollfd_ctx_make_kevs_space(epollfd, (size_t)cnt);
|
||
|
if (ec != 0) {
|
||
|
return ec;
|
||
|
}
|
||
|
|
||
|
struct kevent *kevs = epollfd->kevs;
|
||
|
assert(kevs != NULL);
|
||
|
|
||
|
n = kevent(epollfd->kq, NULL, 0, kevs, cnt, &(struct timespec){0, 0});
|
||
|
if (n < 0) {
|
||
|
return errno;
|
||
|
}
|
||
|
|
||
|
int j = 0;
|
||
|
|
||
|
for (int i = 0; i < n; ++i) {
|
||
|
RegisteredFDsNode *fd2_node =
|
||
|
(RegisteredFDsNode *)kevs[i].udata;
|
||
|
|
||
|
if (!fd2_node) {
|
||
|
#ifdef EVFILT_USER
|
||
|
assert(kevs[i].filter == EVFILT_USER);
|
||
|
#else
|
||
|
assert(kevs[i].filter == EVFILT_READ);
|
||
|
#endif
|
||
|
assert(kevs[i].udata == 0);
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
uint32_t old_revents = fd2_node->revents;
|
||
|
NeededFilters old_needed_filters = get_needed_filters(
|
||
|
fd2_node);
|
||
|
|
||
|
registered_fds_node_feed_event(fd2_node, epollfd, &kevs[i]);
|
||
|
|
||
|
if (fd2_node->node_type != NODE_TYPE_POLL &&
|
||
|
!(fd2_node->is_edge_triggered &&
|
||
|
fd2_node->eof_state ==
|
||
|
(EOF_STATE_READ_EOF | EOF_STATE_WRITE_EOF) &&
|
||
|
fd2_node->node_type != NODE_TYPE_FIFO)) {
|
||
|
|
||
|
NeededFilters needed_filters = get_needed_filters(
|
||
|
fd2_node);
|
||
|
|
||
|
if (old_needed_filters.evfilt_read !=
|
||
|
needed_filters.evfilt_read ||
|
||
|
old_needed_filters.evfilt_write !=
|
||
|
needed_filters.evfilt_write) {
|
||
|
|
||
|
if (epollfd_ctx__register_events(epollfd,
|
||
|
fd2_node) != 0) {
|
||
|
epollfd_ctx__remove_node_from_kq(
|
||
|
epollfd, fd2_node);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (fd2_node->revents && !old_revents) {
|
||
|
ev[j++].data.ptr = fd2_node;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
{
|
||
|
int completion_kq = -1;
|
||
|
|
||
|
for (int i = 0; i < j; ++i) {
|
||
|
RegisteredFDsNode *fd2_node =
|
||
|
(RegisteredFDsNode *)ev[i].data.ptr;
|
||
|
|
||
|
if (n == cnt || fd2_node->is_edge_triggered) {
|
||
|
registered_fds_node_register_for_completion(
|
||
|
&completion_kq, fd2_node);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
registered_fds_node_complete(completion_kq);
|
||
|
}
|
||
|
|
||
|
for (int i = 0; i < j; ++i) {
|
||
|
RegisteredFDsNode *fd2_node =
|
||
|
(RegisteredFDsNode *)ev[i].data.ptr;
|
||
|
|
||
|
ev[i].events = fd2_node->revents;
|
||
|
ev[i].data = fd2_node->data;
|
||
|
|
||
|
fd2_node->revents = 0;
|
||
|
fd2_node->got_evfilt_read = false;
|
||
|
fd2_node->got_evfilt_write = false;
|
||
|
fd2_node->got_evfilt_except = false;
|
||
|
|
||
|
if (fd2_node->is_oneshot) {
|
||
|
epollfd_ctx__remove_node_from_kq(epollfd, fd2_node);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (n && j == 0) {
|
||
|
goto again;
|
||
|
}
|
||
|
|
||
|
*actual_cnt = j;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
errno_t
|
||
|
epollfd_ctx_wait(EpollFDCtx *epollfd, struct epoll_event *ev, int cnt,
|
||
|
int *actual_cnt)
|
||
|
{
|
||
|
errno_t ec;
|
||
|
|
||
|
(void)pthread_mutex_lock(&epollfd->mutex);
|
||
|
ec = epollfd_ctx_wait_impl(epollfd, ev, cnt, actual_cnt);
|
||
|
(void)pthread_mutex_unlock(&epollfd->mutex);
|
||
|
|
||
|
return ec;
|
||
|
}
|