diff options
| author | Christian Brauner <brauner@kernel.org> | 2026-06-04 10:25:14 +0200 |
|---|---|---|
| committer | Christian Brauner <brauner@kernel.org> | 2026-06-04 10:25:14 +0200 |
| commit | b466bfe0ec0ff80994b0fa3ffe7bc36f0ce6be4b (patch) | |
| tree | 1e65bf935c10395b928dfccc132c604f5413ee91 | |
| parent | 70a03a385de2b8f0fa54dbc70bdc3ed176853d1c (diff) | |
| parent | 0c4aefe3c2d0f272a2ad73699a12d4446ffdbe7b (diff) | |
Merge patch series "eventpoll: Fix epoll_wait() report false negative"
Nam Cao <namcao@linutronix.de> says:
While staring at epoll, I noticed ep_events_available() looks wrong. I
wrote a small program to confirm, and yes it is definitely wrong.
This series adds a reproducer to kselftest and fixes the bug.
* patches from https://patch.msgid.link/cover.1780422137.git.namcao@linutronix.de:
eventpoll: Fix epoll_wait() report false negative
selftests/eventpoll: Add test for multiple waiters
Link: https://patch.msgid.link/cover.1780422137.git.namcao@linutronix.de
Signed-off-by: Christian Brauner <brauner@kernel.org>
| -rw-r--r-- | fs/eventpoll.c | 20 | ||||
| -rw-r--r-- | tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c | 45 |
2 files changed, 64 insertions, 1 deletion
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index baa97d0edade..df364a8783b5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -38,6 +38,7 @@ #include <linux/compat.h> #include <linux/rculist.h> #include <linux/capability.h> +#include <linux/seqlock.h> #include <net/busy_poll.h> /* @@ -312,6 +313,9 @@ struct eventpoll { /* Lock which protects rdllist and ovflist */ spinlock_t lock; + /* Protect switching between rdllist and ovflist */ + seqcount_spinlock_t seq; + /* RB tree root used to store monitored fd structs */ struct rb_root_cached rbr; @@ -590,7 +594,10 @@ static inline void epi_clear_ovflist(struct epitem *epi) /* True iff @ep has ready events that epoll_wait() might harvest. */ static inline bool ep_events_available(struct eventpoll *ep) { - return !list_empty_careful(&ep->rdllist) || ep_is_scanning(ep); + unsigned int seq = read_seqcount_begin(&ep->seq); + + return !list_empty_careful(&ep->rdllist) || ep_is_scanning(ep) || + read_seqcount_retry(&ep->seq, seq); } #ifdef CONFIG_NET_RX_BUSY_POLL @@ -947,8 +954,12 @@ static void ep_start_scan(struct eventpoll *ep, struct list_head *scan_batch) */ lockdep_assert_irqs_enabled(); spin_lock_irq(&ep->lock); + write_seqcount_begin(&ep->seq); + list_splice_init(&ep->rdllist, scan_batch); ep_enter_scan(ep); + + write_seqcount_end(&ep->seq); spin_unlock_irq(&ep->lock); } @@ -979,6 +990,9 @@ static void ep_done_scan(struct eventpoll *ep, ep_pm_stay_awake(epi); } } + + write_seqcount_begin(&ep->seq); + /* Back out of scan mode; callbacks target ep->rdllist again. */ ep_exit_scan(ep); @@ -986,6 +1000,9 @@ static void ep_done_scan(struct eventpoll *ep, * Quickly re-inject items left on "scan_batch". 
*/ list_splice(scan_batch, &ep->rdllist); + + write_seqcount_end(&ep->seq); + __pm_relax(ep->ws); if (!list_empty(&ep->rdllist)) { @@ -1405,6 +1422,7 @@ static int ep_alloc(struct eventpoll **pep) mutex_init(&ep->mtx); spin_lock_init(&ep->lock); + seqcount_spinlock_init(&ep->seq, &ep->lock); init_waitqueue_head(&ep->wq); init_waitqueue_head(&ep->poll_wait); INIT_LIST_HEAD(&ep->rdllist); diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c index 8bc57a2ef966..f6f1a7ff01b0 100644 --- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c +++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c @@ -3493,4 +3493,49 @@ TEST(epoll64) close(ctx.sfd[1]); } +static void *epoll65_wait(void *ctx_) +{ + struct epoll_mtcontext *ctx = ctx_; + struct epoll_event event; + + for (int i = 0; i < 100000; ++i) { + if (!epoll_wait(ctx->efd[0], &event, 1, 0)) + return (void *)ENODATA; + } + + return (void *)0; +} + +TEST(epoll65) +{ + struct epoll_mtcontext ctx; + struct epoll_event event; + int64_t dummy_data = 99; + pthread_t threads[64]; + uintptr_t ret; + int i, err; + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + ctx.efd[1] = eventfd(0, 0); + ASSERT_GE(ctx.efd[1], 0); + + event.events = EPOLLIN; + err = epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &event); + ASSERT_EQ(err, 0); + + write(ctx.efd[1], &dummy_data, sizeof(dummy_data)); + + for (i = 0; i < ARRAY_SIZE(threads); ++i) + ASSERT_EQ(pthread_create(&threads[i], NULL, epoll65_wait, &ctx), 0); + + for (i = 0; i < ARRAY_SIZE(threads); ++i) { + ASSERT_EQ(pthread_join(threads[i], (void **)&ret), 0); + ASSERT_EQ(ret, 0); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); +} + TEST_HARNESS_MAIN |
