diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2024-03-29 08:28:50 -0700 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2024-03-29 08:28:51 -0700 |
| commit | da493dbb1f2a156a1b6d8d8a447f2c3affe43678 (patch) | |
| tree | 0c59686980a7ef30db7510b22fc5245b052b35ae /include | |
| parent | 50e2907ef8bb52cf80ecde9eec5c4dac07177146 (diff) | |
| parent | 2aa0cff26ed53bc8d4855292b501759435ffdd38 (diff) | |
Merge branch 'af_unix-rework-gc'
Kuniyuki Iwashima says:
====================
af_unix: Rework GC.
When we pass a file descriptor to an AF_UNIX socket via SCM_RIGHTS,
the underlying struct file of the inflight fd gets its refcount bumped.
If the fd is of an AF_UNIX socket, we need to track it in case it forms
cyclic references.
Let's say we send an fd of AF_UNIX socket A to socket B and vice versa,
and then close() both sockets.
When created, each socket's struct file initially has one reference.
After the fd exchange, both refcounts are bumped up to 2. Then, close()
decreases both to 1. From this point on, no one can touch the file/socket.
However, the struct file has one refcount and thus never calls the
release() function of the AF_UNIX socket.
That's why we need to track all inflight AF_UNIX sockets and run garbage
collection.
This series replaces the current GC implementation that locks each inflight
socket's receive queue and requires trickiness in other places.
The new GC does not lock each socket's queue to minimise its effect and
tries to be lightweight if there is no cyclic reference or no update in
the shape of the inflight fd graph.
The new implementation is based on Tarjan's Strongly Connected Components
algorithm, and we will consider each inflight AF_UNIX socket as a vertex
and its file descriptor as an edge in a directed graph.
For the details, please see each patch.
patch 1 - 3 : Add struct to express inflight socket graphs
patch 4 : Optimise inflight fd counting
patch 5 - 6 : Group SCC possibly forming a cycle
patch 7 - 8 : Support embryo socket
patch 9 - 11 : Make GC lightweight
patch 12 - 13 : Detect dead cycle references
patch 14 : Replace GC algorithm
patch 15 : selftest
After this series is applied, we can remove the two ugly tricks for races,
scm_fp_dup() in unix_attach_fds() and the spin_lock dance in unix_peek_fds(),
as done in patch 14/15 of v1.
Also, we will add cond_resched_lock() in __unix_gc() and convert it to
use a dedicated kthread instead of global system workqueue as suggested
by Paolo in a v4 thread.
v4: https://lore.kernel.org/netdev/20240301022243.73908-1-kuniyu@amazon.com/
v3: https://lore.kernel.org/netdev/20240223214003.17369-1-kuniyu@amazon.com/
v2: https://lore.kernel.org/netdev/20240216210556.65913-1-kuniyu@amazon.com/
v1: https://lore.kernel.org/netdev/20240203030058.60750-1-kuniyu@amazon.com/
====================
Link: https://lore.kernel.org/r/20240325202425.60930-1-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/net/af_unix.h | 31 | ||||
| -rw-r--r-- | include/net/scm.h | 9 |
2 files changed, 32 insertions, 8 deletions
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 627ea8e2d915..226a8da2cbe3 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -19,12 +19,30 @@ static inline struct unix_sock *unix_get_socket(struct file *filp)
 
 extern spinlock_t unix_gc_lock;
 extern unsigned int unix_tot_inflight;
-
-void unix_inflight(struct user_struct *user, struct file *fp);
-void unix_notinflight(struct user_struct *user, struct file *fp);
+void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver);
+void unix_del_edges(struct scm_fp_list *fpl);
+void unix_update_edges(struct unix_sock *receiver);
+int unix_prepare_fpl(struct scm_fp_list *fpl);
+void unix_destroy_fpl(struct scm_fp_list *fpl);
 void unix_gc(void);
 void wait_for_unix_gc(struct scm_fp_list *fpl);
 
+struct unix_vertex {
+	struct list_head edges;
+	struct list_head entry;
+	struct list_head scc_entry;
+	unsigned long out_degree;
+	unsigned long index;
+	unsigned long scc_index;
+};
+
+struct unix_edge {
+	struct unix_sock *predecessor;
+	struct unix_sock *successor;
+	struct list_head vertex_entry;
+	struct list_head stack_entry;
+};
+
 struct sock *unix_peer_get(struct sock *sk);
 
 #define UNIX_HASH_MOD (256 - 1)
@@ -62,12 +80,9 @@ struct unix_sock {
 	struct path path;
 	struct mutex iolock, bindlock;
 	struct sock *peer;
-	struct list_head link;
-	unsigned long inflight;
+	struct sock *listener;
+	struct unix_vertex *vertex;
 	spinlock_t lock;
-	unsigned long gc_flags;
-#define UNIX_GC_CANDIDATE 0
-#define UNIX_GC_MAYBE_CYCLE 1
 	struct socket_wq peer_wq;
 	wait_queue_entry_t peer_wake;
 	struct scm_stat scm_stat;
diff --git a/include/net/scm.h b/include/net/scm.h
index 92276a2c5543..bbc5527809d1 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -23,10 +23,19 @@ struct scm_creds {
 	kgid_t gid;
 };
 
+#ifdef CONFIG_UNIX
+struct unix_edge;
+#endif
+
 struct scm_fp_list {
 	short count;
 	short count_unix;
 	short max;
+#ifdef CONFIG_UNIX
+	bool inflight;
+	struct list_head vertices;
+	struct unix_edge *edges;
+#endif
 	struct user_struct *user;
 	struct file *fp[SCM_MAX_FD];
 };
