summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2024-03-29 08:28:50 -0700
committerJakub Kicinski <kuba@kernel.org>2024-03-29 08:28:51 -0700
commitda493dbb1f2a156a1b6d8d8a447f2c3affe43678 (patch)
tree0c59686980a7ef30db7510b22fc5245b052b35ae /include
parent50e2907ef8bb52cf80ecde9eec5c4dac07177146 (diff)
parent2aa0cff26ed53bc8d4855292b501759435ffdd38 (diff)
Merge branch 'af_unix-rework-gc'
Kuniyuki Iwashima says: ==================== af_unix: Rework GC. When we pass a file descriptor to an AF_UNIX socket via SCM_RIGTHS, the underlying struct file of the inflight fd gets its refcount bumped. If the fd is of an AF_UNIX socket, we need to track it in case it forms cyclic references. Let's say we send a fd of AF_UNIX socket A to B and vice versa and close() both sockets. When created, each socket's struct file initially has one reference. After the fd exchange, both refcounts are bumped up to 2. Then, close() decreases both to 1. From this point on, no one can touch the file/socket. However, the struct file has one refcount and thus never calls the release() function of the AF_UNIX socket. That's why we need to track all inflight AF_UNIX sockets and run garbage collection. This series replaces the current GC implementation that locks each inflight socket's receive queue and requires trickiness in other places. The new GC does not lock each socket's queue to minimise its effect and tries to be lightweight if there is no cyclic reference or no update in the shape of the inflight fd graph. The new implementation is based on Tarjan's Strongly Connected Components algorithm, and we will consider each inflight AF_UNIX socket as a vertex and its file descriptor as an edge in a directed graph. For the details, please see each patch. patch 1 - 3 : Add struct to express inflight socket graphs patch 4 : Optimse inflight fd counting patch 5 - 6 : Group SCC possibly forming a cycle patch 7 - 8 : Support embryo socket patch 9 - 11 : Make GC lightweight patch 12 - 13 : Detect dead cycle references patch 14 : Replace GC algorithm patch 15 : selftest After this series is applied, we can remove the two ugly tricks for race, scm_fp_dup() in unix_attach_fds() and spin_lock dance in unix_peek_fds() as done in patch 14/15 of v1. Also, we will add cond_resched_lock() in __unix_gc() and convert it to use a dedicated kthread instead of global system workqueue as suggested by Paolo in a v4 thread. v4: https://lore.kernel.org/netdev/20240301022243.73908-1-kuniyu@amazon.com/ v3: https://lore.kernel.org/netdev/20240223214003.17369-1-kuniyu@amazon.com/ v2: https://lore.kernel.org/netdev/20240216210556.65913-1-kuniyu@amazon.com/ v1: https://lore.kernel.org/netdev/20240203030058.60750-1-kuniyu@amazon.com/ ==================== Link: https://lore.kernel.org/r/20240325202425.60930-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/net/af_unix.h31
-rw-r--r--include/net/scm.h9
2 files changed, 32 insertions, 8 deletions
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 627ea8e2d915..226a8da2cbe3 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -19,12 +19,30 @@ static inline struct unix_sock *unix_get_socket(struct file *filp)
extern spinlock_t unix_gc_lock;
extern unsigned int unix_tot_inflight;
-
-void unix_inflight(struct user_struct *user, struct file *fp);
-void unix_notinflight(struct user_struct *user, struct file *fp);
+void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver);
+void unix_del_edges(struct scm_fp_list *fpl);
+void unix_update_edges(struct unix_sock *receiver);
+int unix_prepare_fpl(struct scm_fp_list *fpl);
+void unix_destroy_fpl(struct scm_fp_list *fpl);
void unix_gc(void);
void wait_for_unix_gc(struct scm_fp_list *fpl);
+struct unix_vertex {
+ struct list_head edges;
+ struct list_head entry;
+ struct list_head scc_entry;
+ unsigned long out_degree;
+ unsigned long index;
+ unsigned long scc_index;
+};
+
+struct unix_edge {
+ struct unix_sock *predecessor;
+ struct unix_sock *successor;
+ struct list_head vertex_entry;
+ struct list_head stack_entry;
+};
+
struct sock *unix_peer_get(struct sock *sk);
#define UNIX_HASH_MOD (256 - 1)
@@ -62,12 +80,9 @@ struct unix_sock {
struct path path;
struct mutex iolock, bindlock;
struct sock *peer;
- struct list_head link;
- unsigned long inflight;
+ struct sock *listener;
+ struct unix_vertex *vertex;
spinlock_t lock;
- unsigned long gc_flags;
-#define UNIX_GC_CANDIDATE 0
-#define UNIX_GC_MAYBE_CYCLE 1
struct socket_wq peer_wq;
wait_queue_entry_t peer_wake;
struct scm_stat scm_stat;
diff --git a/include/net/scm.h b/include/net/scm.h
index 92276a2c5543..bbc5527809d1 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -23,10 +23,19 @@ struct scm_creds {
kgid_t gid;
};
+#ifdef CONFIG_UNIX
+struct unix_edge;
+#endif
+
struct scm_fp_list {
short count;
short count_unix;
short max;
+#ifdef CONFIG_UNIX
+ bool inflight;
+ struct list_head vertices;
+ struct unix_edge *edges;
+#endif
struct user_struct *user;
struct file *fp[SCM_MAX_FD];
};