diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-15 04:15:31 +0530 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-15 04:15:31 +0530 |
| commit | e8a56d6fc828bb569fa2dd33c3e6eb16a165b097 (patch) | |
| tree | 1742623129141d47c68509271cf8183133e48849 | |
| parent | 79169a1624253363fed3e9a447b77e50bb226206 (diff) | |
| parent | 3df5153c5f123d6018c82a24341ccd99c79d64a0 (diff) | |
Merge tag 'pull-dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull dcache updates from Al Viro:
- d_alloc_parallel() API change (Neil's with my changes)
- NORCU fixes
- Reorganization and simplification of dentry eviction logic
- Simplifying rcu_read_lock() scopes in fs/dcache.c
- Secondary roots work - getting rid of NFS fake root dentries and
dealing with remaining shrink_dcache_for_umount() and
shrink_dentry_list() races
- making cursors NORCU (surprisingly easy)
* tag 'pull-dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (22 commits)
make cursors NORCU
nfs: get rid of fake root dentries
wind ->s_roots via ->d_sib instead of ->d_hash
shrink_dentry_tree(): unify the calls of shrink_dentry_list()
shrinking rcu_read_lock() scope in d_alloc_parallel()
d_walk(): shrink rcu_read_lock() scope
document dentry_kill()
adjust calling conventions of lock_for_kill(), fold __dentry_kill() into dentry_kill()
Document rcu_read_lock() use in select_collect2()
Shift rcu_read_{,un}lock() inside fast_dput()
simplify safety for lock_for_kill() slowpath
fold lock_for_kill() and __dentry_kill() into common helper
fold lock_for_kill() into shrink_kill()
shrink_dentry_list(): start with removing from shrink list
d_prune_aliases(): make sure to skip NORCU aliases
kill d_dispose_if_unused()
make to_shrink_list() return whether it has moved dentry to list
select_collect(): ignore dentries on shrink lists if they have positive refcounts
find_acceptable_alias(): skip NORCU aliases with zero refcount
fix a race between d_find_any_alias() and final dput() of NORCU dentries
...
| -rw-r--r-- | Documentation/filesystems/porting.rst | 17 | ||||
| -rw-r--r-- | fs/afs/dir_silly.c | 4 | ||||
| -rw-r--r-- | fs/dcache.c | 549 | ||||
| -rw-r--r-- | fs/exportfs/expfs.c | 9 | ||||
| -rw-r--r-- | fs/file_table.c | 2 | ||||
| -rw-r--r-- | fs/fuse/dir.c | 2 | ||||
| -rw-r--r-- | fs/fuse/readdir.c | 3 | ||||
| -rw-r--r-- | fs/namei.c | 6 | ||||
| -rw-r--r-- | fs/nfs/dir.c | 6 | ||||
| -rw-r--r-- | fs/nfs/getroot.c | 35 | ||||
| -rw-r--r-- | fs/nfs/unlink.c | 3 | ||||
| -rw-r--r-- | fs/proc/base.c | 3 | ||||
| -rw-r--r-- | fs/proc/proc_sysctl.c | 3 | ||||
| -rw-r--r-- | fs/smb/client/readdir.c | 3 | ||||
| -rw-r--r-- | fs/super.c | 1 | ||||
| -rw-r--r-- | include/linux/dcache.h | 31 | ||||
| -rw-r--r-- | include/linux/fs/super_types.h | 3 | ||||
| -rw-r--r-- | include/linux/nfs_xdr.h | 1 |
18 files changed, 387 insertions, 294 deletions
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index f546b1d3897f..d13f0a23c882 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1384,3 +1384,20 @@ for_each_alias(dentry, inode) instead of hlist_for_each_entry; better yet, see if any of the exported primitives could be used instead of the entire loop. You still need to hold ->i_lock of the inode over either form of manual loop. + +--- + +**mandatory** + +d_alloc_parallel() no longer requires a waitqueue_head. + +--- + +**mandatory** + +d_dispose_if_unused() is gone; use __move_to_shrink_list() if you really +need that functionality, but watch out for memory safety issues - just +as with d_dispose_if_unused() these are not trivial; with this variant +of API it's more explicit, since grabbing ->d_lock is caller-side, but +d_dispose_if_unused() had all the same issues. It's a low-level primitive; +use only if you have no alternative. diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c index a748fd133faf..982bb6ec15f0 100644 --- a/fs/afs/dir_silly.c +++ b/fs/afs/dir_silly.c @@ -248,13 +248,11 @@ int afs_silly_iput(struct dentry *dentry, struct inode *inode) struct dentry *alias; int ret; - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); - _enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode); down_read(&dvnode->rmdir_lock); - alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name, &wq); + alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name); if (IS_ERR(alias)) { up_read(&dvnode->rmdir_lock); return 0; diff --git a/fs/dcache.c b/fs/dcache.c index d6f505313205..3e9af9de7074 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -43,8 +43,8 @@ * - i_dentry, d_alias, d_inode of aliases * dcache_hash_bucket lock protects: * - the dcache hash table - * s_roots bl list spinlock protects: - * - the s_roots list (see __d_drop) + * s_roots_lock protects: + * - the s_roots list (see __d_move()/dentry_unlist()/d_obtain_root()) * 
dentry->d_sb->s_dentry_lru_lock protects: * - the dcache lru lists and counters * d_lock protects: @@ -462,14 +462,10 @@ static void dentry_unlink_inode(struct dentry * dentry) raw_write_seqcount_begin(&dentry->d_seq); __d_clear_type_and_inode(dentry); - hlist_del_init(&dentry->d_alias); + __hlist_del(&dentry->d_alias); /* * dentry becomes negative, so the space occupied by ->d_alias - * belongs to ->waiters now; we could use __hlist_del() instead - * of hlist_del_init(), if not for the stunt pulled by nfs - * dummy root dentries - positive dentry *not* included into - * the alias list of its inode. Open-coding hlist_del_init() - * and removing zeroing would be too clumsy... + * belongs to ->waiters now. */ dentry->waiters = NULL; raw_write_seqcount_end(&dentry->d_seq); @@ -568,16 +564,7 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry, static void ___d_drop(struct dentry *dentry) { - struct hlist_bl_head *b; - /* - * Hashed dentries are normally on the dentry hashtable, - * with the exception of those newly allocated by - * d_obtain_root, which are always IS_ROOT: - */ - if (unlikely(IS_ROOT(dentry))) - b = &dentry->d_sb->s_roots; - else - b = d_hash(dentry->d_name.hash); + struct hlist_bl_head *b = d_hash(dentry->d_name.hash); hlist_bl_lock(b); __hlist_bl_del(&dentry->d_hash); @@ -636,12 +623,14 @@ struct completion_list { * Use ->waiters for a single-linked list of struct completion_list of * waiters. 
*/ -static inline void d_add_waiter(struct dentry *dentry, struct completion_list *p) +static inline bool d_add_waiter(struct dentry *dentry, struct completion_list *p) { - struct completion_list *v = dentry->waiters; + if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) + return false; init_completion(&p->completion); - p->next = v; + p->next = dentry->waiters; dentry->waiters = p; + return true; } static inline void d_complete_waiters(struct dentry *dentry) @@ -658,6 +647,13 @@ static inline void d_complete_waiters(struct dentry *dentry) } } +static void unlink_secondary_root(struct dentry *dentry) +{ + spin_lock(&dentry->d_sb->s_roots_lock); + hlist_del_init(&dentry->d_sib); + spin_unlock(&dentry->d_sb->s_roots_lock); +} + static inline void dentry_unlist(struct dentry *dentry) { struct dentry *next; @@ -669,6 +665,10 @@ static inline void dentry_unlist(struct dentry *dentry) d_complete_waiters(dentry); if (unlikely(hlist_unhashed(&dentry->d_sib))) return; + if (unlikely(IS_ROOT(dentry))) { + unlink_secondary_root(dentry); // secondary root goes away + return; + } __hlist_del(&dentry->d_sib); /* * Cursors can move around the list of children. While we'd been @@ -697,11 +697,113 @@ static inline void dentry_unlist(struct dentry *dentry) } } -static struct dentry *__dentry_kill(struct dentry *dentry) +/* + * Prepare locking environment for killing a dentry. + * Called under dentry->d_lock. To proceed with eviction of a positive dentry + * we need to get ->i_lock of the inode of that dentry as well. + * However, ->i_lock nests outside of ->d_lock, so if trylock fails we might + * have to drop and regain the latter. Dentry state can change while its + * ->d_lock is not held - it might end up getting killed, becoming busy, + * negative, etc., so we need to be careful. 
+ * + * For NORCU dentries memory safety relies upon having only one call of + * lock_for_kill() in the entire lifetime of dentry and dentry_free() being + * called only by the caller of lock_for_kill(). Note that this is NORCU-specific; + * the crucial part is that refcounts of NORCU dentries never grow once having + * dropped to zero. + * + * For normal dentries we can not assume that there won't be concurrent calls + * of dentry_free() - dentry might end up being evicted by another thread + * while we are dropping/retaking locks on the slow path. Memory safety is + * provided by keeping the RCU read-side critical area contiguous with + * an explicit rcu_read_lock() scope bridging over the break in spinlock scopes. + * + * If dentry is busy (or busy dying, or already dead), unlock dentry + * and return false. Otherwise, return true and have that dentry's + * inode (if any) locked in addition to dentry itself. + */ +static bool lock_for_kill(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + + if (unlikely(dentry->d_lockref.count)) { + spin_unlock(&dentry->d_lock); + return false; + } + + if (!inode || likely(spin_trylock(&inode->i_lock))) + return true; + + // Too bad - we need to drop ->d_lock and take locks in correct order. + // To avoid breaking RCU read-side critical area when we drop ->d_lock, + // take an explicit rcu_read_lock() while we are switching locks. 
+ rcu_read_lock(); + do { + spin_unlock(&dentry->d_lock); + spin_lock(&inode->i_lock); + spin_lock(&dentry->d_lock); + // make sure we'd locked the right inode - ->d_inode might've + // changed while we were not holding ->d_lock + if (likely(inode == dentry->d_inode)) + break; + spin_unlock(&inode->i_lock); + inode = dentry->d_inode; + } while (inode); + rcu_read_unlock(); + if (likely(!dentry->d_lockref.count)) + return true; + if (inode) + spin_unlock(&inode->i_lock); + spin_unlock(&dentry->d_lock); + return false; +} + +/** + * dentry_kill - evict a dentry + * @dentry: dentry to be evicted + * + * All dentry evictions are done by this function. The reference we are + * passed does not contribute to the refcount; the caller had either + * already decremented the refcount or it had never held one in the + * first place. @dentry->d_lock is held by the caller and dropped + * by dentry_kill(@dentry). + * + * We are guaranteed that nobody had called dentry_free(@dentry) + * prior to the beginning of RCU read-side critical area we are in. + * + * Caller must not access @dentry after the call. + * + * If eviction of @dentry drops the last reference to its parent, + * the reference to parent is returned to caller. In that case + * it is guaranteed to satisfy the requirements for dentry_kill() + * argument - its ->d_lock is held and we are guaranteed that nobody + * had passed it to dentry_free() prior to acquisition of its ->d_lock. + * Otherwise %NULL is returned. + * + * If @dentry is idle and remains such after we assemble the full + * locking environment for eviction (see lock_for_kill() for details) + * we mark it doomed (->d_lockref.count < 0) and proceed to detaching + * it from any filesystem objects. Otherwise we drop ->d_lock and + * return %NULL. + * + * Once @dentry is detached from the filesystem objects, we complete + * detaching it from dentry tree. 
The parent, if any, gets locked + * and its refcount is decremented; dentry is carefully removed from + * the tree (see dentry_unlist() for details) and marked killed + * (%DCACHE_DENTRY_KILLED set in ->d_flags). At that point it's just + * an inert chunk of memory, accessible only via RCU references + * and possibly via a shrink list. If it is not on any shrink lists, + * we call dentry_free(), which schedules actual freeing of memory. + * Otherwise freeing is left to the owner of the shrink list in question. + */ +static struct dentry *dentry_kill(struct dentry *dentry) { struct dentry *parent = NULL; bool can_free = true; + if (unlikely(!lock_for_kill(dentry))) + return NULL; + /* * The dentry is now unrecoverably dead to the world. */ @@ -749,43 +851,6 @@ static struct dentry *__dentry_kill(struct dentry *dentry) } /* - * Lock a dentry for feeding it to __dentry_kill(). - * Called under rcu_read_lock() and dentry->d_lock; the former - * guarantees that nothing we access will be freed under us. - * Note that dentry is *not* protected from concurrent dentry_kill(), - * d_delete(), etc. - * - * Return false if dentry is busy. Otherwise, return true and have - * that dentry's inode locked. - */ - -static bool lock_for_kill(struct dentry *dentry) -{ - struct inode *inode = dentry->d_inode; - - if (unlikely(dentry->d_lockref.count)) - return false; - - if (!inode || likely(spin_trylock(&inode->i_lock))) - return true; - - do { - spin_unlock(&dentry->d_lock); - spin_lock(&inode->i_lock); - spin_lock(&dentry->d_lock); - if (likely(inode == dentry->d_inode)) - break; - spin_unlock(&inode->i_lock); - inode = dentry->d_inode; - } while (inode); - if (likely(!dentry->d_lockref.count)) - return true; - if (inode) - spin_unlock(&inode->i_lock); - return false; -} - -/* * Decide if dentry is worth retaining. Usually this is called with dentry * locked; if not locked, we are more limited and might not be able to tell * without a lock. 
False in this case means "punt to locked path and recheck". @@ -860,17 +925,17 @@ EXPORT_SYMBOL(d_mark_dontcache); * If unsuccessful, we return false, having already taken the dentry lock. * In that case refcount is guaranteed to be zero and we have already * decided that it's not worth keeping around. - * - * The caller needs to hold the RCU read lock, so that the dentry is - * guaranteed to stay around even if the refcount goes down to zero! */ static inline bool fast_dput(struct dentry *dentry) { int ret; /* - * try to decrement the lockref optimistically. + * Try to decrement the lockref optimistically. + * RCU read lock held so that dentry is guaranteed to stay around + * even if the refcount goes down to zero. */ + rcu_read_lock(); ret = lockref_put_return(&dentry->d_lockref); /* @@ -880,6 +945,7 @@ static inline bool fast_dput(struct dentry *dentry) */ if (unlikely(ret < 0)) { spin_lock(&dentry->d_lock); + rcu_read_unlock(); if (WARN_ON_ONCE(dentry->d_lockref.count <= 0)) { spin_unlock(&dentry->d_lock); return true; @@ -891,8 +957,10 @@ static inline bool fast_dput(struct dentry *dentry) /* * If we weren't the last ref, we're done. */ - if (ret) + if (ret) { + rcu_read_unlock(); return true; + } /* * Can we decide that decrement of refcount is all we needed without @@ -900,8 +968,10 @@ static inline bool fast_dput(struct dentry *dentry) * dentry looks like it ought to be retained and there's nothing else * to do. */ - if (retain_dentry(dentry, false)) + if (retain_dentry(dentry, false)) { + rcu_read_unlock(); return true; + } /* * Either not worth retaining or we can't tell without the lock. @@ -909,6 +979,7 @@ static inline bool fast_dput(struct dentry *dentry) * but we'll need to re-check the situation after getting the lock. 
*/ spin_lock(&dentry->d_lock); + rcu_read_unlock(); /* * Did somebody else grab a reference to it in the meantime, and @@ -926,21 +997,13 @@ locked: static void finish_dput(struct dentry *dentry) __releases(dentry->d_lock) - __releases(RCU) { - while (lock_for_kill(dentry)) { - rcu_read_unlock(); - dentry = __dentry_kill(dentry); - if (!dentry) - return; + while ((dentry = dentry_kill(dentry)) != NULL) { if (retain_dentry(dentry, true)) { spin_unlock(&dentry->d_lock); return; } - rcu_read_lock(); } - rcu_read_unlock(); - spin_unlock(&dentry->d_lock); } /* @@ -974,11 +1037,8 @@ void dput(struct dentry *dentry) if (!dentry) return; might_sleep(); - rcu_read_lock(); - if (likely(fast_dput(dentry))) { - rcu_read_unlock(); + if (likely(fast_dput(dentry))) return; - } finish_dput(dentry); } EXPORT_SYMBOL(dput); @@ -989,30 +1049,46 @@ void d_make_discardable(struct dentry *dentry) WARN_ON(!(dentry->d_flags & DCACHE_PERSISTENT)); dentry->d_flags &= ~DCACHE_PERSISTENT; dentry->d_lockref.count--; - rcu_read_lock(); finish_dput(dentry); } EXPORT_SYMBOL(d_make_discardable); -static void to_shrink_list(struct dentry *dentry, struct list_head *list) +/** + * __move_to_shrink_list - try to place a dentry into a shrink list + * @dentry: dentry to try putting into shrink list + * @list: the list to put @dentry into. + * Returns: true if @dentry had been placed into @list, false otherwise + * + * If @dentry is idle and not already included in a shrink list, move + * it into @list and return %true; otherwise do nothing and return %false. + * + * Caller must be holding @dentry->d_lock. There must have been no calls of + * dentry_free(@dentry) prior to the beginning of the RCU read-side critical + * area in which __move_to_shrink_list(@dentry, @list) is called. + * + * @list should be thread-private and eventually emptied by passing it to + * shrink_dentry_list(). 
+ */ + +bool __move_to_shrink_list(struct dentry *dentry, struct list_head *list) __must_hold(&dentry->d_lock) { - if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { + if (likely(!dentry->d_lockref.count && + !(dentry->d_flags & DCACHE_SHRINK_LIST))) { if (dentry->d_flags & DCACHE_LRU_LIST) d_lru_del(dentry); d_shrink_add(dentry, list); + return true; } + return false; } +EXPORT_SYMBOL(__move_to_shrink_list); void dput_to_list(struct dentry *dentry, struct list_head *list) { - rcu_read_lock(); - if (likely(fast_dput(dentry))) { - rcu_read_unlock(); + if (likely(fast_dput(dentry))) return; - } - rcu_read_unlock(); - to_shrink_list(dentry, list); + __move_to_shrink_list(dentry, list); spin_unlock(&dentry->d_lock); } @@ -1058,7 +1134,10 @@ repeat: } EXPORT_SYMBOL(dget_parent); -static struct dentry * __d_find_any_alias(struct inode *inode) +/* + * inode is a directory, inode->i_lock is held by the caller + */ +static struct dentry * __d_find_dir_alias(struct inode *inode) { struct dentry *alias; @@ -1069,6 +1148,18 @@ static struct dentry * __d_find_any_alias(struct inode *inode) return alias; } +static struct dentry * __d_find_any_alias(struct inode *inode) +{ + struct dentry *alias; + + if (hlist_empty(&inode->i_dentry)) + return NULL; + for_each_alias(alias, inode) + if (dget_alias_ilocked(alias)) + return alias; + return NULL; +} + /** * d_find_any_alias - find any alias for a given inode * @inode: inode to find an alias for @@ -1092,7 +1183,7 @@ static struct dentry *__d_find_alias(struct inode *inode) struct dentry *alias; if (S_ISDIR(inode->i_mode)) - return __d_find_any_alias(inode); + return __d_find_dir_alias(inode); for_each_alias(alias, inode) { spin_lock(&alias->d_lock); @@ -1158,25 +1249,6 @@ struct dentry *d_find_alias_rcu(struct inode *inode) return de; } -/** - * d_dispose_if_unused - move unreferenced dentries to shrink list - * @dentry: dentry in question - * @dispose: head of shrink list - * - * If dentry has no external references, move it to shrink 
list. - * - * NOTE!!! The caller is responsible for preventing eviction of the dentry by - * holding dentry->d_inode->i_lock or equivalent. - */ -void d_dispose_if_unused(struct dentry *dentry, struct list_head *dispose) -{ - spin_lock(&dentry->d_lock); - if (!dentry->d_lockref.count) - to_shrink_list(dentry, dispose); - spin_unlock(&dentry->d_lock); -} -EXPORT_SYMBOL(d_dispose_if_unused); - /* * Try to kill dentries associated with this inode. * WARNING: you must own a reference to inode. @@ -1187,8 +1259,12 @@ void d_prune_aliases(struct inode *inode) struct dentry *dentry; spin_lock(&inode->i_lock); - for_each_alias(dentry, inode) - d_dispose_if_unused(dentry, &dispose); + for_each_alias(dentry, inode) { + spin_lock(&dentry->d_lock); + if (likely(!(dentry->d_flags & DCACHE_NORCU))) + __move_to_shrink_list(dentry, &dispose); + spin_unlock(&dentry->d_lock); + } spin_unlock(&inode->i_lock); shrink_dentry_list(&dispose); } @@ -1196,14 +1272,8 @@ EXPORT_SYMBOL(d_prune_aliases); static inline void shrink_kill(struct dentry *victim) { - do { - rcu_read_unlock(); - victim = __dentry_kill(victim); - rcu_read_lock(); - } while (victim && lock_for_kill(victim)); - rcu_read_unlock(); - if (victim) - spin_unlock(&victim->d_lock); + while ((victim = dentry_kill(victim)) != NULL) + ; } void shrink_dentry_list(struct list_head *list) @@ -1213,18 +1283,12 @@ void shrink_dentry_list(struct list_head *list) dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); - rcu_read_lock(); - if (!lock_for_kill(dentry)) { - bool can_free; - rcu_read_unlock(); - d_shrink_del(dentry); - can_free = dentry->d_flags & DCACHE_DENTRY_KILLED; + d_shrink_del(dentry); + if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { spin_unlock(&dentry->d_lock); - if (can_free) - dentry_free(dentry); + dentry_free(dentry); continue; } - d_shrink_del(dentry); shrink_kill(dentry); } } @@ -1385,6 +1449,8 @@ again: read_seqbegin_or_lock(&rename_lock, &seq); this_parent = parent; 
spin_lock(&this_parent->d_lock); + if (unlikely(this_parent->d_flags & DCACHE_DENTRY_CURSOR)) + goto out_unlock; ret = enter(data, this_parent); switch (ret) { @@ -1433,14 +1499,15 @@ resume: /* * All done at this level ... ascend and resume the search. */ - rcu_read_lock(); ascend: if (this_parent != parent) { dentry = this_parent; this_parent = dentry->d_parent; + rcu_read_lock(); spin_unlock(&dentry->d_lock); spin_lock(&this_parent->d_lock); + rcu_read_unlock(); /* might go back up the wrong parent if we have had a rename. */ if (need_seqretry(&rename_lock, seq)) @@ -1448,7 +1515,6 @@ ascend: /* go into the first sibling still alive */ hlist_for_each_entry_continue(dentry, d_sib) { if (likely(!(dentry->d_flags & DCACHE_DENTRY_KILLED))) { - rcu_read_unlock(); goto resume; } } @@ -1456,7 +1522,6 @@ ascend: } if (need_seqretry(&rename_lock, seq)) goto rename_retry; - rcu_read_unlock(); out_unlock: spin_unlock(&this_parent->d_lock); @@ -1465,7 +1530,6 @@ out_unlock: rename_retry: spin_unlock(&this_parent->d_lock); - rcu_read_unlock(); BUG_ON(seq & 1); if (!retry) return; @@ -1580,12 +1644,8 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) if (data->start == dentry) goto out; - if (dentry->d_flags & DCACHE_SHRINK_LIST) { - data->found++; - } else if (!dentry->d_lockref.count) { - to_shrink_list(dentry, &data->dispose); - data->found++; - } else if (dentry->d_lockref.count < 0) { + if (dentry->d_lockref.count <= 0) { + __move_to_shrink_list(dentry, &data->dispose); data->found++; } /* @@ -1616,17 +1676,21 @@ static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry) if (data->start == dentry) goto out; - if (!dentry->d_lockref.count) { - if (dentry->d_flags & DCACHE_SHRINK_LIST) { + if (dentry->d_lockref.count <= 0) { + if (!__move_to_shrink_list(dentry, &data->dispose)) { + /* + * We need to enter an RCU read-side critical area that + * would extend past the return from d_walk() and + * we are in the scope of ->d_lock that 
will terminate + * before that, so we use rcu_read_lock() to bridge + * over to the scope of ->d_lock in d_walk() caller. + * The scope of rcu_read_lock() spans from here to + * paired rcu_read_unlock() in shrink_dcache_tree(). + */ rcu_read_lock(); data->victim = dentry; return D_WALK_QUIT; } - to_shrink_list(dentry, &data->dispose); - } else if (dentry->d_lockref.count < 0) { - rcu_read_lock(); - data->victim = dentry; - return D_WALK_QUIT; } /* * We can return to the caller if we have found some (this @@ -1649,7 +1713,9 @@ out: static void shrink_dcache_tree(struct dentry *parent, bool for_umount) { for (;;) { - struct select_data data = {.start = parent}; + struct completion_list wait; + bool need_wait = false; + struct select_data data = { .start = parent }; INIT_LIST_HEAD(&data.dispose); d_walk(parent, &data, @@ -1667,30 +1733,32 @@ static void shrink_dcache_tree(struct dentry *parent, bool for_umount) d_walk(parent, &data, select_collect2); if (data.victim) { struct dentry *v = data.victim; - + /* + * select_collect2() has picked a dentry that was + * either dying or on a shrink list and arranged + * for it to be returned to us. We are still in + * the RCU read-side critical area started there + * (rcu_read_lock() scope opened in select_collect2()), + * so dentry couldn't have been freed yet, but its + * state might've changed since we dropped ->d_lock + * on the way out. Switch over to ->d_lock scope + * and recheck the dentry state. + */ spin_lock(&v->d_lock); - if (v->d_lockref.count < 0 && - !(v->d_flags & DCACHE_DENTRY_KILLED)) { - struct completion_list wait; - // It's busy dying; have it notify us once - // it becomes invisible to d_walk(). 
- d_add_waiter(v, &wait); - spin_unlock(&v->d_lock); - rcu_read_unlock(); - if (!list_empty(&data.dispose)) - shrink_dentry_list(&data.dispose); - wait_for_completion(&wait.completion); - continue; - } - if (!lock_for_kill(v)) { + rcu_read_unlock(); + + if (unlikely(v->d_lockref.count < 0)) { + // It's doomed; if it isn't dead yet, notify us + // once it becomes invisible to d_walk(). + need_wait = d_add_waiter(v, &wait); spin_unlock(&v->d_lock); - rcu_read_unlock(); } else { shrink_kill(v); } } - if (!list_empty(&data.dispose)) - shrink_dentry_list(&data.dispose); + shrink_dentry_list(&data.dispose); + if (unlikely(need_wait)) + wait_for_completion(&wait.completion); } } @@ -1743,9 +1811,30 @@ void shrink_dcache_for_umount(struct super_block *sb) sb->s_root = NULL; do_one_tree(dentry); - while (!hlist_bl_empty(&sb->s_roots)) { - dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_roots), struct dentry, d_hash)); - do_one_tree(dentry); + for (;;) { + spin_lock(&sb->s_roots_lock); + dentry = hlist_entry_safe(sb->s_roots.first, + struct dentry, d_sib); + if (!dentry) { + spin_unlock(&sb->s_roots_lock); + break; + } + rcu_read_lock(); + spin_unlock(&sb->s_roots_lock); + spin_lock(&dentry->d_lock); + rcu_read_unlock(); + if (unlikely(dentry->d_lockref.count < 0)) { + struct completion_list wait; + bool need_wait = d_add_waiter(dentry, &wait); + + spin_unlock(&dentry->d_lock); + if (need_wait) + wait_for_completion(&wait.completion); + } else { + dget_dlock(dentry); + spin_unlock(&dentry->d_lock); + do_one_tree(dentry); + } } } @@ -1915,7 +2004,7 @@ struct dentry *d_alloc_cursor(struct dentry * parent) { struct dentry *dentry = d_alloc_anon(parent->d_sb); if (dentry) { - dentry->d_flags |= DCACHE_DENTRY_CURSOR; + dentry->d_flags |= DCACHE_DENTRY_CURSOR | DCACHE_NORCU; dentry->d_parent = dget(parent); } return dentry; @@ -2166,9 +2255,9 @@ static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected) __d_set_inode_and_type(new, inode, add_flags); 
hlist_add_head(&new->d_alias, &inode->i_dentry); if (!disconnected) { - hlist_bl_lock(&sb->s_roots); - hlist_bl_add_head(&new->d_hash, &sb->s_roots); - hlist_bl_unlock(&sb->s_roots); + spin_lock(&sb->s_roots_lock); + hlist_add_head(&new->d_sib, &sb->s_roots); + spin_unlock(&sb->s_roots_lock); } spin_unlock(&new->d_lock); spin_unlock(&inode->i_lock); @@ -2260,8 +2349,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, return found; } if (d_in_lookup(dentry)) { - found = d_alloc_parallel(dentry->d_parent, name, - dentry->d_wait); + found = d_alloc_parallel(dentry->d_parent, name); if (IS_ERR(found) || !d_in_lookup(found)) { iput(inode); return found; @@ -2648,32 +2736,24 @@ static inline unsigned start_dir_add(struct inode *dir) } } -static inline void end_dir_add(struct inode *dir, unsigned int n, - wait_queue_head_t *d_wait) +static inline void end_dir_add(struct inode *dir, unsigned int n) { smp_store_release(&dir->i_dir_seq, n + 2); preempt_enable_nested(); - if (wq_has_sleeper(d_wait)) - wake_up_all(d_wait); } static void d_wait_lookup(struct dentry *dentry) { - if (d_in_lookup(dentry)) { - DECLARE_WAITQUEUE(wait, current); - add_wait_queue(dentry->d_wait, &wait); - do { - set_current_state(TASK_UNINTERRUPTIBLE); - spin_unlock(&dentry->d_lock); - schedule(); - spin_lock(&dentry->d_lock); - } while (d_in_lookup(dentry)); + if (likely(d_in_lookup(dentry))) { + dentry->d_flags |= DCACHE_LOOKUP_WAITERS; + wait_var_event_spinlock(&dentry->d_flags, + !d_in_lookup(dentry), + &dentry->d_lock); } } struct dentry *d_alloc_parallel(struct dentry *parent, - const struct qstr *name, - wait_queue_head_t *wq) + const struct qstr *name) { unsigned int hash = name->hash; struct hlist_bl_head *b = in_lookup_hash(parent, hash); @@ -2694,38 +2774,33 @@ struct dentry *d_alloc_parallel(struct dentry *parent, spin_unlock(&parent->d_lock); retry: - rcu_read_lock(); seq = smp_load_acquire(&parent->d_inode->i_dir_seq); r_seq = read_seqbegin(&rename_lock); + 
rcu_read_lock(); dentry = __d_lookup_rcu(parent, name, &d_seq); if (unlikely(dentry)) { if (!lockref_get_not_dead(&dentry->d_lockref)) { rcu_read_unlock(); goto retry; } + rcu_read_unlock(); if (read_seqcount_retry(&dentry->d_seq, d_seq)) { - rcu_read_unlock(); dput(dentry); goto retry; } - rcu_read_unlock(); dput(new); return dentry; } - if (unlikely(read_seqretry(&rename_lock, r_seq))) { - rcu_read_unlock(); + rcu_read_unlock(); + if (unlikely(read_seqretry(&rename_lock, r_seq))) goto retry; - } - if (unlikely(seq & 1)) { - rcu_read_unlock(); + if (unlikely(seq & 1)) goto retry; - } hlist_bl_lock(b); if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) { hlist_bl_unlock(b); - rcu_read_unlock(); goto retry; } /* @@ -2742,19 +2817,20 @@ retry: continue; if (!d_same_name(dentry, parent, name)) continue; + rcu_read_lock(); hlist_bl_unlock(b); + spin_lock(&dentry->d_lock); + rcu_read_unlock(); /* now we can try to grab a reference */ - if (!lockref_get_not_dead(&dentry->d_lockref)) { - rcu_read_unlock(); + if (unlikely(dentry->d_lockref.count < 0)) { + spin_unlock(&dentry->d_lock); goto retry; } - - rcu_read_unlock(); /* * somebody is likely to be still doing lookup for it; - * wait for them to finish + * pin it and wait for them to finish */ - spin_lock(&dentry->d_lock); + dget_dlock(dentry); d_wait_lookup(dentry); /* * it's not in-lookup anymore; in principle we should repeat @@ -2775,8 +2851,6 @@ retry: dput(new); return dentry; } - rcu_read_unlock(); - new->d_wait = wq; hlist_bl_add_head(&new->d_in_lookup_hash, b); hlist_bl_unlock(b); return new; @@ -2788,13 +2862,26 @@ mismatch: EXPORT_SYMBOL(d_alloc_parallel); /* - * - Unhash the dentry - * - Retrieve and clear the waitqueue head in dentry - * - Return the waitqueue head + * Move dentry from in-lookup state to busy-negative one. + * + * From now on d_in_lookup(dentry) will return false and dentry is gone from + * in-lookup hash. 
+ * + * Anyone who had been waiting on it in d_alloc_parallel() is free to + * proceed after that. Note that waking such waiters up is left to + * the callers; PREEMPT_RT kernels can't have that wakeup done while + * in write-side critical area for ->i_dir_seq, so it's done by calling + * __d_wake_in_lookup_waiters() once it's safe to do so. + * + * Both __d_lookup_unhash() and __d_wake_in_lookup_waiters() should + * be called within the same ->d_lock scope. PAR_LOOKUP is cleared + * here, while LOOKUP_WAITERS (set by somebody finding dentry in + * the in-lookup hash and settling down to wait) is checked and cleared + * in __d_wake_in_lookup_waiters(). Both are gone by the end of + * ->d_lock scope. */ -static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry) +static void __d_lookup_unhash(struct dentry *dentry) { - wait_queue_head_t *d_wait; struct hlist_bl_head *b; lockdep_assert_held(&dentry->d_lock); @@ -2803,18 +2890,23 @@ static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry) hlist_bl_lock(b); dentry->d_flags &= ~DCACHE_PAR_LOOKUP; __hlist_bl_del(&dentry->d_in_lookup_hash); - d_wait = dentry->d_wait; - dentry->d_wait = NULL; hlist_bl_unlock(b); dentry->waiters = NULL; - INIT_LIST_HEAD(&dentry->d_lru); - return d_wait; +} + +static inline void __d_wake_in_lookup_waiters(struct dentry *dentry) +{ + if (dentry->d_flags & DCACHE_LOOKUP_WAITERS) { + wake_up_var_locked(&dentry->d_flags, &dentry->d_lock); + dentry->d_flags &= ~DCACHE_LOOKUP_WAITERS; + } } void __d_lookup_unhash_wake(struct dentry *dentry) { spin_lock(&dentry->d_lock); - wake_up_all(__d_lookup_unhash(dentry)); + __d_lookup_unhash(dentry); + __d_wake_in_lookup_waiters(dentry); spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL(__d_lookup_unhash_wake); @@ -2824,14 +2916,13 @@ EXPORT_SYMBOL(__d_lookup_unhash_wake); static inline void __d_add(struct dentry *dentry, struct inode *inode, const struct dentry_operations *ops) { - wait_queue_head_t *d_wait; struct inode *dir = NULL; unsigned 
n; spin_lock(&dentry->d_lock); if (unlikely(d_in_lookup(dentry))) { dir = dentry->d_parent->d_inode; n = start_dir_add(dir); - d_wait = __d_lookup_unhash(dentry); + __d_lookup_unhash(dentry); } if (unlikely(ops)) d_set_d_op(dentry, ops); @@ -2844,8 +2935,10 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode, fsnotify_update_flags(dentry); } __d_rehash(dentry); - if (dir) - end_dir_add(dir, n, d_wait); + if (dir) { + end_dir_add(dir, n); + __d_wake_in_lookup_waiters(dentry); + } spin_unlock(&dentry->d_lock); if (inode) spin_unlock(&inode->i_lock); @@ -2958,7 +3051,6 @@ static void __d_move(struct dentry *dentry, struct dentry *target, bool exchange) { struct dentry *old_parent, *p; - wait_queue_head_t *d_wait; struct inode *dir = NULL; unsigned n; @@ -2989,7 +3081,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target, if (unlikely(d_in_lookup(target))) { dir = target->d_parent->d_inode; n = start_dir_add(dir); - d_wait = __d_lookup_unhash(target); + __d_lookup_unhash(target); } write_seqcount_begin(&dentry->d_seq); @@ -3028,9 +3120,10 @@ static void __d_move(struct dentry *dentry, struct dentry *target, write_seqcount_end(&target->d_seq); write_seqcount_end(&dentry->d_seq); - if (dir) - end_dir_add(dir, n, d_wait); - + if (dir) { + end_dir_add(dir, n); + __d_wake_in_lookup_waiters(target); + } if (dentry->d_parent != old_parent) spin_unlock(&dentry->d_parent->d_lock); if (dentry != old_parent) @@ -3151,7 +3244,7 @@ struct dentry *d_splice_alias_ops(struct inode *inode, struct dentry *dentry, security_d_instantiate(dentry, inode); spin_lock(&inode->i_lock); if (S_ISDIR(inode->i_mode)) { - struct dentry *new = __d_find_any_alias(inode); + struct dentry *new = __d_find_dir_alias(inode); if (unlikely(new)) { /* The reference to new ensures it remains an alias */ spin_unlock(&inode->i_lock); @@ -3176,6 +3269,12 @@ struct dentry *d_splice_alias_ops(struct inode *inode, struct dentry *dentry, } dput(old_parent); } else { + if 
(unlikely(!hlist_unhashed(&new->d_sib))) { + // secondary root getting spliced + spin_lock(&new->d_lock); + unlink_secondary_root(new); + spin_unlock(&new->d_lock); + } __d_move(new, dentry, false); write_sequnlock(&rename_lock); } diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index fbd45e7ae706..eafd99507afe 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -53,10 +53,10 @@ find_acceptable_alias(struct dentry *result, inode = result->d_inode; spin_lock(&inode->i_lock); for_each_alias(dentry, inode) { - dget(dentry); + if (!dget_alias_ilocked(dentry)) + continue; spin_unlock(&inode->i_lock); - if (toput) - dput(toput); + dput(toput); if (dentry != result && acceptable(context, dentry)) { dput(result); return dentry; @@ -66,8 +66,7 @@ find_acceptable_alias(struct dentry *result, } spin_unlock(&inode->i_lock); - if (toput) - dput(toput); + dput(toput); return NULL; } diff --git a/fs/file_table.c b/fs/file_table.c index 3c08832aa387..c68b8c0a4097 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -402,6 +402,8 @@ static struct file *alloc_file(const struct path *path, int flags, static inline int alloc_path_pseudo(const char *name, struct inode *inode, struct vfsmount *mnt, struct path *path) { + if (WARN_ON_ONCE(S_ISDIR(inode->i_mode))) + return -EINVAL; path->dentry = d_alloc_pseudo(mnt->mnt_sb, &QSTR(name)); if (!path->dentry) return -ENOMEM; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b658b6baf72f..d8e8ea7280bc 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -177,8 +177,8 @@ static void fuse_dentry_tree_work(struct work_struct *work) spin_lock(&fd->dentry->d_lock); /* If dentry is still referenced, let next dput release it */ fd->dentry->d_flags |= DCACHE_OP_DELETE; + __move_to_shrink_list(fd->dentry, &dispose); spin_unlock(&fd->dentry->d_lock); - d_dispose_if_unused(fd->dentry, &dispose); if (need_resched()) { spin_unlock(&dentry_hash[i].lock); cond_resched(); diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 
db5ae8ec1030..a2361f1d9905 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -164,7 +164,6 @@ static int fuse_direntplus_link(struct file *file, struct inode *dir = d_inode(parent); struct fuse_conn *fc; struct inode *inode; - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); int epoch; if (!o->nodeid) { @@ -201,7 +200,7 @@ static int fuse_direntplus_link(struct file *file, dentry = d_lookup(parent, &name); if (!dentry) { retry: - dentry = d_alloc_parallel(parent, &name, &wq); + dentry = d_alloc_parallel(parent, &name); if (IS_ERR(dentry)) return PTR_ERR(dentry); } diff --git a/fs/namei.c b/fs/namei.c index 8340dc4108be..5cc9f0f466b8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1896,13 +1896,12 @@ static struct dentry *__lookup_slow(const struct qstr *name, { struct dentry *dentry, *old; struct inode *inode = dir->d_inode; - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); /* Don't go there if it's already dead */ if (unlikely(IS_DEADDIR(inode))) return ERR_PTR(-ENOENT); again: - dentry = d_alloc_parallel(dir, name, &wq); + dentry = d_alloc_parallel(dir, name); if (IS_ERR(dentry)) return dentry; if (unlikely(!d_in_lookup(dentry))) { @@ -4413,7 +4412,6 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, struct dentry *dentry; int error, create_error = 0; umode_t mode = op->mode; - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); if (unlikely(IS_DEADDIR(dir_inode))) return ERR_PTR(-ENOENT); @@ -4422,7 +4420,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, dentry = d_lookup(dir, &nd->last); for (;;) { if (!dentry) { - dentry = d_alloc_parallel(dir, &nd->last, &wq); + dentry = d_alloc_parallel(dir, &nd->last); if (IS_ERR(dentry)) return dentry; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 1b9c368fb133..2f5f26f93238 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -726,7 +726,6 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry, unsigned long dir_verifier) { struct qstr filename = QSTR_INIT(entry->name, entry->len); - 
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); struct dentry *dentry; struct dentry *alias; struct inode *inode; @@ -755,7 +754,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry, dentry = d_lookup(parent, &filename); again: if (!dentry) { - dentry = d_alloc_parallel(parent, &filename, &wq); + dentry = d_alloc_parallel(parent, &filename); if (IS_ERR(dentry)) return; } @@ -2106,7 +2105,6 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned open_flags, umode_t mode) { - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); struct nfs_open_context *ctx; struct dentry *res; struct iattr attr = { .ia_valid = ATTR_OPEN }; @@ -2162,7 +2160,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, d_drop(dentry); switched = true; dentry = d_alloc_parallel(dentry->d_parent, - &dentry->d_name, &wq); + &dentry->d_name); if (IS_ERR(dentry)) return PTR_ERR(dentry); if (unlikely(!d_in_lookup(dentry))) diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index eef0736beb67..ff7424bc4bec 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -33,35 +33,6 @@ #define NFSDBG_FACILITY NFSDBG_CLIENT /* - * Set the superblock root dentry. - * Note that this function frees the inode in case of error. - */ -static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *inode) -{ - /* The mntroot acts as the dummy root dentry for this superblock */ - if (sb->s_root == NULL) { - sb->s_root = d_make_root(inode); - if (sb->s_root == NULL) - return -ENOMEM; - ihold(inode); - /* - * Ensure that this dentry is invisible to d_find_alias(). - * Otherwise, it may be spliced into the tree by - * d_splice_alias if a parent directory from the same - * filesystem gets mounted at a later time. - * This again causes shrink_dcache_for_umount_subtree() to - * Oops, since the test for IS_ROOT() will fail. 
- */ - spin_lock(&d_inode(sb->s_root)->i_lock); - spin_lock(&sb->s_root->d_lock); - hlist_del_init(&sb->s_root->d_alias); - spin_unlock(&sb->s_root->d_lock); - spin_unlock(&d_inode(sb->s_root)->i_lock); - } - return 0; -} - -/* * get a root dentry from the root filehandle */ int nfs_get_root(struct super_block *s, struct fs_context *fc) @@ -99,10 +70,6 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) goto out_fattr; } - error = nfs_superblock_set_dummy_root(s, inode); - if (error != 0) - goto out_fattr; - /* root dentries normally start off anonymous and get spliced in later * if the dentry tree reaches them; however if the dentry already * exists, we'll pick it up at this point and use it as the root @@ -123,6 +90,8 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) name = NULL; } spin_unlock(&root->d_lock); + if (!s->s_root) + s->s_root = dget(root); fc->root = root; if (server->caps & NFS_CAP_SECURITY_LABEL) kflags |= SECURITY_LSM_NATIVE_LABELS; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index df3ca4669df6..43ea897943c0 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -124,7 +124,7 @@ static int nfs_call_unlink(struct dentry *dentry, struct inode *inode, struct nf struct dentry *alias; down_read_non_owner(&NFS_I(dir)->rmdir_sem); - alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq); + alias = d_alloc_parallel(dentry->d_parent, &data->args.name); if (IS_ERR(alias)) { up_read_non_owner(&NFS_I(dir)->rmdir_sem); return 0; @@ -185,7 +185,6 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name) data->cred = get_current_cred(); data->res.dir_attr = &data->dir_attr; - init_waitqueue_head(&data->wq); status = -EBUSY; spin_lock(&dentry->d_lock); diff --git a/fs/proc/base.c b/fs/proc/base.c index b67e8c3605fb..aae47c676f0b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2111,8 +2111,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, goto end_instantiate; if (!child) { - 
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); - child = d_alloc_parallel(dir, &qname, &wq); + child = d_alloc_parallel(dir, &qname); if (IS_ERR(child)) goto end_instantiate; if (d_in_lookup(child)) { diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 49ab74e0bfde..04a382178c65 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -692,8 +692,7 @@ static bool proc_sys_fill_cache(struct file *file, child = d_lookup(dir, &qname); if (!child) { - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); - child = d_alloc_parallel(dir, &qname, &wq); + child = d_alloc_parallel(dir, &qname); if (IS_ERR(child)) return false; if (d_in_lookup(child)) { diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index e860fa08b5e3..1ff77f3d1de0 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -73,7 +73,6 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, struct cifs_sb_info *cifs_sb = CIFS_SB(sb); bool posix = cifs_sb_master_tcon(cifs_sb)->posix_extensions; bool reparse_need_reval = false; - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); int rc; cifs_dbg(FYI, "%s: for %s\n", __func__, name->name); @@ -105,7 +104,7 @@ retry: (fattr->cf_flags & CIFS_FATTR_NEED_REVAL)) return; - dentry = d_alloc_parallel(parent, name, &wq); + dentry = d_alloc_parallel(parent, name); } if (IS_ERR(dentry)) return; diff --git a/fs/super.c b/fs/super.c index a117e1f0dfe3..a8fd61136aaf 100644 --- a/fs/super.c +++ b/fs/super.c @@ -359,6 +359,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, s->s_iflags |= SB_I_NODEV; INIT_HLIST_NODE(&s->s_instances); INIT_HLIST_BL_HEAD(&s->s_roots); + spin_lock_init(&s->s_roots_lock); mutex_init(&s->s_sync_lock); INIT_LIST_HEAD(&s->s_inodes); spin_lock_init(&s->s_inode_list_lock); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 2577c05f84ec..4b1ff99608e0 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -116,10 +116,7 @@ struct dentry { * possible! 
*/ - union { - struct list_head d_lru; /* LRU list */ - wait_queue_head_t *d_wait; /* in-lookup ones only */ - }; + struct list_head d_lru; /* LRU list */ struct hlist_node d_sib; /* child of parent list */ struct hlist_head d_children; /* our children */ /* @@ -210,6 +207,9 @@ enum dentry_flags { DCACHE_REFERENCED = BIT(6), /* Recently used, don't discard. */ DCACHE_DONTCACHE = BIT(7), /* Purge from memory on final dput() */ DCACHE_CANT_MOUNT = BIT(8), + DCACHE_LOOKUP_WAITERS = BIT(9), /* A thread is waiting for + * PAR_LOOKUP to clear + */ DCACHE_SHRINK_LIST = BIT(10), DCACHE_OP_WEAK_REVALIDATE = BIT(11), /* @@ -256,8 +256,7 @@ extern void d_delete(struct dentry *); /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_anon(struct super_block *); -extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, - wait_queue_head_t *); +extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); /* weird procfs mess; *NOT* exported */ extern struct dentry * d_splice_alias_ops(struct inode *, struct dentry *, @@ -281,7 +280,7 @@ extern void d_tmpfile(struct file *, struct inode *); extern struct dentry *d_find_alias(struct inode *); extern void d_prune_aliases(struct inode *); -extern void d_dispose_if_unused(struct dentry *, struct list_head *); +extern bool __move_to_shrink_list(struct dentry *, struct list_head *); extern void shrink_dentry_list(struct list_head *); extern struct dentry *d_find_alias_rcu(struct inode *); @@ -366,6 +365,24 @@ static inline struct dentry *dget(struct dentry *dentry) return dentry; } +/* dentry->d_inode->i_lock must be held by caller */ +static inline bool dget_alias_ilocked(struct dentry *dentry) +{ + if (likely(!(READ_ONCE(dentry->d_flags) & DCACHE_NORCU))) { + lockref_get(&dentry->d_lockref); + return true; + } + // NORCU dentries with zero refcount MUST 
NOT be grabbed + spin_lock(&dentry->d_lock); + if (dentry->d_lockref.count > 0) { + dget_dlock(dentry); + spin_unlock(&dentry->d_lock); + return true; + } + spin_unlock(&dentry->d_lock); + return false; +} + extern struct dentry *dget_parent(struct dentry *dentry); /** diff --git a/include/linux/fs/super_types.h b/include/linux/fs/super_types.h index aa86e4944dbf..ef7941e9dc79 100644 --- a/include/linux/fs/super_types.h +++ b/include/linux/fs/super_types.h @@ -162,7 +162,8 @@ struct super_block { struct unicode_map *s_encoding; __u16 s_encoding_flags; #endif - struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ + struct hlist_head s_roots; /* alternate root dentries for NFS */ + spinlock_t s_roots_lock; struct mount *s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */ struct file *s_bdev_file; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 83ee991cde2b..35ea18a40b66 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1745,7 +1745,6 @@ struct nfs_unlinkdata { struct nfs_removeargs args; struct nfs_removeres res; struct dentry *dentry; - wait_queue_head_t wq; const struct cred *cred; struct nfs_fattr dir_attr; long timeout; |
