summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-06-15 04:15:31 +0530
committerLinus Torvalds <torvalds@linux-foundation.org>2026-06-15 04:15:31 +0530
commite8a56d6fc828bb569fa2dd33c3e6eb16a165b097 (patch)
tree1742623129141d47c68509271cf8183133e48849
parent79169a1624253363fed3e9a447b77e50bb226206 (diff)
parent3df5153c5f123d6018c82a24341ccd99c79d64a0 (diff)
Merge tag 'pull-dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull dcache updates from Al Viro: - d_alloc_parallel() API change (Neil's with my changes) - NORCU fixes - Reorganization and simplification of dentry eviction logic - Simplifying rcu_read_lock() scopes in fs/dcache.c - Secondary roots work - getting rid of NFS fake root dentries and dealing with remaining shrink_dcache_for_umount() and shrink_dentry_list() races - making cursors NORCU (surprisingly easy) * tag 'pull-dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (22 commits) make cursors NORCU nfs: get rid of fake root dentries wind ->s_roots via ->d_sib instead of ->d_hash shrink_dentry_tree(): unify the calls of shrink_dentry_list() shrinking rcu_read_lock() scope in d_alloc_parallel() d_walk(): shrink rcu_read_lock() scope document dentry_kill() adjust calling conventions of lock_for_kill(), fold __dentry_kill() into dentry_kill() Document rcu_read_lock() use in select_collect2() Shift rcu_read_{,un}lock() inside fast_dput() simplify safety for lock_for_kill() slowpath fold lock_for_kill() and __dentry_kill() into common helper fold lock_for_kill() into shrink_kill() shrink_dentry_list(): start with removing from shrink list d_prune_aliases(): make sure to skip NORCU aliases kill d_dispose_if_unused() make to_shrink_list() return whether it has moved dentry to list select_collect(): ignore dentries on shrink lists if they have positive refcounts find_acceptable_alias(): skip NORCU aliases with zero refcount fix a race between d_find_any_alias() and final dput() of NORCU dentries ...
-rw-r--r--Documentation/filesystems/porting.rst17
-rw-r--r--fs/afs/dir_silly.c4
-rw-r--r--fs/dcache.c549
-rw-r--r--fs/exportfs/expfs.c9
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/fuse/readdir.c3
-rw-r--r--fs/namei.c6
-rw-r--r--fs/nfs/dir.c6
-rw-r--r--fs/nfs/getroot.c35
-rw-r--r--fs/nfs/unlink.c3
-rw-r--r--fs/proc/base.c3
-rw-r--r--fs/proc/proc_sysctl.c3
-rw-r--r--fs/smb/client/readdir.c3
-rw-r--r--fs/super.c1
-rw-r--r--include/linux/dcache.h31
-rw-r--r--include/linux/fs/super_types.h3
-rw-r--r--include/linux/nfs_xdr.h1
18 files changed, 387 insertions, 294 deletions
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index f546b1d3897f..d13f0a23c882 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -1384,3 +1384,20 @@ for_each_alias(dentry, inode) instead of hlist_for_each_entry; better
yet, see if any of the exported primitives could be used instead of
the entire loop. You still need to hold ->i_lock of the inode over
either form of manual loop.
+
+---
+
+**mandatory**
+
+d_alloc_parallel() no longer requires a waitqueue_head.
+
+---
+
+**mandatory**
+
+d_dispose_if_unused() is gone; use __move_to_shrink_list() if you really
+need that functionality, but watch out for memory safety issues - just
+as with d_dispose_if_unused() these are not trivial; with this variant
+of API it's more explicit, since grabbing ->d_lock is caller-side, but
+d_dispose_if_unused() had all the same issues. It's a low-level primitive;
+use only if you have no alternative.
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
index a748fd133faf..982bb6ec15f0 100644
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -248,13 +248,11 @@ int afs_silly_iput(struct dentry *dentry, struct inode *inode)
struct dentry *alias;
int ret;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
-
_enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode);
down_read(&dvnode->rmdir_lock);
- alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name, &wq);
+ alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name);
if (IS_ERR(alias)) {
up_read(&dvnode->rmdir_lock);
return 0;
diff --git a/fs/dcache.c b/fs/dcache.c
index d6f505313205..3e9af9de7074 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -43,8 +43,8 @@
* - i_dentry, d_alias, d_inode of aliases
* dcache_hash_bucket lock protects:
* - the dcache hash table
- * s_roots bl list spinlock protects:
- * - the s_roots list (see __d_drop)
+ * s_roots_lock protects:
+ * - the s_roots list (see __d_move()/dentry_unlist()/d_obtain_root())
* dentry->d_sb->s_dentry_lru_lock protects:
* - the dcache lru lists and counters
* d_lock protects:
@@ -462,14 +462,10 @@ static void dentry_unlink_inode(struct dentry * dentry)
raw_write_seqcount_begin(&dentry->d_seq);
__d_clear_type_and_inode(dentry);
- hlist_del_init(&dentry->d_alias);
+ __hlist_del(&dentry->d_alias);
/*
* dentry becomes negative, so the space occupied by ->d_alias
- * belongs to ->waiters now; we could use __hlist_del() instead
- * of hlist_del_init(), if not for the stunt pulled by nfs
- * dummy root dentries - positive dentry *not* included into
- * the alias list of its inode. Open-coding hlist_del_init()
- * and removing zeroing would be too clumsy...
+ * belongs to ->waiters now.
*/
dentry->waiters = NULL;
raw_write_seqcount_end(&dentry->d_seq);
@@ -568,16 +564,7 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry,
static void ___d_drop(struct dentry *dentry)
{
- struct hlist_bl_head *b;
- /*
- * Hashed dentries are normally on the dentry hashtable,
- * with the exception of those newly allocated by
- * d_obtain_root, which are always IS_ROOT:
- */
- if (unlikely(IS_ROOT(dentry)))
- b = &dentry->d_sb->s_roots;
- else
- b = d_hash(dentry->d_name.hash);
+ struct hlist_bl_head *b = d_hash(dentry->d_name.hash);
hlist_bl_lock(b);
__hlist_bl_del(&dentry->d_hash);
@@ -636,12 +623,14 @@ struct completion_list {
* Use ->waiters for a single-linked list of struct completion_list of
* waiters.
*/
-static inline void d_add_waiter(struct dentry *dentry, struct completion_list *p)
+static inline bool d_add_waiter(struct dentry *dentry, struct completion_list *p)
{
- struct completion_list *v = dentry->waiters;
+ if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED))
+ return false;
init_completion(&p->completion);
- p->next = v;
+ p->next = dentry->waiters;
dentry->waiters = p;
+ return true;
}
static inline void d_complete_waiters(struct dentry *dentry)
@@ -658,6 +647,13 @@ static inline void d_complete_waiters(struct dentry *dentry)
}
}
+static void unlink_secondary_root(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_sb->s_roots_lock);
+ hlist_del_init(&dentry->d_sib);
+ spin_unlock(&dentry->d_sb->s_roots_lock);
+}
+
static inline void dentry_unlist(struct dentry *dentry)
{
struct dentry *next;
@@ -669,6 +665,10 @@ static inline void dentry_unlist(struct dentry *dentry)
d_complete_waiters(dentry);
if (unlikely(hlist_unhashed(&dentry->d_sib)))
return;
+ if (unlikely(IS_ROOT(dentry))) {
+ unlink_secondary_root(dentry); // secondary root goes away
+ return;
+ }
__hlist_del(&dentry->d_sib);
/*
* Cursors can move around the list of children. While we'd been
@@ -697,11 +697,113 @@ static inline void dentry_unlist(struct dentry *dentry)
}
}
-static struct dentry *__dentry_kill(struct dentry *dentry)
+/*
+ * Prepare locking environment for killing a dentry.
+ * Called under dentry->d_lock. To proceed with eviction of a positive dentry
+ * we need to get ->i_lock of the inode of that dentry as well.
+ * However, ->i_lock nests outside of ->d_lock, so if trylock fails we might
+ * have to drop and regain the latter. Dentry state can change while its
+ * ->d_lock is not held - it might end up getting killed, becoming busy,
+ * negative, etc., so we need to be careful.
+ *
+ * For NORCU dentries memory safety relies upon having only one call of
+ * lock_for_kill() in the entire lifetime of dentry and dentry_free() being
+ * called only by the caller of lock_for_kill(). Note that this is NORCU-specific;
+ * the crucial part is that refcounts of NORCU dentries never grow once having
+ * dropped to zero.
+ *
+ * For normal dentries we can not assume that there won't be concurrent calls
+ * of dentry_free() - dentry might end up being evicted by another thread
+ * while we are dropping/retaking locks on the slow path. Memory safety is
+ * provided by keeping the RCU read-side critical area contiguous with
+ * an explicit rcu_read_lock() scope bridging over the break in spinlock scopes.
+ *
+ * If dentry is busy (or busy dying, or already dead), unlock dentry
+ * and return false. Otherwise, return true and have that dentry's
+ * inode (if any) locked in addition to dentry itself.
+ */
+static bool lock_for_kill(struct dentry *dentry)
+{
+ struct inode *inode = dentry->d_inode;
+
+ if (unlikely(dentry->d_lockref.count)) {
+ spin_unlock(&dentry->d_lock);
+ return false;
+ }
+
+ if (!inode || likely(spin_trylock(&inode->i_lock)))
+ return true;
+
+ // Too bad - we need to drop ->d_lock and take locks in correct order.
+ // To avoid breaking RCU read-side critical area when we drop ->d_lock,
+ // take an explicit rcu_read_lock() while we are switching locks.
+ rcu_read_lock();
+ do {
+ spin_unlock(&dentry->d_lock);
+ spin_lock(&inode->i_lock);
+ spin_lock(&dentry->d_lock);
+ // make sure we'd locked the right inode - ->d_inode might've
+ // changed while we were not holding ->d_lock
+ if (likely(inode == dentry->d_inode))
+ break;
+ spin_unlock(&inode->i_lock);
+ inode = dentry->d_inode;
+ } while (inode);
+ rcu_read_unlock();
+ if (likely(!dentry->d_lockref.count))
+ return true;
+ if (inode)
+ spin_unlock(&inode->i_lock);
+ spin_unlock(&dentry->d_lock);
+ return false;
+}
+
+/**
+ * dentry_kill - evict a dentry
+ * @dentry: dentry to be evicted
+ *
+ * All dentry evictions are done by this function. The reference we are
+ * passed does not contribute to the refcount; the caller had either
+ * already decremented the refcount or it had never held one in the
+ * first place. @dentry->d_lock is held by the caller and dropped
+ * by dentry_kill(@dentry).
+ *
+ * We are guaranteed that nobody had called dentry_free(@dentry)
+ * prior to the beginning of RCU read-side critical area we are in.
+ *
+ * Caller must not access @dentry after the call.
+ *
+ * If eviction of @dentry drops the last reference to its parent,
+ * the reference to parent is returned to caller. In that case
+ * it is guaranteed to satisfy the requirements for dentry_kill()
+ * argument - its ->d_lock is held and we are guaranteed that nobody
+ * had passed it to dentry_free() prior to acquisition of its ->d_lock.
+ * Otherwise %NULL is returned.
+ *
+ * If @dentry is idle and remains such after we assemble the full
+ * locking environment for eviction (see lock_for_kill() for details)
+ * we mark it doomed (->d_lockref.count < 0) and proceed to detaching
+ * it from any filesystem objects. Otherwise we drop ->d_lock and
+ * return %NULL.
+ *
+ * Once @dentry is detached from the filesystem objects, we complete
+ * detaching it from dentry tree. The parent, if any, gets locked
+ * and its refcount is decremented; dentry is carefully removed from
+ * the tree (see dentry_unlist() for details) and marked killed
+ * (%DCACHE_DENTRY_KILLED set in ->d_flags). At that point it's just
+ * an inert chunk of memory, accessible only via RCU references
+ * and possibly via a shrink list. If it is not on any shrink lists,
+ * we call dentry_free(), which schedules actual freeing of memory.
+ * Otherwise freeing is left to the owner of the shrink list in question.
+ */
+static struct dentry *dentry_kill(struct dentry *dentry)
{
struct dentry *parent = NULL;
bool can_free = true;
+ if (unlikely(!lock_for_kill(dentry)))
+ return NULL;
+
/*
* The dentry is now unrecoverably dead to the world.
*/
@@ -749,43 +851,6 @@ static struct dentry *__dentry_kill(struct dentry *dentry)
}
/*
- * Lock a dentry for feeding it to __dentry_kill().
- * Called under rcu_read_lock() and dentry->d_lock; the former
- * guarantees that nothing we access will be freed under us.
- * Note that dentry is *not* protected from concurrent dentry_kill(),
- * d_delete(), etc.
- *
- * Return false if dentry is busy. Otherwise, return true and have
- * that dentry's inode locked.
- */
-
-static bool lock_for_kill(struct dentry *dentry)
-{
- struct inode *inode = dentry->d_inode;
-
- if (unlikely(dentry->d_lockref.count))
- return false;
-
- if (!inode || likely(spin_trylock(&inode->i_lock)))
- return true;
-
- do {
- spin_unlock(&dentry->d_lock);
- spin_lock(&inode->i_lock);
- spin_lock(&dentry->d_lock);
- if (likely(inode == dentry->d_inode))
- break;
- spin_unlock(&inode->i_lock);
- inode = dentry->d_inode;
- } while (inode);
- if (likely(!dentry->d_lockref.count))
- return true;
- if (inode)
- spin_unlock(&inode->i_lock);
- return false;
-}
-
-/*
* Decide if dentry is worth retaining. Usually this is called with dentry
* locked; if not locked, we are more limited and might not be able to tell
* without a lock. False in this case means "punt to locked path and recheck".
@@ -860,17 +925,17 @@ EXPORT_SYMBOL(d_mark_dontcache);
* If unsuccessful, we return false, having already taken the dentry lock.
* In that case refcount is guaranteed to be zero and we have already
* decided that it's not worth keeping around.
- *
- * The caller needs to hold the RCU read lock, so that the dentry is
- * guaranteed to stay around even if the refcount goes down to zero!
*/
static inline bool fast_dput(struct dentry *dentry)
{
int ret;
/*
- * try to decrement the lockref optimistically.
+ * Try to decrement the lockref optimistically.
+ * RCU read lock held so that dentry is guaranteed to stay around
+ * even if the refcount goes down to zero.
*/
+ rcu_read_lock();
ret = lockref_put_return(&dentry->d_lockref);
/*
@@ -880,6 +945,7 @@ static inline bool fast_dput(struct dentry *dentry)
*/
if (unlikely(ret < 0)) {
spin_lock(&dentry->d_lock);
+ rcu_read_unlock();
if (WARN_ON_ONCE(dentry->d_lockref.count <= 0)) {
spin_unlock(&dentry->d_lock);
return true;
@@ -891,8 +957,10 @@ static inline bool fast_dput(struct dentry *dentry)
/*
* If we weren't the last ref, we're done.
*/
- if (ret)
+ if (ret) {
+ rcu_read_unlock();
return true;
+ }
/*
* Can we decide that decrement of refcount is all we needed without
@@ -900,8 +968,10 @@ static inline bool fast_dput(struct dentry *dentry)
* dentry looks like it ought to be retained and there's nothing else
* to do.
*/
- if (retain_dentry(dentry, false))
+ if (retain_dentry(dentry, false)) {
+ rcu_read_unlock();
return true;
+ }
/*
* Either not worth retaining or we can't tell without the lock.
@@ -909,6 +979,7 @@ static inline bool fast_dput(struct dentry *dentry)
* but we'll need to re-check the situation after getting the lock.
*/
spin_lock(&dentry->d_lock);
+ rcu_read_unlock();
/*
* Did somebody else grab a reference to it in the meantime, and
@@ -926,21 +997,13 @@ locked:
static void finish_dput(struct dentry *dentry)
__releases(dentry->d_lock)
- __releases(RCU)
{
- while (lock_for_kill(dentry)) {
- rcu_read_unlock();
- dentry = __dentry_kill(dentry);
- if (!dentry)
- return;
+ while ((dentry = dentry_kill(dentry)) != NULL) {
if (retain_dentry(dentry, true)) {
spin_unlock(&dentry->d_lock);
return;
}
- rcu_read_lock();
}
- rcu_read_unlock();
- spin_unlock(&dentry->d_lock);
}
/*
@@ -974,11 +1037,8 @@ void dput(struct dentry *dentry)
if (!dentry)
return;
might_sleep();
- rcu_read_lock();
- if (likely(fast_dput(dentry))) {
- rcu_read_unlock();
+ if (likely(fast_dput(dentry)))
return;
- }
finish_dput(dentry);
}
EXPORT_SYMBOL(dput);
@@ -989,30 +1049,46 @@ void d_make_discardable(struct dentry *dentry)
WARN_ON(!(dentry->d_flags & DCACHE_PERSISTENT));
dentry->d_flags &= ~DCACHE_PERSISTENT;
dentry->d_lockref.count--;
- rcu_read_lock();
finish_dput(dentry);
}
EXPORT_SYMBOL(d_make_discardable);
-static void to_shrink_list(struct dentry *dentry, struct list_head *list)
+/**
+ * __move_to_shrink_list - try to place a dentry into a shrink list
+ * @dentry: dentry to try putting into shrink list
+ * @list: the list to put @dentry into.
+ * Returns: true if @dentry had been placed into @list, false otherwise
+ *
+ * If @dentry is idle and not already included in a shrink list, move
+ * it into @list and return %true; otherwise do nothing and return %false.
+ *
+ * Caller must be holding @dentry->d_lock. There must have been no calls of
+ * dentry_free(@dentry) prior to the beginning of the RCU read-side critical
+ * area in which __move_to_shrink_list(@dentry, @list) is called.
+ *
+ * @list should be thread-private and eventually emptied by passing it to
+ * shrink_dentry_list().
+ */
+
+bool __move_to_shrink_list(struct dentry *dentry, struct list_head *list)
__must_hold(&dentry->d_lock)
{
- if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
+ if (likely(!dentry->d_lockref.count &&
+ !(dentry->d_flags & DCACHE_SHRINK_LIST))) {
if (dentry->d_flags & DCACHE_LRU_LIST)
d_lru_del(dentry);
d_shrink_add(dentry, list);
+ return true;
}
+ return false;
}
+EXPORT_SYMBOL(__move_to_shrink_list);
void dput_to_list(struct dentry *dentry, struct list_head *list)
{
- rcu_read_lock();
- if (likely(fast_dput(dentry))) {
- rcu_read_unlock();
+ if (likely(fast_dput(dentry)))
return;
- }
- rcu_read_unlock();
- to_shrink_list(dentry, list);
+ __move_to_shrink_list(dentry, list);
spin_unlock(&dentry->d_lock);
}
@@ -1058,7 +1134,10 @@ repeat:
}
EXPORT_SYMBOL(dget_parent);
-static struct dentry * __d_find_any_alias(struct inode *inode)
+/*
+ * inode is a directory, inode->i_lock is held by the caller
+ */
+static struct dentry * __d_find_dir_alias(struct inode *inode)
{
struct dentry *alias;
@@ -1069,6 +1148,18 @@ static struct dentry * __d_find_any_alias(struct inode *inode)
return alias;
}
+static struct dentry * __d_find_any_alias(struct inode *inode)
+{
+ struct dentry *alias;
+
+ if (hlist_empty(&inode->i_dentry))
+ return NULL;
+ for_each_alias(alias, inode)
+ if (dget_alias_ilocked(alias))
+ return alias;
+ return NULL;
+}
+
/**
* d_find_any_alias - find any alias for a given inode
* @inode: inode to find an alias for
@@ -1092,7 +1183,7 @@ static struct dentry *__d_find_alias(struct inode *inode)
struct dentry *alias;
if (S_ISDIR(inode->i_mode))
- return __d_find_any_alias(inode);
+ return __d_find_dir_alias(inode);
for_each_alias(alias, inode) {
spin_lock(&alias->d_lock);
@@ -1158,25 +1249,6 @@ struct dentry *d_find_alias_rcu(struct inode *inode)
return de;
}
-/**
- * d_dispose_if_unused - move unreferenced dentries to shrink list
- * @dentry: dentry in question
- * @dispose: head of shrink list
- *
- * If dentry has no external references, move it to shrink list.
- *
- * NOTE!!! The caller is responsible for preventing eviction of the dentry by
- * holding dentry->d_inode->i_lock or equivalent.
- */
-void d_dispose_if_unused(struct dentry *dentry, struct list_head *dispose)
-{
- spin_lock(&dentry->d_lock);
- if (!dentry->d_lockref.count)
- to_shrink_list(dentry, dispose);
- spin_unlock(&dentry->d_lock);
-}
-EXPORT_SYMBOL(d_dispose_if_unused);
-
/*
* Try to kill dentries associated with this inode.
* WARNING: you must own a reference to inode.
@@ -1187,8 +1259,12 @@ void d_prune_aliases(struct inode *inode)
struct dentry *dentry;
spin_lock(&inode->i_lock);
- for_each_alias(dentry, inode)
- d_dispose_if_unused(dentry, &dispose);
+ for_each_alias(dentry, inode) {
+ spin_lock(&dentry->d_lock);
+ if (likely(!(dentry->d_flags & DCACHE_NORCU)))
+ __move_to_shrink_list(dentry, &dispose);
+ spin_unlock(&dentry->d_lock);
+ }
spin_unlock(&inode->i_lock);
shrink_dentry_list(&dispose);
}
@@ -1196,14 +1272,8 @@ EXPORT_SYMBOL(d_prune_aliases);
static inline void shrink_kill(struct dentry *victim)
{
- do {
- rcu_read_unlock();
- victim = __dentry_kill(victim);
- rcu_read_lock();
- } while (victim && lock_for_kill(victim));
- rcu_read_unlock();
- if (victim)
- spin_unlock(&victim->d_lock);
+ while ((victim = dentry_kill(victim)) != NULL)
+ ;
}
void shrink_dentry_list(struct list_head *list)
@@ -1213,18 +1283,12 @@ void shrink_dentry_list(struct list_head *list)
dentry = list_entry(list->prev, struct dentry, d_lru);
spin_lock(&dentry->d_lock);
- rcu_read_lock();
- if (!lock_for_kill(dentry)) {
- bool can_free;
- rcu_read_unlock();
- d_shrink_del(dentry);
- can_free = dentry->d_flags & DCACHE_DENTRY_KILLED;
+ d_shrink_del(dentry);
+ if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
spin_unlock(&dentry->d_lock);
- if (can_free)
- dentry_free(dentry);
+ dentry_free(dentry);
continue;
}
- d_shrink_del(dentry);
shrink_kill(dentry);
}
}
@@ -1385,6 +1449,8 @@ again:
read_seqbegin_or_lock(&rename_lock, &seq);
this_parent = parent;
spin_lock(&this_parent->d_lock);
+ if (unlikely(this_parent->d_flags & DCACHE_DENTRY_CURSOR))
+ goto out_unlock;
ret = enter(data, this_parent);
switch (ret) {
@@ -1433,14 +1499,15 @@ resume:
/*
* All done at this level ... ascend and resume the search.
*/
- rcu_read_lock();
ascend:
if (this_parent != parent) {
dentry = this_parent;
this_parent = dentry->d_parent;
+ rcu_read_lock();
spin_unlock(&dentry->d_lock);
spin_lock(&this_parent->d_lock);
+ rcu_read_unlock();
/* might go back up the wrong parent if we have had a rename. */
if (need_seqretry(&rename_lock, seq))
@@ -1448,7 +1515,6 @@ ascend:
/* go into the first sibling still alive */
hlist_for_each_entry_continue(dentry, d_sib) {
if (likely(!(dentry->d_flags & DCACHE_DENTRY_KILLED))) {
- rcu_read_unlock();
goto resume;
}
}
@@ -1456,7 +1522,6 @@ ascend:
}
if (need_seqretry(&rename_lock, seq))
goto rename_retry;
- rcu_read_unlock();
out_unlock:
spin_unlock(&this_parent->d_lock);
@@ -1465,7 +1530,6 @@ out_unlock:
rename_retry:
spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
BUG_ON(seq & 1);
if (!retry)
return;
@@ -1580,12 +1644,8 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
if (data->start == dentry)
goto out;
- if (dentry->d_flags & DCACHE_SHRINK_LIST) {
- data->found++;
- } else if (!dentry->d_lockref.count) {
- to_shrink_list(dentry, &data->dispose);
- data->found++;
- } else if (dentry->d_lockref.count < 0) {
+ if (dentry->d_lockref.count <= 0) {
+ __move_to_shrink_list(dentry, &data->dispose);
data->found++;
}
/*
@@ -1616,17 +1676,21 @@ static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
if (data->start == dentry)
goto out;
- if (!dentry->d_lockref.count) {
- if (dentry->d_flags & DCACHE_SHRINK_LIST) {
+ if (dentry->d_lockref.count <= 0) {
+ if (!__move_to_shrink_list(dentry, &data->dispose)) {
+ /*
+ * We need to enter an RCU read-side critical area that
+ * would extend past the return from d_walk() and
+ * we are in the scope of ->d_lock that will terminate
+ * before that, so we use rcu_read_lock() to bridge
+ * over to the scope of ->d_lock in d_walk() caller.
+ * The scope of rcu_read_lock() spans from here to
+ * paired rcu_read_unlock() in shrink_dcache_tree().
+ */
rcu_read_lock();
data->victim = dentry;
return D_WALK_QUIT;
}
- to_shrink_list(dentry, &data->dispose);
- } else if (dentry->d_lockref.count < 0) {
- rcu_read_lock();
- data->victim = dentry;
- return D_WALK_QUIT;
}
/*
* We can return to the caller if we have found some (this
@@ -1649,7 +1713,9 @@ out:
static void shrink_dcache_tree(struct dentry *parent, bool for_umount)
{
for (;;) {
- struct select_data data = {.start = parent};
+ struct completion_list wait;
+ bool need_wait = false;
+ struct select_data data = { .start = parent };
INIT_LIST_HEAD(&data.dispose);
d_walk(parent, &data,
@@ -1667,30 +1733,32 @@ static void shrink_dcache_tree(struct dentry *parent, bool for_umount)
d_walk(parent, &data, select_collect2);
if (data.victim) {
struct dentry *v = data.victim;
-
+ /*
+ * select_collect2() has picked a dentry that was
+ * either dying or on a shrink list and arranged
+ * for it to be returned to us. We are still in
+ * the RCU read-side critical area started there
+ * (rcu_read_lock() scope opened in select_collect2()),
+ * so dentry couldn't have been freed yet, but its
+ * state might've changed since we dropped ->d_lock
+ * on the way out. Switch over to ->d_lock scope
+ * and recheck the dentry state.
+ */
spin_lock(&v->d_lock);
- if (v->d_lockref.count < 0 &&
- !(v->d_flags & DCACHE_DENTRY_KILLED)) {
- struct completion_list wait;
- // It's busy dying; have it notify us once
- // it becomes invisible to d_walk().
- d_add_waiter(v, &wait);
- spin_unlock(&v->d_lock);
- rcu_read_unlock();
- if (!list_empty(&data.dispose))
- shrink_dentry_list(&data.dispose);
- wait_for_completion(&wait.completion);
- continue;
- }
- if (!lock_for_kill(v)) {
+ rcu_read_unlock();
+
+ if (unlikely(v->d_lockref.count < 0)) {
+ // It's doomed; if it isn't dead yet, notify us
+ // once it becomes invisible to d_walk().
+ need_wait = d_add_waiter(v, &wait);
spin_unlock(&v->d_lock);
- rcu_read_unlock();
} else {
shrink_kill(v);
}
}
- if (!list_empty(&data.dispose))
- shrink_dentry_list(&data.dispose);
+ shrink_dentry_list(&data.dispose);
+ if (unlikely(need_wait))
+ wait_for_completion(&wait.completion);
}
}
@@ -1743,9 +1811,30 @@ void shrink_dcache_for_umount(struct super_block *sb)
sb->s_root = NULL;
do_one_tree(dentry);
- while (!hlist_bl_empty(&sb->s_roots)) {
- dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_roots), struct dentry, d_hash));
- do_one_tree(dentry);
+ for (;;) {
+ spin_lock(&sb->s_roots_lock);
+ dentry = hlist_entry_safe(sb->s_roots.first,
+ struct dentry, d_sib);
+ if (!dentry) {
+ spin_unlock(&sb->s_roots_lock);
+ break;
+ }
+ rcu_read_lock();
+ spin_unlock(&sb->s_roots_lock);
+ spin_lock(&dentry->d_lock);
+ rcu_read_unlock();
+ if (unlikely(dentry->d_lockref.count < 0)) {
+ struct completion_list wait;
+ bool need_wait = d_add_waiter(dentry, &wait);
+
+ spin_unlock(&dentry->d_lock);
+ if (need_wait)
+ wait_for_completion(&wait.completion);
+ } else {
+ dget_dlock(dentry);
+ spin_unlock(&dentry->d_lock);
+ do_one_tree(dentry);
+ }
}
}
@@ -1915,7 +2004,7 @@ struct dentry *d_alloc_cursor(struct dentry * parent)
{
struct dentry *dentry = d_alloc_anon(parent->d_sb);
if (dentry) {
- dentry->d_flags |= DCACHE_DENTRY_CURSOR;
+ dentry->d_flags |= DCACHE_DENTRY_CURSOR | DCACHE_NORCU;
dentry->d_parent = dget(parent);
}
return dentry;
@@ -2166,9 +2255,9 @@ static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected)
__d_set_inode_and_type(new, inode, add_flags);
hlist_add_head(&new->d_alias, &inode->i_dentry);
if (!disconnected) {
- hlist_bl_lock(&sb->s_roots);
- hlist_bl_add_head(&new->d_hash, &sb->s_roots);
- hlist_bl_unlock(&sb->s_roots);
+ spin_lock(&sb->s_roots_lock);
+ hlist_add_head(&new->d_sib, &sb->s_roots);
+ spin_unlock(&sb->s_roots_lock);
}
spin_unlock(&new->d_lock);
spin_unlock(&inode->i_lock);
@@ -2260,8 +2349,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
return found;
}
if (d_in_lookup(dentry)) {
- found = d_alloc_parallel(dentry->d_parent, name,
- dentry->d_wait);
+ found = d_alloc_parallel(dentry->d_parent, name);
if (IS_ERR(found) || !d_in_lookup(found)) {
iput(inode);
return found;
@@ -2648,32 +2736,24 @@ static inline unsigned start_dir_add(struct inode *dir)
}
}
-static inline void end_dir_add(struct inode *dir, unsigned int n,
- wait_queue_head_t *d_wait)
+static inline void end_dir_add(struct inode *dir, unsigned int n)
{
smp_store_release(&dir->i_dir_seq, n + 2);
preempt_enable_nested();
- if (wq_has_sleeper(d_wait))
- wake_up_all(d_wait);
}
static void d_wait_lookup(struct dentry *dentry)
{
- if (d_in_lookup(dentry)) {
- DECLARE_WAITQUEUE(wait, current);
- add_wait_queue(dentry->d_wait, &wait);
- do {
- set_current_state(TASK_UNINTERRUPTIBLE);
- spin_unlock(&dentry->d_lock);
- schedule();
- spin_lock(&dentry->d_lock);
- } while (d_in_lookup(dentry));
+ if (likely(d_in_lookup(dentry))) {
+ dentry->d_flags |= DCACHE_LOOKUP_WAITERS;
+ wait_var_event_spinlock(&dentry->d_flags,
+ !d_in_lookup(dentry),
+ &dentry->d_lock);
}
}
struct dentry *d_alloc_parallel(struct dentry *parent,
- const struct qstr *name,
- wait_queue_head_t *wq)
+ const struct qstr *name)
{
unsigned int hash = name->hash;
struct hlist_bl_head *b = in_lookup_hash(parent, hash);
@@ -2694,38 +2774,33 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
spin_unlock(&parent->d_lock);
retry:
- rcu_read_lock();
seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
r_seq = read_seqbegin(&rename_lock);
+ rcu_read_lock();
dentry = __d_lookup_rcu(parent, name, &d_seq);
if (unlikely(dentry)) {
if (!lockref_get_not_dead(&dentry->d_lockref)) {
rcu_read_unlock();
goto retry;
}
+ rcu_read_unlock();
if (read_seqcount_retry(&dentry->d_seq, d_seq)) {
- rcu_read_unlock();
dput(dentry);
goto retry;
}
- rcu_read_unlock();
dput(new);
return dentry;
}
- if (unlikely(read_seqretry(&rename_lock, r_seq))) {
- rcu_read_unlock();
+ rcu_read_unlock();
+ if (unlikely(read_seqretry(&rename_lock, r_seq)))
goto retry;
- }
- if (unlikely(seq & 1)) {
- rcu_read_unlock();
+ if (unlikely(seq & 1))
goto retry;
- }
hlist_bl_lock(b);
if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) {
hlist_bl_unlock(b);
- rcu_read_unlock();
goto retry;
}
/*
@@ -2742,19 +2817,20 @@ retry:
continue;
if (!d_same_name(dentry, parent, name))
continue;
+ rcu_read_lock();
hlist_bl_unlock(b);
+ spin_lock(&dentry->d_lock);
+ rcu_read_unlock();
/* now we can try to grab a reference */
- if (!lockref_get_not_dead(&dentry->d_lockref)) {
- rcu_read_unlock();
+ if (unlikely(dentry->d_lockref.count < 0)) {
+ spin_unlock(&dentry->d_lock);
goto retry;
}
-
- rcu_read_unlock();
/*
* somebody is likely to be still doing lookup for it;
- * wait for them to finish
+ * pin it and wait for them to finish
*/
- spin_lock(&dentry->d_lock);
+ dget_dlock(dentry);
d_wait_lookup(dentry);
/*
* it's not in-lookup anymore; in principle we should repeat
@@ -2775,8 +2851,6 @@ retry:
dput(new);
return dentry;
}
- rcu_read_unlock();
- new->d_wait = wq;
hlist_bl_add_head(&new->d_in_lookup_hash, b);
hlist_bl_unlock(b);
return new;
@@ -2788,13 +2862,26 @@ mismatch:
EXPORT_SYMBOL(d_alloc_parallel);
/*
- * - Unhash the dentry
- * - Retrieve and clear the waitqueue head in dentry
- * - Return the waitqueue head
+ * Move dentry from in-lookup state to busy-negative one.
+ *
+ * From now on d_in_lookup(dentry) will return false and dentry is gone from
+ * in-lookup hash.
+ *
+ * Anyone who had been waiting on it in d_alloc_parallel() is free to
+ * proceed after that. Note that waking such waiters up is left to
+ * the callers; PREEMPT_RT kernels can't have that wakeup done while
+ * in write-side critical area for ->i_dir_seq, so it's done by calling
+ * __d_wake_in_lookup_waiters() once it's safe to do so.
+ *
+ * Both __d_lookup_unhash() and __d_wake_in_lookup_waiters() should
+ * be called within the same ->d_lock scope. PAR_LOOKUP is cleared
+ * here, while LOOKUP_WAITERS (set by somebody finding dentry in
+ * the in-lookup hash and settling down to wait) is checked and cleared
+ * in __d_wake_in_lookup_waiters(). Both are gone by the end of
+ * ->d_lock scope.
*/
-static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
+static void __d_lookup_unhash(struct dentry *dentry)
{
- wait_queue_head_t *d_wait;
struct hlist_bl_head *b;
lockdep_assert_held(&dentry->d_lock);
@@ -2803,18 +2890,23 @@ static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
hlist_bl_lock(b);
dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
__hlist_bl_del(&dentry->d_in_lookup_hash);
- d_wait = dentry->d_wait;
- dentry->d_wait = NULL;
hlist_bl_unlock(b);
dentry->waiters = NULL;
- INIT_LIST_HEAD(&dentry->d_lru);
- return d_wait;
+}
+
+static inline void __d_wake_in_lookup_waiters(struct dentry *dentry)
+{
+ if (dentry->d_flags & DCACHE_LOOKUP_WAITERS) {
+ wake_up_var_locked(&dentry->d_flags, &dentry->d_lock);
+ dentry->d_flags &= ~DCACHE_LOOKUP_WAITERS;
+ }
}
void __d_lookup_unhash_wake(struct dentry *dentry)
{
spin_lock(&dentry->d_lock);
- wake_up_all(__d_lookup_unhash(dentry));
+ __d_lookup_unhash(dentry);
+ __d_wake_in_lookup_waiters(dentry);
spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(__d_lookup_unhash_wake);
@@ -2824,14 +2916,13 @@ EXPORT_SYMBOL(__d_lookup_unhash_wake);
static inline void __d_add(struct dentry *dentry, struct inode *inode,
const struct dentry_operations *ops)
{
- wait_queue_head_t *d_wait;
struct inode *dir = NULL;
unsigned n;
spin_lock(&dentry->d_lock);
if (unlikely(d_in_lookup(dentry))) {
dir = dentry->d_parent->d_inode;
n = start_dir_add(dir);
- d_wait = __d_lookup_unhash(dentry);
+ __d_lookup_unhash(dentry);
}
if (unlikely(ops))
d_set_d_op(dentry, ops);
@@ -2844,8 +2935,10 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode,
fsnotify_update_flags(dentry);
}
__d_rehash(dentry);
- if (dir)
- end_dir_add(dir, n, d_wait);
+ if (dir) {
+ end_dir_add(dir, n);
+ __d_wake_in_lookup_waiters(dentry);
+ }
spin_unlock(&dentry->d_lock);
if (inode)
spin_unlock(&inode->i_lock);
@@ -2958,7 +3051,6 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
bool exchange)
{
struct dentry *old_parent, *p;
- wait_queue_head_t *d_wait;
struct inode *dir = NULL;
unsigned n;
@@ -2989,7 +3081,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
if (unlikely(d_in_lookup(target))) {
dir = target->d_parent->d_inode;
n = start_dir_add(dir);
- d_wait = __d_lookup_unhash(target);
+ __d_lookup_unhash(target);
}
write_seqcount_begin(&dentry->d_seq);
@@ -3028,9 +3120,10 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
write_seqcount_end(&target->d_seq);
write_seqcount_end(&dentry->d_seq);
- if (dir)
- end_dir_add(dir, n, d_wait);
-
+ if (dir) {
+ end_dir_add(dir, n);
+ __d_wake_in_lookup_waiters(target);
+ }
if (dentry->d_parent != old_parent)
spin_unlock(&dentry->d_parent->d_lock);
if (dentry != old_parent)
@@ -3151,7 +3244,7 @@ struct dentry *d_splice_alias_ops(struct inode *inode, struct dentry *dentry,
security_d_instantiate(dentry, inode);
spin_lock(&inode->i_lock);
if (S_ISDIR(inode->i_mode)) {
- struct dentry *new = __d_find_any_alias(inode);
+ struct dentry *new = __d_find_dir_alias(inode);
if (unlikely(new)) {
/* The reference to new ensures it remains an alias */
spin_unlock(&inode->i_lock);
@@ -3176,6 +3269,12 @@ struct dentry *d_splice_alias_ops(struct inode *inode, struct dentry *dentry,
}
dput(old_parent);
} else {
+ if (unlikely(!hlist_unhashed(&new->d_sib))) {
+ // secondary root getting spliced
+ spin_lock(&new->d_lock);
+ unlink_secondary_root(new);
+ spin_unlock(&new->d_lock);
+ }
__d_move(new, dentry, false);
write_sequnlock(&rename_lock);
}
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index fbd45e7ae706..eafd99507afe 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -53,10 +53,10 @@ find_acceptable_alias(struct dentry *result,
inode = result->d_inode;
spin_lock(&inode->i_lock);
for_each_alias(dentry, inode) {
- dget(dentry);
+ if (!dget_alias_ilocked(dentry))
+ continue;
spin_unlock(&inode->i_lock);
- if (toput)
- dput(toput);
+ dput(toput);
if (dentry != result && acceptable(context, dentry)) {
dput(result);
return dentry;
@@ -66,8 +66,7 @@ find_acceptable_alias(struct dentry *result,
}
spin_unlock(&inode->i_lock);
- if (toput)
- dput(toput);
+ dput(toput);
return NULL;
}
diff --git a/fs/file_table.c b/fs/file_table.c
index 3c08832aa387..c68b8c0a4097 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -402,6 +402,8 @@ static struct file *alloc_file(const struct path *path, int flags,
static inline int alloc_path_pseudo(const char *name, struct inode *inode,
struct vfsmount *mnt, struct path *path)
{
+ if (WARN_ON_ONCE(S_ISDIR(inode->i_mode)))
+ return -EINVAL;
path->dentry = d_alloc_pseudo(mnt->mnt_sb, &QSTR(name));
if (!path->dentry)
return -ENOMEM;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b658b6baf72f..d8e8ea7280bc 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -177,8 +177,8 @@ static void fuse_dentry_tree_work(struct work_struct *work)
spin_lock(&fd->dentry->d_lock);
/* If dentry is still referenced, let next dput release it */
fd->dentry->d_flags |= DCACHE_OP_DELETE;
+ __move_to_shrink_list(fd->dentry, &dispose);
spin_unlock(&fd->dentry->d_lock);
- d_dispose_if_unused(fd->dentry, &dispose);
if (need_resched()) {
spin_unlock(&dentry_hash[i].lock);
cond_resched();
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index db5ae8ec1030..a2361f1d9905 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -164,7 +164,6 @@ static int fuse_direntplus_link(struct file *file,
struct inode *dir = d_inode(parent);
struct fuse_conn *fc;
struct inode *inode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
int epoch;
if (!o->nodeid) {
@@ -201,7 +200,7 @@ static int fuse_direntplus_link(struct file *file,
dentry = d_lookup(parent, &name);
if (!dentry) {
retry:
- dentry = d_alloc_parallel(parent, &name, &wq);
+ dentry = d_alloc_parallel(parent, &name);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
}
diff --git a/fs/namei.c b/fs/namei.c
index 8340dc4108be..5cc9f0f466b8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1896,13 +1896,12 @@ static struct dentry *__lookup_slow(const struct qstr *name,
{
struct dentry *dentry, *old;
struct inode *inode = dir->d_inode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
/* Don't go there if it's already dead */
if (unlikely(IS_DEADDIR(inode)))
return ERR_PTR(-ENOENT);
again:
- dentry = d_alloc_parallel(dir, name, &wq);
+ dentry = d_alloc_parallel(dir, name);
if (IS_ERR(dentry))
return dentry;
if (unlikely(!d_in_lookup(dentry))) {
@@ -4413,7 +4412,6 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
struct dentry *dentry;
int error, create_error = 0;
umode_t mode = op->mode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
if (unlikely(IS_DEADDIR(dir_inode)))
return ERR_PTR(-ENOENT);
@@ -4422,7 +4420,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
dentry = d_lookup(dir, &nd->last);
for (;;) {
if (!dentry) {
- dentry = d_alloc_parallel(dir, &nd->last, &wq);
+ dentry = d_alloc_parallel(dir, &nd->last);
if (IS_ERR(dentry))
return dentry;
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 1b9c368fb133..2f5f26f93238 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -726,7 +726,6 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
unsigned long dir_verifier)
{
struct qstr filename = QSTR_INIT(entry->name, entry->len);
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
struct dentry *dentry;
struct dentry *alias;
struct inode *inode;
@@ -755,7 +754,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
dentry = d_lookup(parent, &filename);
again:
if (!dentry) {
- dentry = d_alloc_parallel(parent, &filename, &wq);
+ dentry = d_alloc_parallel(parent, &filename);
if (IS_ERR(dentry))
return;
}
@@ -2106,7 +2105,6 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned open_flags,
umode_t mode)
{
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
struct nfs_open_context *ctx;
struct dentry *res;
struct iattr attr = { .ia_valid = ATTR_OPEN };
@@ -2162,7 +2160,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
d_drop(dentry);
switched = true;
dentry = d_alloc_parallel(dentry->d_parent,
- &dentry->d_name, &wq);
+ &dentry->d_name);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
if (unlikely(!d_in_lookup(dentry)))
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index eef0736beb67..ff7424bc4bec 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -33,35 +33,6 @@
#define NFSDBG_FACILITY NFSDBG_CLIENT
/*
- * Set the superblock root dentry.
- * Note that this function frees the inode in case of error.
- */
-static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *inode)
-{
- /* The mntroot acts as the dummy root dentry for this superblock */
- if (sb->s_root == NULL) {
- sb->s_root = d_make_root(inode);
- if (sb->s_root == NULL)
- return -ENOMEM;
- ihold(inode);
- /*
- * Ensure that this dentry is invisible to d_find_alias().
- * Otherwise, it may be spliced into the tree by
- * d_splice_alias if a parent directory from the same
- * filesystem gets mounted at a later time.
- * This again causes shrink_dcache_for_umount_subtree() to
- * Oops, since the test for IS_ROOT() will fail.
- */
- spin_lock(&d_inode(sb->s_root)->i_lock);
- spin_lock(&sb->s_root->d_lock);
- hlist_del_init(&sb->s_root->d_alias);
- spin_unlock(&sb->s_root->d_lock);
- spin_unlock(&d_inode(sb->s_root)->i_lock);
- }
- return 0;
-}
-
-/*
* get a root dentry from the root filehandle
*/
int nfs_get_root(struct super_block *s, struct fs_context *fc)
@@ -99,10 +70,6 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc)
goto out_fattr;
}
- error = nfs_superblock_set_dummy_root(s, inode);
- if (error != 0)
- goto out_fattr;
-
/* root dentries normally start off anonymous and get spliced in later
* if the dentry tree reaches them; however if the dentry already
* exists, we'll pick it up at this point and use it as the root
@@ -123,6 +90,8 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc)
name = NULL;
}
spin_unlock(&root->d_lock);
+ if (!s->s_root)
+ s->s_root = dget(root);
fc->root = root;
if (server->caps & NFS_CAP_SECURITY_LABEL)
kflags |= SECURITY_LSM_NATIVE_LABELS;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index df3ca4669df6..43ea897943c0 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -124,7 +124,7 @@ static int nfs_call_unlink(struct dentry *dentry, struct inode *inode, struct nf
struct dentry *alias;
down_read_non_owner(&NFS_I(dir)->rmdir_sem);
- alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq);
+ alias = d_alloc_parallel(dentry->d_parent, &data->args.name);
if (IS_ERR(alias)) {
up_read_non_owner(&NFS_I(dir)->rmdir_sem);
return 0;
@@ -185,7 +185,6 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name)
data->cred = get_current_cred();
data->res.dir_attr = &data->dir_attr;
- init_waitqueue_head(&data->wq);
status = -EBUSY;
spin_lock(&dentry->d_lock);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b67e8c3605fb..aae47c676f0b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2111,8 +2111,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
goto end_instantiate;
if (!child) {
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
- child = d_alloc_parallel(dir, &qname, &wq);
+ child = d_alloc_parallel(dir, &qname);
if (IS_ERR(child))
goto end_instantiate;
if (d_in_lookup(child)) {
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 49ab74e0bfde..04a382178c65 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -692,8 +692,7 @@ static bool proc_sys_fill_cache(struct file *file,
child = d_lookup(dir, &qname);
if (!child) {
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
- child = d_alloc_parallel(dir, &qname, &wq);
+ child = d_alloc_parallel(dir, &qname);
if (IS_ERR(child))
return false;
if (d_in_lookup(child)) {
diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c
index e860fa08b5e3..1ff77f3d1de0 100644
--- a/fs/smb/client/readdir.c
+++ b/fs/smb/client/readdir.c
@@ -73,7 +73,6 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
bool posix = cifs_sb_master_tcon(cifs_sb)->posix_extensions;
bool reparse_need_reval = false;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
int rc;
cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
@@ -105,7 +104,7 @@ retry:
(fattr->cf_flags & CIFS_FATTR_NEED_REVAL))
return;
- dentry = d_alloc_parallel(parent, name, &wq);
+ dentry = d_alloc_parallel(parent, name);
}
if (IS_ERR(dentry))
return;
diff --git a/fs/super.c b/fs/super.c
index a117e1f0dfe3..a8fd61136aaf 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -359,6 +359,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
s->s_iflags |= SB_I_NODEV;
INIT_HLIST_NODE(&s->s_instances);
INIT_HLIST_BL_HEAD(&s->s_roots);
+ spin_lock_init(&s->s_roots_lock);
mutex_init(&s->s_sync_lock);
INIT_LIST_HEAD(&s->s_inodes);
spin_lock_init(&s->s_inode_list_lock);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 2577c05f84ec..4b1ff99608e0 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -116,10 +116,7 @@ struct dentry {
* possible!
*/
- union {
- struct list_head d_lru; /* LRU list */
- wait_queue_head_t *d_wait; /* in-lookup ones only */
- };
+ struct list_head d_lru; /* LRU list */
struct hlist_node d_sib; /* child of parent list */
struct hlist_head d_children; /* our children */
/*
@@ -210,6 +207,9 @@ enum dentry_flags {
DCACHE_REFERENCED = BIT(6), /* Recently used, don't discard. */
DCACHE_DONTCACHE = BIT(7), /* Purge from memory on final dput() */
DCACHE_CANT_MOUNT = BIT(8),
+ DCACHE_LOOKUP_WAITERS = BIT(9), /* A thread is waiting for
+ * PAR_LOOKUP to clear
+ */
DCACHE_SHRINK_LIST = BIT(10),
DCACHE_OP_WEAK_REVALIDATE = BIT(11),
/*
@@ -256,8 +256,7 @@ extern void d_delete(struct dentry *);
/* allocate/de-allocate */
extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
extern struct dentry * d_alloc_anon(struct super_block *);
-extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
- wait_queue_head_t *);
+extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *);
extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
/* weird procfs mess; *NOT* exported */
extern struct dentry * d_splice_alias_ops(struct inode *, struct dentry *,
@@ -281,7 +280,7 @@ extern void d_tmpfile(struct file *, struct inode *);
extern struct dentry *d_find_alias(struct inode *);
extern void d_prune_aliases(struct inode *);
-extern void d_dispose_if_unused(struct dentry *, struct list_head *);
+extern bool __move_to_shrink_list(struct dentry *, struct list_head *);
extern void shrink_dentry_list(struct list_head *);
extern struct dentry *d_find_alias_rcu(struct inode *);
@@ -366,6 +365,24 @@ static inline struct dentry *dget(struct dentry *dentry)
return dentry;
}
+/* dentry->d_inode->i_lock must be held by caller */
+static inline bool dget_alias_ilocked(struct dentry *dentry)
+{
+ if (likely(!(READ_ONCE(dentry->d_flags) & DCACHE_NORCU))) {
+ lockref_get(&dentry->d_lockref);
+ return true;
+ }
+ // NORCU dentries with zero refcount MUST NOT be grabbed
+ spin_lock(&dentry->d_lock);
+ if (dentry->d_lockref.count > 0) {
+ dget_dlock(dentry);
+ spin_unlock(&dentry->d_lock);
+ return true;
+ }
+ spin_unlock(&dentry->d_lock);
+ return false;
+}
+
extern struct dentry *dget_parent(struct dentry *dentry);
/**
diff --git a/include/linux/fs/super_types.h b/include/linux/fs/super_types.h
index aa86e4944dbf..ef7941e9dc79 100644
--- a/include/linux/fs/super_types.h
+++ b/include/linux/fs/super_types.h
@@ -162,7 +162,8 @@ struct super_block {
struct unicode_map *s_encoding;
__u16 s_encoding_flags;
#endif
- struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
+ struct hlist_head s_roots; /* alternate root dentries for NFS */
+ spinlock_t s_roots_lock;
struct mount *s_mounts; /* list of mounts; _not_ for fs use */
struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */
struct file *s_bdev_file;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 83ee991cde2b..35ea18a40b66 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1745,7 +1745,6 @@ struct nfs_unlinkdata {
struct nfs_removeargs args;
struct nfs_removeres res;
struct dentry *dentry;
- wait_queue_head_t wq;
const struct cred *cred;
struct nfs_fattr dir_attr;
long timeout;