summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMing Lei <ming.lei@redhat.com>2026-03-11 11:28:37 +0800
committerJens Axboe <axboe@kernel.dk>2026-03-11 08:30:30 -0600
commit3dbaacf6ab68f81e3375fe769a2ecdbd3ce386fd (patch)
treef5b83097242860abc02d0c5b6dcbdd84b6a7cc36
parentdaa6c79858e9ca75c548452bf71db8a9e61bde42 (diff)
blk-cgroup: wait for blkcg cleanup before initializing new disk
When a queue is shared across disk rebind (e.g., SCSI unbind/bind), the previous disk's blkcg state is cleaned up asynchronously via disk_release() -> blkcg_exit_disk(). If the new disk's blkcg_init_disk() runs before that cleanup finishes, we may overwrite q->root_blkg while the old one is still alive, and radix_tree_insert() in blkg_create() fails with -EEXIST because the old blkg entries still occupy the same queue id slot in blkcg->blkg_tree. This causes the sd probe to fail with -ENOMEM. Fix it by waiting in blkcg_init_disk() for root_blkg to become NULL, which indicates the previous disk's blkcg cleanup has completed. Fixes: 1059699f87eb ("block: move blkcg initialization/destroy into disk allocation/release handler") Cc: Yi Zhang <yi.zhang@redhat.com> Signed-off-by: Ming Lei <ming.lei@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Link: https://patch.msgid.link/20260311032837.2368714-1-ming.lei@redhat.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--block/blk-cgroup.c15
1 files changed, 15 insertions, 0 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b70096497d38..2d7b18eb7291 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -24,6 +24,7 @@
#include <linux/backing-dev.h>
#include <linux/slab.h>
#include <linux/delay.h>
+#include <linux/wait_bit.h>
#include <linux/atomic.h>
#include <linux/ctype.h>
#include <linux/resume_user_mode.h>
@@ -611,6 +612,8 @@ restart:
q->root_blkg = NULL;
spin_unlock_irq(&q->queue_lock);
+
+ wake_up_var(&q->root_blkg);
}
static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
@@ -1498,6 +1501,18 @@ int blkcg_init_disk(struct gendisk *disk)
struct blkcg_gq *new_blkg, *blkg;
bool preloaded;
+ /*
+ * If the queue is shared across disk rebind (e.g., SCSI), the
+ * previous disk's blkcg state is cleaned up asynchronously via
+ * disk_release() -> blkcg_exit_disk(). Wait for that cleanup to
+ * finish (indicated by root_blkg becoming NULL) before setting up
+ * new blkcg state. Otherwise, we may overwrite q->root_blkg while
+ * the old one is still alive, and radix_tree_insert() in
+ * blkg_create() will fail with -EEXIST because the old entries
+ * still occupy the same queue id slot in blkcg->blkg_tree.
+ */
+ wait_var_event(&q->root_blkg, !READ_ONCE(q->root_blkg));
+
new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
if (!new_blkg)
return -ENOMEM;