summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tao Cui <cuitao@kylinos.cn> 2026-05-22 17:15:30 +0800
committer Jens Axboe <axboe@kernel.dk> 2026-05-27 08:37:47 -0600
commit 6235ea3f8b8ffca0333ade0863992f3cd69592ea (patch)
tree 8f3b13515a1e575e576b050feac7717aaf2a552e
parent 6b2f3e4970e48e70c10111366f59f908f2ea6f96 (diff)
blk-throttle: schedule parent dispatch in tg_flush_bios()
tg_flush_bios() schedules pending_timer on the child tg's own
service_queue, which causes throtl_pending_timer_fn() to dispatch from
the child's pending_tree. For leaf cgroups this tree is empty, so the
timer fires and exits without dispatching the throttled bio. The
throttled bio sits in the parent's pending_tree with disptime set to
jiffies (THROTL_TG_CANCELING zeroes all dispatch times), but the
parent's timer is never explicitly rescheduled. The bio only gets
dispatched when the parent timer eventually fires at its previously
scheduled expiry.

Fix by calling throtl_schedule_next_dispatch(sq->parent_sq, true)
instead, matching what tg_set_limit() already does. This forces the
parent's dispatch cycle to run immediately and flush all canceling bios
without waiting for a stale timer.

For the device deletion path (blk_throtl_cancel_bios), directly complete
throttled bios with EIO via bio_io_error() instead of dispatching them
through the timer -> work -> submission chain. This avoids a race with
the SCSI state machine where bios can reach the SCSI layer while the
device is in SDEV_CANCEL state, causing ENODEV instead of the expected
EIO.

Reported-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Link: https://lore.kernel.org/all/ag2owaQQoigp_fSV@shinmob/
Signed-off-by: Tao Cui <cuitao@kylinos.cn>
Link: https://patch.msgid.link/20260522091530.1901437-1-cuitao@kylinos.cn
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- block/blk-throttle.c 51
1 files changed, 49 insertions, 2 deletions
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index cabf91f0d0dc..88986dde1e18 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1649,7 +1649,7 @@ static void tg_flush_bios(struct throtl_grp *tg)
*/
tg_update_disptime(tg);
- throtl_schedule_pending_timer(sq, jiffies + 1);
+ throtl_schedule_next_dispatch(sq->parent_sq, true);
}
static void throtl_pd_offline(struct blkg_policy_data *pd)
@@ -1668,11 +1668,52 @@ struct blkcg_policy blkcg_policy_throtl = {
.pd_free_fn = throtl_pd_free,
};
+static void tg_cancel_writeback_bios(struct throtl_grp *tg,
+ struct bio_list *cancel_bios)
+{
+ struct throtl_service_queue *sq = &tg->service_queue;
+ struct throtl_data *td = sq_to_td(sq);
+ int rw;
+
+ if (tg->flags & THROTL_TG_CANCELING)
+ return;
+ tg->flags |= THROTL_TG_CANCELING;
+
+ for (rw = READ; rw <= WRITE; rw++) {
+ struct throtl_qnode *qn, *tmp;
+ unsigned int nr_bios = 0;
+
+ list_for_each_entry_safe(qn, tmp, &sq->queued[rw], node) {
+ struct bio *bio;
+
+ while ((bio = bio_list_pop(&qn->bios_iops))) {
+ sq->nr_queued_iops[rw]--;
+ bio_list_add(&cancel_bios[rw], bio);
+ nr_bios++;
+ }
+ while ((bio = bio_list_pop(&qn->bios_bps))) {
+ sq->nr_queued_bps[rw]--;
+ bio_list_add(&cancel_bios[rw], bio);
+ nr_bios++;
+ }
+
+ list_del_init(&qn->node);
+ blkg_put(tg_to_blkg(qn->tg));
+ }
+
+ td->nr_queued[rw] -= nr_bios;
+ }
+
+ throtl_dequeue_tg(tg);
+}
+
void blk_throtl_cancel_bios(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
struct cgroup_subsys_state *pos_css;
struct blkcg_gq *blkg;
+ struct bio_list cancel_bios[2] = { };
+ int rw;
if (!blk_throtl_activated(q))
return;
@@ -1693,10 +1734,16 @@ void blk_throtl_cancel_bios(struct gendisk *disk)
* Cancel bios here to ensure no bios are inflight after
* del_gendisk.
*/
- tg_flush_bios(blkg_to_tg(blkg));
+ tg_cancel_writeback_bios(blkg_to_tg(blkg), cancel_bios);
}
rcu_read_unlock();
spin_unlock_irq(&q->queue_lock);
+
+ for (rw = READ; rw <= WRITE; rw++) {
+ struct bio *bio;
+ while ((bio = bio_list_pop(&cancel_bios[rw])))
+ bio_io_error(bio);
+ }
}
static bool tg_within_limit(struct throtl_grp *tg, struct bio *bio, bool rw)