| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-09 08:53:24 +0900 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-09 08:53:24 +0900 |
| commit | 4482ebb2970efa58173075c101426b2f3af40b41 | |
| tree | f2e39afc0e4f305d9a6597464ea3efbb50c218d6 /block | |
| parent | 70e3083ec686100682c146346efc2b3780d717df | |
| parent | 0f45353dd48037af61f70df3468d25ca46afe909 | |
Merge tag 'block-6.19-20251208' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull block updates from Jens Axboe:
"Followup set of fixes and updates for block for the 6.19 merge window.
NVMe had some last-minute debates which led to dropping some patches
from that tree, which is why the initial PR didn't have NVMe included.
It's here now. This pull request contains:
- NVMe pull request via Keith:
- Subsystem usage cleanups (Max)
- Endpoint device fixes (Shin'ichiro)
- Debug statements (Gerd)
- FC fabrics cleanups and fixes (Daniel)
- Consistent alloc API usages (Israel)
- Code comment updates (Chu)
- Authentication retry fix (Justin)
- Fix a memory leak in the discard ioctl code, if the task is being
interrupted by a signal at just the wrong time
- Zoned write plugging fixes
- Add ioctls for persistent reservations (see the userspace sketch
after this message)
- Enable per-cpu bio caching by default (see the note after the diff)
- Various little fixes and tweaks"
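For orientation, here is a rough userspace sketch of the two new read-side
persistent reservation ioctls. It is not authoritative: the field names
follow the blkdev_pr_read_keys() and blkdev_pr_read_reservation() handlers
in the block/ioctl.c hunk below, while the exact struct layout and integer
types in <linux/pr.h> are assumed here rather than quoted.

```c
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/pr.h>

int main(int argc, char **argv)
{
	uint64_t keys[16];
	struct pr_read_keys rk;
	struct pr_read_reservation rr;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDWR);
	if (fd < 0)
		return 1;

	/* Read the registered keys; types here are assumptions. */
	memset(&rk, 0, sizeof(rk));
	rk.num_keys = 16;               /* capacity of the keys[] buffer */
	rk.keys_ptr = (uintptr_t)keys;  /* kernel copies keys out here */
	if (ioctl(fd, IOC_PR_READ_KEYS, &rk) == 0)
		printf("generation %u, %u registered key(s)\n",
		       (unsigned)rk.generation, (unsigned)rk.num_keys);

	/* Read the current reservation holder, if any. */
	memset(&rr, 0, sizeof(rr));
	if (ioctl(fd, IOC_PR_READ_RESERVATION, &rr) == 0)
		printf("holder key 0x%llx, type %u\n",
		       (unsigned long long)rr.key, (unsigned)rr.type);

	close(fd);
	return 0;
}
```

Per the handler below, the num_keys value written back is the total number
of keys registered on the device, which may exceed the buffer capacity that
was passed in; only min(in, out) keys are actually copied.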
* tag 'block-6.19-20251208' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (27 commits)
nvme-fabrics: add ENOKEY to no retry criteria for authentication failures
nvme-auth: use kvfree() for memory allocated with kvcalloc()
nvmet-tcp: use kvcalloc for commands array
nvmet-rdma: use kvcalloc for commands and responses arrays
nvme: fix typo error in nvme target
nvmet-fc: use pr_* print macros instead of dev_*
nvmet-fcloop: remove unused lsdir member.
nvmet-fcloop: check all request and response have been processed
nvme-fc: check all request and response have been processed
block: fix memory leak in __blkdev_issue_zero_pages
block: fix comment for op_is_zone_mgmt() to include RESET_ALL
block: Clear BLK_ZONE_WPLUG_PLUGGED when aborting plugged BIOs
blk-mq: Abort suspend when wakeup events are pending
blk-mq: add blk_rq_nr_bvec() helper
block: add IOC_PR_READ_RESERVATION ioctl
block: add IOC_PR_READ_KEYS ioctl
nvme: reject invalid pr_read_keys() num_keys values
scsi: sd: reject invalid pr_read_keys() num_keys values
block: enable per-cpu bio cache by default
block: use bio_alloc_bioset for passthru IO by default
...
Diffstat (limited to 'block')
| -rw-r--r-- | block/bio.c | 26 |
| -rw-r--r-- | block/blk-lib.c | 6 |
| -rw-r--r-- | block/blk-map.c | 90 |
| -rw-r--r-- | block/blk-mq.c | 18 |
| -rw-r--r-- | block/blk-zoned.c | 4 |
| -rw-r--r-- | block/fops.c | 4 |
| -rw-r--r-- | block/ioctl.c | 84 |
7 files changed, 155 insertions, 77 deletions
diff --git a/block/bio.c b/block/bio.c
index 7b13bdf72de0..fa5ff36b443f 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -517,20 +517,18 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
 	if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
 		return NULL;
 
-	if (opf & REQ_ALLOC_CACHE) {
-		if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
-			bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
-						     gfp_mask, bs);
-			if (bio)
-				return bio;
-			/*
-			 * No cached bio available, bio returned below marked with
-			 * REQ_ALLOC_CACHE to particpate in per-cpu alloc cache.
-			 */
-		} else {
-			opf &= ~REQ_ALLOC_CACHE;
-		}
-	}
+	if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
+		opf |= REQ_ALLOC_CACHE;
+		bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
+					     gfp_mask, bs);
+		if (bio)
+			return bio;
+		/*
+		 * No cached bio available, bio returned below marked with
+		 * REQ_ALLOC_CACHE to participate in per-cpu alloc cache.
+		 */
+	} else
+		opf &= ~REQ_ALLOC_CACHE;
 
 	/*
 	 * submit_bio_noacct() converts recursion to iteration; this means if
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 19e0203cc18a..9e2cc58f881f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -202,13 +202,13 @@ static void __blkdev_issue_zero_pages(struct block_device *bdev,
 		unsigned int nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects);
 		struct bio *bio;
 
-		bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask);
-		bio->bi_iter.bi_sector = sector;
-
 		if ((flags & BLKDEV_ZERO_KILLABLE) &&
 		    fatal_signal_pending(current))
 			break;
 
+		bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask);
+		bio->bi_iter.bi_sector = sector;
+
 		do {
 			unsigned int len;
diff --git a/block/blk-map.c b/block/blk-map.c
index 17a1dc288678..4533094d9458 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -37,6 +37,25 @@ static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
 	return bmd;
 }
 
+static inline void blk_mq_map_bio_put(struct bio *bio)
+{
+	bio_put(bio);
+}
+
+static struct bio *blk_rq_map_bio_alloc(struct request *rq,
+		unsigned int nr_vecs, gfp_t gfp_mask)
+{
+	struct block_device *bdev = rq->q->disk ? rq->q->disk->part0 : NULL;
+	struct bio *bio;
+
+	bio = bio_alloc_bioset(bdev, nr_vecs, rq->cmd_flags, gfp_mask,
+			       &fs_bio_set);
+	if (!bio)
+		return NULL;
+
+	return bio;
+}
+
 /**
  * bio_copy_from_iter - copy all pages from iov_iter to bio
  * @bio: The &struct bio which describes the I/O as destination
@@ -154,10 +173,9 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
 		nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));
 
 	ret = -ENOMEM;
-	bio = bio_kmalloc(nr_pages, gfp_mask);
+	bio = blk_rq_map_bio_alloc(rq, nr_pages, gfp_mask);
 	if (!bio)
 		goto out_bmd;
-	bio_init_inline(bio, NULL, nr_pages, req_op(rq));
 
 	if (map_data) {
 		nr_pages = 1U << map_data->page_order;
@@ -233,43 +251,12 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
 cleanup:
 	if (!map_data)
 		bio_free_pages(bio);
-	bio_uninit(bio);
-	kfree(bio);
+	blk_mq_map_bio_put(bio);
 out_bmd:
 	kfree(bmd);
 	return ret;
 }
 
-static void blk_mq_map_bio_put(struct bio *bio)
-{
-	if (bio->bi_opf & REQ_ALLOC_CACHE) {
-		bio_put(bio);
-	} else {
-		bio_uninit(bio);
-		kfree(bio);
-	}
-}
-
-static struct bio *blk_rq_map_bio_alloc(struct request *rq,
-		unsigned int nr_vecs, gfp_t gfp_mask)
-{
-	struct block_device *bdev = rq->q->disk ? rq->q->disk->part0 : NULL;
-	struct bio *bio;
-
-	if (rq->cmd_flags & REQ_ALLOC_CACHE && (nr_vecs <= BIO_INLINE_VECS)) {
-		bio = bio_alloc_bioset(bdev, nr_vecs, rq->cmd_flags, gfp_mask,
-				       &fs_bio_set);
-		if (!bio)
-			return NULL;
-	} else {
-		bio = bio_kmalloc(nr_vecs, gfp_mask);
-		if (!bio)
-			return NULL;
-		bio_init_inline(bio, bdev, nr_vecs, req_op(rq));
-	}
-	return bio;
-}
-
 static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 		gfp_t gfp_mask)
 {
@@ -318,25 +305,23 @@ static void bio_invalidate_vmalloc_pages(struct bio *bio)
 static void bio_map_kern_endio(struct bio *bio)
 {
 	bio_invalidate_vmalloc_pages(bio);
-	bio_uninit(bio);
-	kfree(bio);
+	blk_mq_map_bio_put(bio);
 }
 
-static struct bio *bio_map_kern(void *data, unsigned int len, enum req_op op,
+static struct bio *bio_map_kern(struct request *rq, void *data, unsigned int len,
 		gfp_t gfp_mask)
 {
 	unsigned int nr_vecs = bio_add_max_vecs(data, len);
 	struct bio *bio;
 
-	bio = bio_kmalloc(nr_vecs, gfp_mask);
+	bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
-	bio_init_inline(bio, NULL, nr_vecs, op);
+
 	if (is_vmalloc_addr(data)) {
 		bio->bi_private = data;
 		if (!bio_add_vmalloc(bio, data, len)) {
-			bio_uninit(bio);
-			kfree(bio);
+			blk_mq_map_bio_put(bio);
 			return ERR_PTR(-EINVAL);
 		}
 	} else {
@@ -349,8 +334,7 @@ static struct bio *bio_map_kern(void *data, unsigned int len, enum req_op op,
 static void bio_copy_kern_endio(struct bio *bio)
 {
 	bio_free_pages(bio);
-	bio_uninit(bio);
-	kfree(bio);
+	blk_mq_map_bio_put(bio);
 }
 
 static void bio_copy_kern_endio_read(struct bio *bio)
@@ -369,6 +353,7 @@ static void bio_copy_kern_endio_read(struct bio *bio)
 
 /**
  * bio_copy_kern - copy kernel address into bio
+ * @rq: request to fill
  * @data: pointer to buffer to copy
  * @len: length in bytes
  * @op: bio/request operation
@@ -377,9 +362,10 @@ static void bio_copy_kern_endio_read(struct bio *bio)
  *	copy the kernel address into a bio suitable for io to a block
  *	device. Returns an error pointer in case of error.
  */
-static struct bio *bio_copy_kern(void *data, unsigned int len, enum req_op op,
+static struct bio *bio_copy_kern(struct request *rq, void *data, unsigned int len,
 		gfp_t gfp_mask)
 {
+	enum req_op op = req_op(rq);
 	unsigned long kaddr = (unsigned long)data;
 	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	unsigned long start = kaddr >> PAGE_SHIFT;
@@ -394,10 +380,9 @@ static struct bio *bio_copy_kern(void *data, unsigned int len, enum req_op op,
 		return ERR_PTR(-EINVAL);
 
 	nr_pages = end - start;
-	bio = bio_kmalloc(nr_pages, gfp_mask);
+	bio = blk_rq_map_bio_alloc(rq, nr_pages, gfp_mask);
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
-	bio_init_inline(bio, NULL, nr_pages, op);
 
 	while (len) {
 		struct page *page;
@@ -431,8 +416,7 @@ static struct bio *bio_copy_kern(void *data, unsigned int len, enum req_op op,
 
 cleanup:
 	bio_free_pages(bio);
-	bio_uninit(bio);
-	kfree(bio);
+	blk_mq_map_bio_put(bio);
 	return ERR_PTR(-ENOMEM);
 }
 
@@ -679,18 +663,16 @@ int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len,
 		return -EINVAL;
 
 	if (!blk_rq_aligned(rq->q, addr, len) || object_is_on_stack(kbuf))
-		bio = bio_copy_kern(kbuf, len, req_op(rq), gfp_mask);
+		bio = bio_copy_kern(rq, kbuf, len, gfp_mask);
 	else
-		bio = bio_map_kern(kbuf, len, req_op(rq), gfp_mask);
+		bio = bio_map_kern(rq, kbuf, len, gfp_mask);
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
 
 	ret = blk_rq_append_bio(rq, bio);
-	if (unlikely(ret)) {
-		bio_uninit(bio);
-		kfree(bio);
-	}
+	if (unlikely(ret))
+		blk_mq_map_bio_put(bio);
 	return ret;
 }
 EXPORT_SYMBOL(blk_rq_map_kern);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4e96bb246247..bd8b11c472a2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -23,6 +23,7 @@
 #include <linux/cache.h>
 #include <linux/sched/topology.h>
 #include <linux/sched/signal.h>
+#include <linux/suspend.h>
 #include <linux/delay.h>
 #include <linux/crash_dump.h>
 #include <linux/prefetch.h>
@@ -3718,6 +3719,7 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
 {
 	struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
 			struct blk_mq_hw_ctx, cpuhp_online);
+	int ret = 0;
 
 	if (blk_mq_hctx_has_online_cpu(hctx, cpu))
 		return 0;
@@ -3738,12 +3740,24 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
 	 * frozen and there are no requests.
 	 */
 	if (percpu_ref_tryget(&hctx->queue->q_usage_counter)) {
-		while (blk_mq_hctx_has_requests(hctx))
+		while (blk_mq_hctx_has_requests(hctx)) {
+			/*
+			 * The wakeup capable IRQ handler of block device is
+			 * not called during suspend. Skip the loop by checking
+			 * pm_wakeup_pending to prevent the deadlock and improve
+			 * suspend latency.
+			 */
+			if (pm_wakeup_pending()) {
+				clear_bit(BLK_MQ_S_INACTIVE, &hctx->state);
+				ret = -EBUSY;
+				break;
+			}
 			msleep(5);
+		}
 		percpu_ref_put(&hctx->queue->q_usage_counter);
 	}
 
-	return 0;
+	return ret;
 }
 
 /*
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index dcc295721c2c..394d8d74bba9 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -741,6 +741,8 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
 {
 	struct bio *bio;
 
+	lockdep_assert_held(&zwplug->lock);
+
 	if (bio_list_empty(&zwplug->bio_list))
 		return;
 
@@ -748,6 +750,8 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
 		 zwplug->disk->disk_name, zwplug->zone_no);
 	while ((bio = bio_list_pop(&zwplug->bio_list)))
 		blk_zone_wplug_bio_io_error(zwplug, bio);
+
+	zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
 }
 
 /*
diff --git a/block/fops.c b/block/fops.c
index 4dad9c2d5796..4d32785b31d9 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -184,8 +184,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	loff_t pos = iocb->ki_pos;
 	int ret = 0;
 
-	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
-		opf |= REQ_ALLOC_CACHE;
 	bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
 			       &blkdev_dio_pool);
 	dio = container_of(bio, struct blkdev_dio, bio);
@@ -333,8 +331,6 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	loff_t pos = iocb->ki_pos;
 	int ret = 0;
 
-	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
-		opf |= REQ_ALLOC_CACHE;
 	bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
 			       &blkdev_dio_pool);
 	dio = container_of(bio, struct blkdev_dio, bio);
diff --git a/block/ioctl.c b/block/ioctl.c
index 2b3ab9bfc413..61feed686418 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -423,6 +423,86 @@ static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode,
 	return ops->pr_clear(bdev, c.key);
 }
 
+static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode,
+		struct pr_read_keys __user *arg)
+{
+	const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+	struct pr_keys *keys_info;
+	struct pr_read_keys read_keys;
+	u64 __user *keys_ptr;
+	size_t keys_info_len;
+	size_t keys_copy_len;
+	int ret;
+
+	if (!blkdev_pr_allowed(bdev, mode))
+		return -EPERM;
+	if (!ops || !ops->pr_read_keys)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&read_keys, arg, sizeof(read_keys)))
+		return -EFAULT;
+
+	keys_info_len = struct_size(keys_info, keys, read_keys.num_keys);
+	if (keys_info_len == SIZE_MAX)
+		return -EINVAL;
+
+	keys_info = kzalloc(keys_info_len, GFP_KERNEL);
+	if (!keys_info)
+		return -ENOMEM;
+
+	keys_info->num_keys = read_keys.num_keys;
+
+	ret = ops->pr_read_keys(bdev, keys_info);
+	if (ret)
+		goto out;
+
+	/* Copy out individual keys */
+	keys_ptr = u64_to_user_ptr(read_keys.keys_ptr);
+	keys_copy_len = min(read_keys.num_keys, keys_info->num_keys) *
+			sizeof(keys_info->keys[0]);
+
+	if (copy_to_user(keys_ptr, keys_info->keys, keys_copy_len)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	/* Copy out the arg struct */
+	read_keys.generation = keys_info->generation;
+	read_keys.num_keys = keys_info->num_keys;
+
+	if (copy_to_user(arg, &read_keys, sizeof(read_keys)))
+		ret = -EFAULT;
+out:
+	kfree(keys_info);
+	return ret;
+}
+
+static int blkdev_pr_read_reservation(struct block_device *bdev,
+		blk_mode_t mode, struct pr_read_reservation __user *arg)
+{
+	const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+	struct pr_held_reservation rsv = {};
+	struct pr_read_reservation out = {};
+	int ret;
+
+	if (!blkdev_pr_allowed(bdev, mode))
+		return -EPERM;
+	if (!ops || !ops->pr_read_reservation)
+		return -EOPNOTSUPP;
+
+	ret = ops->pr_read_reservation(bdev, &rsv);
+	if (ret)
+		return ret;
+
+	out.key = rsv.key;
+	out.generation = rsv.generation;
+	out.type = rsv.type;
+
+	if (copy_to_user(arg, &out, sizeof(out)))
+		return -EFAULT;
+	return 0;
+}
+
 static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd,
 		unsigned long arg)
 {
@@ -645,6 +725,10 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode,
 		return blkdev_pr_preempt(bdev, mode, argp, true);
 	case IOC_PR_CLEAR:
 		return blkdev_pr_clear(bdev, mode, argp);
+	case IOC_PR_READ_KEYS:
+		return blkdev_pr_read_keys(bdev, mode, argp);
+	case IOC_PR_READ_RESERVATION:
+		return blkdev_pr_read_reservation(bdev, mode, argp);
 	default:
 		return blk_get_meta_cap(bdev, cmd, argp);
 	}
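A side note on the per-cpu bio cache change visible in the bio.c and fops.c
hunks above: bio_alloc_bioset() now opts small allocations into the cache on
its own, so callers such as the blkdev direct-IO paths no longer set
REQ_ALLOC_CACHE themselves. A minimal, hypothetical caller sketch (not part
of this diff):

```c
/*
 * Hypothetical caller, not from this series: with per-cpu bio caching
 * on by default, a small bio allocated from a cached bio_set (such as
 * fs_bio_set) participates in the per-cpu cache automatically.
 */
static struct bio *example_alloc_cached(struct block_device *bdev,
					blk_opf_t opf)
{
	/* nr_vecs <= BIO_INLINE_VECS keeps the allocation cache-eligible */
	return bio_alloc_bioset(bdev, 1, opf, GFP_KERNEL, &fs_bio_set);
}
```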
