diff options
Diffstat (limited to 'include')
76 files changed, 927 insertions, 194 deletions
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index b701b5f972cb..c41d9a7565cf 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -17,6 +17,8 @@ #include <linux/property.h> #include <linux/types.h> +struct notifier_block; + struct acpi_handle_list { u32 count; acpi_handle *handles; diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 4e15583e0d25..f72e00517eb3 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -1386,6 +1386,12 @@ enum acpi_einj_command_status { #define ACPI_EINJ_CXL_MEM_FATAL (1<<17) #define ACPI_EINJ_VENDOR_DEFINED (1<<31) +/* EINJV2 error types from EINJV2_GET_ERROR_TYPE (ACPI 6.6) */ + +#define ACPI_EINJV2_PROCESSOR (1) +#define ACPI_EINJV2_MEMORY (1<<1) +#define ACPI_EINJV2_PCIE (1<<2) + /******************************************************************************* * * ERST - Error Record Serialization Table (ACPI 4.0) diff --git a/include/asm-generic/kprobes.h b/include/asm-generic/kprobes.h index 060eab094e5a..5290a2b2e15a 100644 --- a/include/asm-generic/kprobes.h +++ b/include/asm-generic/kprobes.h @@ -14,7 +14,7 @@ static unsigned long __used \ _kbl_addr_##fname = (unsigned long)fname; # define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname) /* Use this to forbid a kprobes attach on very low level functions */ -# define __kprobes __section(".kprobes.text") +# define __kprobes notrace __section(".kprobes.text") # define nokprobe_inline __always_inline #else # define NOKPROBE_SYMBOL(fname) diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index bf391903443d..0c5e5ed7b5e7 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -273,6 +273,12 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper); int drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper); bool drm_fb_helper_gem_is_fb(const struct drm_fb_helper *fb_helper, const struct drm_gem_object *obj); +#else +static inline bool drm_fb_helper_gem_is_fb(const struct drm_fb_helper *fb_helper, + const struct drm_gem_object *obj) +{ + return false; +} #endif #endif diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 33e80f30b8b8..a5d386583fb6 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -448,6 +448,8 @@ void ttm_resource_add_bulk_move(struct ttm_resource *res, struct ttm_buffer_object *bo); void ttm_resource_del_bulk_move(struct ttm_resource *res, struct ttm_buffer_object *bo); +void ttm_resource_del_bulk_move_unevictable(struct ttm_resource *res, + struct ttm_buffer_object *bo); void ttm_resource_move_to_lru_tail(struct ttm_resource *res); void ttm_resource_init(struct ttm_buffer_object *bo, diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 50b47eba7d01..e7195750d21b 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -105,6 +105,12 @@ ARM_SMCCC_SMC_32, \ 0, 0x3fff) +/* C1-Pro erratum 4193714: SME DVMSync early acknowledgement */ +#define ARM_SMCCC_CPU_WORKAROUND_4193714 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_CPU, 0x10) + #define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ARM_SMCCC_SMC_32, \ diff --git a/include/linux/bio.h b/include/linux/bio.h index 97d747320b35..dc17780d6c1e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -475,7 +475,8 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen); +int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen, + size_t minsize); void bio_iov_iter_unbounce(struct bio *bio, bool is_error, bool mark_dirty); extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b4b703c90ca9..01e203964892 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3725,6 +3725,7 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; +extern const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto; extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_find_vma_proto; diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index b907e6c2307d..260d7968cf72 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -108,6 +108,7 @@ int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev, extern unsigned int cdrom_check_events(struct cdrom_device_info *cdi, unsigned int clearing); +extern void cdrom_probe_write_features(struct cdrom_device_info *cdi); extern int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi); extern void unregister_cdrom(struct cdrom_device_info *cdi); diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index f42563739d2e..50a784da7a81 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -611,8 +611,8 @@ struct cgroup { /* used to wait for offlining of csses */ wait_queue_head_t offline_waitq; - /* used by cgroup_rmdir() to wait for dying tasks to leave */ - wait_queue_head_t dying_populated_waitq; + /* defers killing csses after removal until cgroup is depopulated */ + struct work_struct finish_destroy_work; /* used to schedule release agent */ struct work_struct release_agent_work; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e52160e85af4..f6d037a30fd8 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -53,6 +53,7 @@ struct kernel_clone_args; enum css_task_iter_flags { CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ + CSS_TASK_ITER_WITH_DEAD = (1U << 2), /* include exiting tasks */ CSS_TASK_ITER_SKIPPED = (1U << 16), /* internal flags */ }; diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 166933b82e27..d1203da56fc5 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -322,13 +322,13 @@ struct dma_buf { * @vmapping_counter: * * Used internally to refcnt the vmaps returned by dma_buf_vmap(). - * Protected by @lock. + * Protected by @resv. */ unsigned vmapping_counter; /** * @vmap_ptr: - * The current vmap ptr if @vmapping_counter > 0. Protected by @lock. + * The current vmap ptr if @vmapping_counter > 0. Protected by @resv. */ struct iosys_map vmap_ptr; diff --git a/include/linux/dpll.h b/include/linux/dpll.h index b7277a8b484d..f8037f1ab20b 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -286,6 +286,7 @@ int dpll_pin_ref_sync_pair_add(struct dpll_pin *pin, int dpll_device_change_ntf(struct dpll_device *dpll); +int __dpll_pin_change_ntf(struct dpll_pin *pin); int dpll_pin_change_ntf(struct dpll_pin *pin); int register_dpll_notifier(struct notifier_block *nb); diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h index 0a3bcd1718f3..be1b38c981d4 100644 --- a/include/linux/fprobe.h +++ b/include/linux/fprobe.h @@ -94,6 +94,7 @@ int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num); int register_fprobe_syms(struct fprobe *fp, const char **syms, int num); int unregister_fprobe(struct fprobe *fp); +int unregister_fprobe_async(struct fprobe *fp); bool fprobe_is_registered(struct fprobe *fp); int fprobe_count_ips_from_filter(const char *filter, const char *notfilter); #else @@ -113,6 +114,10 @@ static inline int unregister_fprobe(struct fprobe *fp) { return -EOPNOTSUPP; } +static inline int unregister_fprobe_async(struct fprobe *fp) +{ + return -EOPNOTSUPP; +} static inline bool fprobe_is_registered(struct fprobe *fp) { return false; diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 95985400d3d8..e5cde39d6e85 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -915,6 +915,7 @@ extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int obj_type); extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); +struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 6c75df30a281..cd4972a7c97c 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -273,11 +273,11 @@ enum { * * %__GFP_ZERO returns a zeroed page on success. * - * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself - * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that - * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting - * memory tags at the same time as zeroing memory has minimal additional - * performance impact. + * %__GFP_ZEROTAGS zeroes memory tags at allocation time. Setting memory tags at + * the same time as zeroing memory (e.g., with __GFP_ZERO) has minimal + * additional performance impact. However, __GFP_ZEROTAGS also zeroes the tags + * even if memory is not getting zeroed at allocation time (e.g., + * with init_on_free). * * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation. * Used for userspace and vmalloc pages; the latter are unpoisoned by diff --git a/include/linux/hid.h b/include/linux/hid.h index 442a80d79e89..bfb9859f391e 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1030,6 +1030,8 @@ struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_ty int hid_set_field(struct hid_field *, unsigned, __s32); int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt); +int hid_safe_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, + size_t bufsize, u32 size, int interrupt); struct hid_field *hidinput_get_led_field(struct hid_device *hid); unsigned int hidinput_count_leds(struct hid_device *hid); __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code); @@ -1298,8 +1300,8 @@ static inline u32 hid_report_len(struct hid_report *report) return DIV_ROUND_UP(report->size, 8) + (report->id > 0); } -int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, - int interrupt); +int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data, + size_t bufsize, u32 size, int interrupt); /* HID quirks API */ unsigned long hid_lookup_quirk(const struct hid_device *hdev); diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index a2e47dbcf82c..19fffa4574a4 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -72,8 +72,8 @@ struct hid_ops { int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len, u64 source, bool from_bpf); int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type, - u8 *data, u32 size, int interrupt, u64 source, bool from_bpf, - bool lock_already_taken); + u8 *data, size_t bufsize, u32 size, int interrupt, u64 source, + bool from_bpf, bool lock_already_taken); struct module *owner; const struct bus_type *bus_type; }; @@ -200,7 +200,8 @@ struct hid_bpf { #ifdef CONFIG_HID_BPF u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, - u32 *size, int interrupt, u64 source, bool from_bpf); + size_t *buf_size, u32 *size, int interrupt, u64 source, + bool from_bpf); int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, u32 size, enum hid_report_type rtype, @@ -215,8 +216,11 @@ int hid_bpf_device_init(struct hid_device *hid); const u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, unsigned int *size); #else /* CONFIG_HID_BPF */ static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, - u8 *data, u32 *size, int interrupt, - u64 source, bool from_bpf) { return data; } + u8 *data, size_t *buf_size, u32 *size, + int interrupt, u64 source, bool from_bpf) +{ + return data; +} static inline int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, unsigned char reportnum, u8 *buf, u32 size, enum hid_report_type rtype, diff --git a/include/linux/highmem.h b/include/linux/highmem.h index af03db851a1d..d7aac9de1c8a 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -347,10 +347,11 @@ static inline void clear_highpage_kasan_tagged(struct page *page) #ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGES -/* Return false to let people know we did not initialize the pages */ -static inline bool tag_clear_highpages(struct page *page, int numpages) +/* Returns true if the caller has to initialize the pages */ +static inline bool tag_clear_highpages(struct page *page, int numpages, + bool clear_pages) { - return false; + return clear_pages; } #endif diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h index 94c06bf214fb..15f02422e9ca 100644 --- a/include/linux/intel_tpmi.h +++ b/include/linux/intel_tpmi.h @@ -28,6 +28,12 @@ enum intel_tpmi_id { TPMI_INFO_ID = 0x81, /* Special ID for PCI BDF and Package ID information */ }; +#define TPMI_CORE_INIT 0 +#define TPMI_CORE_EXIT 1 + +int tpmi_register_notifier(struct notifier_block *nb); +int tpmi_unregister_notifier(struct notifier_block *nb); + struct oobmsm_plat_info *tpmi_get_platform_data(struct auxiliary_device *auxdev); struct resource *tpmi_get_resource_at_index(struct auxiliary_device *auxdev, int index); int tpmi_get_resource_count(struct auxiliary_device *auxdev); diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h index 167fba7dbf04..1fabf0f5ea8e 100644 --- a/include/linux/irq-entry-common.h +++ b/include/linux/irq-entry-common.h @@ -218,14 +218,6 @@ static __always_inline void __exit_to_user_mode_validate(void) lockdep_sys_exit(); } -/* Temporary workaround to keep ARM64 alive */ -static __always_inline void exit_to_user_mode_prepare_legacy(struct pt_regs *regs) -{ - __exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK); - rseq_exit_to_user_mode_legacy(); - __exit_to_user_mode_validate(); -} - /** * syscall_exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required * @regs: Pointer to pt_regs on entry stack diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h index 40d2fce68294..f78787e654f4 100644 --- a/include/linux/irqchip/arm-gic-v5.h +++ b/include/linux/irqchip/arm-gic-v5.h @@ -425,9 +425,6 @@ struct gicv5_its_itt_cfg { void gicv5_init_lpis(u32 max); void gicv5_deinit_lpis(void); -int gicv5_alloc_lpi(void); -void gicv5_free_lpi(u32 lpi); - void __init gicv5_its_of_probe(struct device_node *parent); void __init gicv5_its_acpi_probe(void); #endif diff --git a/include/linux/libata.h b/include/linux/libata.h index 5c085ef4eda7..127229fbd1a6 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -371,6 +371,7 @@ enum { /* return values for ->qc_defer */ ATA_DEFER_LINK = 1, ATA_DEFER_PORT = 2, + ATA_DEFER_LINK_EXCL = 3, /* desc_len for ata_eh_info and context */ ATA_EH_DESC_LEN = 80, @@ -854,6 +855,9 @@ struct ata_link { unsigned int sata_spd; /* current SATA PHY speed */ enum ata_lpm_policy lpm_policy; + struct work_struct deferred_qc_work; + struct ata_queued_cmd *deferred_qc; + /* record runtime error info, protected by host_set lock */ struct ata_eh_info eh_info; /* EH context */ @@ -899,9 +903,6 @@ struct ata_port { u64 qc_active; int nr_active_links; /* #links with active qcs */ - struct work_struct deferred_qc_work; - struct ata_queued_cmd *deferred_qc; - struct ata_link link; /* host default link */ struct ata_link *slave_link; /* see ata_slave_link_init() */ diff --git a/include/linux/list.h b/include/linux/list.h index 00ea8e5fb88b..09d979976b3b 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -191,6 +191,29 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head) __list_add(new, head->prev, head); } +/** + * list_add_tail_release - add a new entry with release barrier + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head, using a release barrier to set + * the ->next pointer that points to it. This is useful for implementing + * queues, in particular one that the elements will be walked through forwards + * locklessly. + */ +static inline void list_add_tail_release(struct list_head *new, + struct list_head *head) +{ + struct list_head *prev = head->prev; + + if (__list_add_valid(new, prev, head)) { + new->next = head; + new->prev = prev; + head->prev = new; + smp_store_release(&prev->next, new); + } +} + /* * Delete a list entry by making the prev/next entries * point to each other. @@ -645,6 +668,20 @@ static inline void list_splice_tail_init(struct list_head *list, }) /** + * list_first_entry_or_null_acquire - get the first element from a list with barrier + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * Note that if the list is empty, it returns NULL. + */ +#define list_first_entry_or_null_acquire(ptr, type, member) ({ \ + struct list_head *head__ = (ptr); \ + struct list_head *pos__ = smp_load_acquire(&head__->next); \ + pos__ != head__ ? list_entry(pos__, type, member) : NULL; \ +}) + +/** * list_last_entry_or_null - get the last element from a list * @ptr: the list head to take the element from. * @type: the type of the struct this is embedded in. diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h index c6eea9afb943..e5997120f45c 100644 --- a/include/linux/mailbox_client.h +++ b/include/linux/mailbox_client.h @@ -45,6 +45,7 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg); int mbox_flush(struct mbox_chan *chan, unsigned long timeout); void mbox_client_txdone(struct mbox_chan *chan, int r); /* atomic */ bool mbox_client_peek_data(struct mbox_chan *chan); /* atomic */ +unsigned int mbox_chan_tx_slots_available(struct mbox_chan *chan); /* atomic */ void mbox_free_channel(struct mbox_chan *chan); /* may sleep */ #endif /* __MAILBOX_CLIENT_H */ diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h index 80a427c7ca29..dc93287a2a01 100644 --- a/include/linux/mailbox_controller.h +++ b/include/linux/mailbox_controller.h @@ -3,6 +3,7 @@ #ifndef __MAILBOX_CONTROLLER_H #define __MAILBOX_CONTROLLER_H +#include <linux/bits.h> #include <linux/completion.h> #include <linux/device.h> #include <linux/hrtimer.h> @@ -11,6 +12,13 @@ struct mbox_chan; +/* Sentinel value distinguishing "no active request" from "NULL message data" */ +#define MBOX_NO_MSG ((void *)-1) + +#define MBOX_TXDONE_BY_IRQ BIT(0) /* controller has remote RTR irq */ +#define MBOX_TXDONE_BY_POLL BIT(1) /* controller can read status of last TX */ +#define MBOX_TXDONE_BY_ACK BIT(2) /* S/W ACK received by Client ticks the TX */ + /** * struct mbox_chan_ops - methods to control mailbox channels * @send_data: The API asks the MBOX controller driver, in atomic @@ -54,10 +62,10 @@ struct mbox_chan_ops { /** * struct mbox_controller - Controller of a class of communication channels - * @dev: Device backing this controller - * @ops: Operators that work on each communication chan - * @chans: Array of channels - * @num_chans: Number of channels in the 'chans' array. + * @dev: Device backing this controller. Required. + * @ops: Operators that work on each communication chan. Required. + * @chans: Array of channels. Required. + * @num_chans: Number of channels in the 'chans' array. Required. * @txdone_irq: Indicates if the controller can report to API when * the last transmitted data was read by the remote. * Eg, if it has some TX ACK irq. @@ -70,6 +78,7 @@ struct mbox_chan_ops { * @of_xlate: Controller driver specific mapping of channel via DT * @poll_hrt: API private. hrtimer used to poll for TXDONE on all * channels. + * @poll_hrt_lock: API private. Lock protecting access to poll_hrt. * @node: API private. To hook into list of controllers. */ struct mbox_controller { diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 0c464eade1d6..4a5631906aff 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -4,7 +4,7 @@ /* * Maple Tree - An RCU-safe adaptive tree for storing ranges * Copyright (c) 2018-2022 Oracle - * Authors: Liam R. Howlett <Liam.Howlett@Oracle.com> + * Authors: Liam R. Howlett <liam@infradead.org> * Matthew Wilcox <willy@infradead.org> */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 0b776907152e..af23453e9dbd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4391,7 +4391,7 @@ static inline void mmap_action_map_kernel_pages_full(struct vm_area_desc *desc, int mmap_action_prepare(struct vm_area_desc *desc); int mmap_action_complete(struct vm_area_struct *vma, - struct mmap_action *action); + struct mmap_action *action, bool is_compat); /* Look up the first VMA which exactly match the interval vm_start ... vm_end */ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index cf3374580f74..5d75cc5b057e 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -226,6 +226,7 @@ struct mr_table_ops { /** * struct mr_table - a multicast routing table + * @work: used for table destruction * @list: entry within a list of multicast routing tables * @net: net where this table belongs * @ops: protocol specific operations @@ -243,6 +244,7 @@ struct mr_table_ops { * @mroute_reg_vif_num: PIM-device vif index */ struct mr_table { + struct rcu_work work; struct list_head list; possible_net_t net; struct mr_table_ops ops; @@ -274,6 +276,7 @@ void vif_device_init(struct vif_device *v, unsigned short flags, unsigned short get_iflink_mask); +void mr_table_free(struct mr_table *mrt); struct mr_table * mr_table_alloc(struct net *net, u32 id, struct mr_table_ops *ops, diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 58abd306ebe3..782984ba3a20 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -290,6 +290,12 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) +#define SPINAND_PAGE_READ_PACKED_8D_8D_0_OP(addr) \ + SPI_MEM_OP(SPI_MEM_DTR_OP_PACKED_CMD(0x13, addr >> 16, 8), \ + SPI_MEM_DTR_OP_ADDR(2, addr & 0xffff, 8), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + #define SPINAND_PAGE_READ_FROM_CACHE_8D_8D_8D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_DTR_OP_RPT_CMD(0x9d, 8), \ SPI_MEM_DTR_OP_ADDR(2, addr, 8), \ @@ -483,6 +489,7 @@ struct spinand_ecc_info { #define SPINAND_HAS_PROG_PLANE_SELECT_BIT BIT(2) #define SPINAND_HAS_READ_PLANE_SELECT_BIT BIT(3) #define SPINAND_NO_RAW_ACCESS BIT(4) +#define SPINAND_ODTR_PACKED_PAGE_READ BIT(5) /** * struct spinand_ondie_ecc_conf - private SPI-NAND on-die ECC engine structure diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 77c778d84d4c..5a1c5c336fa4 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -146,6 +146,9 @@ struct xt_match { /* Called when user tries to insert an entry of this type. */ int (*checkentry)(const struct xt_mtchk_param *); + /* Called to validate hooks based on the match configuration. */ + int (*check_hooks)(const struct xt_mtchk_param *); + /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_mtdtor_param *); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT @@ -187,6 +190,9 @@ struct xt_target { /* Should return 0 on success or an error code otherwise (-Exxxx). */ int (*checkentry)(const struct xt_tgchk_param *); + /* Called to validate hooks based on the target configuration. */ + int (*check_hooks)(const struct xt_tgchk_param *); + /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_tgdtor_param *); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT @@ -279,8 +285,10 @@ bool xt_find_jump_offset(const unsigned int *offsets, int xt_check_proc_name(const char *name, unsigned int size); +int xt_check_hooks_match(struct xt_mtchk_param *par); int xt_check_match(struct xt_mtchk_param *, unsigned int size, u16 proto, bool inv_proto); +int xt_check_hooks_target(struct xt_tgchk_param *par); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u16 proto, bool inv_proto); @@ -297,9 +305,11 @@ struct xt_counters *xt_counters_alloc(unsigned int counters); struct xt_table *xt_register_table(struct net *net, const struct xt_table *table, + const struct nf_hook_ops *template_ops, struct xt_table_info *bootstrap, struct xt_table_info *newinfo); -void *xt_unregister_table(struct xt_table *table); +void xt_unregister_table_pre_exit(struct net *net, u8 af, const char *name); +struct xt_table *xt_unregister_table_exit(struct net *net, u8 af, const char *name); struct xt_table_info *xt_replace_table(struct xt_table *table, unsigned int num_counters, diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index a40aaf645fa4..05631a25e622 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -53,7 +53,6 @@ int arpt_register_table(struct net *net, const struct xt_table *table, const struct arpt_replace *repl, const struct nf_hook_ops *ops); void arpt_unregister_table(struct net *net, const char *name); -void arpt_unregister_table_pre_exit(struct net *net, const char *name); extern unsigned int arpt_do_table(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 132b0e4a6d4d..13593391d605 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -26,7 +26,6 @@ int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, const struct nf_hook_ops *ops); -void ipt_unregister_table_pre_exit(struct net *net, const char *name); void ipt_unregister_table_exit(struct net *net, const char *name); /* Standard entry. */ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 8b8885a73c76..c6d5b927830d 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -27,7 +27,6 @@ extern void *ip6t_alloc_initial_table(const struct xt_table *); int ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl, const struct nf_hook_ops *ops); -void ip6t_unregister_table_pre_exit(struct net *net, const char *name); void ip6t_unregister_table_exit(struct net *net, const char *name); extern unsigned int ip6t_do_table(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index ba17ac5bf356..243c0f737938 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -62,8 +62,8 @@ struct netfs_inode { struct fscache_cookie *cache; #endif struct mutex wb_lock; /* Writeback serialisation */ - loff_t remote_i_size; /* Size of the remote file */ - loff_t zero_point; /* Size after which we assume there's no data + loff_t _remote_i_size; /* Size of the remote file */ + loff_t _zero_point; /* Size after which we assume there's no data * on the server */ atomic_t io_count; /* Number of outstanding reqs */ unsigned long flags; @@ -252,7 +252,7 @@ struct netfs_io_request { unsigned long long collected_to; /* Point we've collected to */ unsigned long long cleaned_to; /* Position we've cleaned folios to */ unsigned long long abandon_to; /* Position to abandon folios to */ - pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ + const struct folio *no_unlock_folio; /* Don't unlock this folio after read */ unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ unsigned int debug_id; unsigned int rsize; /* Maximum read size (0 for none) */ @@ -475,6 +475,254 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode) } /** + * netfs_read_remote_i_size - Read remote_i_size safely + * @inode: The inode to access + * + * Read remote_i_size safely without the potential for tearing on 32-bit + * arches. + * + * NOTE: in a 32bit arch with a preemptable kernel and an UP compile the + * i_size_read/write must be atomic with respect to the local cpu (unlike with + * preempt disabled), but they don't need to be atomic with respect to other + * cpus like in true SMP (so they need either to either locally disable irq + * around the read or for example on x86 they can be still implemented as a + * cmpxchg8b without the need of the lock prefix). For SMP compiles and 64bit + * archs it makes no difference if preempt is enabled or not. + */ +static inline unsigned long long netfs_read_remote_i_size(const struct inode *inode) +{ + const struct netfs_inode *ictx = container_of(inode, struct netfs_inode, inode); + unsigned long long remote_i_size; + +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + unsigned int seq; + + do { + seq = read_seqcount_begin(&inode->i_size_seqcount); + remote_i_size = ictx->_remote_i_size; + } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + preempt_disable(); + remote_i_size = ictx->_remote_i_size; + preempt_enable(); +#else + /* Pairs with smp_store_release() in netfs_write_remote_i_size() */ + remote_i_size = smp_load_acquire(&ictx->_remote_i_size); +#endif + return remote_i_size; +} + +/* + * netfs_write_remote_i_size - Set remote_i_size safely + * @inode: The inode to access + * @remote_i_size: The new value for the size of the file on the server + * + * Set remote_i_size safely without the potential for tearing on 32-bit arches. + * + * Context: The caller must hold inode->i_lock. + * + * NOTE: unlike netfs_read_remote_i_size(), netfs_write_remote_i_size() does + * need locking around it (normally i_rwsem), otherwise on 32bit/SMP an update + * of i_size_seqcount can be lost, resulting in subsequent i_size_read() calls + * spinning forever. + */ +static inline void netfs_write_remote_i_size(struct inode *inode, + unsigned long long remote_i_size) +{ + struct netfs_inode *ictx = netfs_inode(inode); + +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + write_seqcount_begin(&inode->i_size_seqcount); + ictx->_remote_i_size = remote_i_size; + write_seqcount_end(&inode->i_size_seqcount); +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + preempt_disable(); + ictx->_remote_i_size = remote_i_size; + preempt_enable(); +#else + /* + * Pairs with smp_load_acquire() in netfs_read_remote_i_size() to + * ensure changes related to inode size (such as page contents) are + * visible before we see the changed inode size. + */ + smp_store_release(&ictx->_remote_i_size, remote_i_size); +#endif +} + +/** + * netfs_read_zero_point - Read zero_point safely + * @inode: The inode to access + * + * Read zero_point safely without the potential for tearing on 32-bit + * arches. + * + * NOTE: in a 32bit arch with a preemptable kernel and an UP compile the + * i_size_read/write must be atomic with respect to the local cpu (unlike with + * preempt disabled), but they don't need to be atomic with respect to other + * cpus like in true SMP (so they need either to either locally disable irq + * around the read or for example on x86 they can be still implemented as a + * cmpxchg8b without the need of the lock prefix). For SMP compiles and 64bit + * archs it makes no difference if preempt is enabled or not. + */ +static inline unsigned long long netfs_read_zero_point(const struct inode *inode) +{ + struct netfs_inode *ictx = container_of(inode, struct netfs_inode, inode); + unsigned long long zero_point; + +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + unsigned int seq; + + do { + seq = read_seqcount_begin(&inode->i_size_seqcount); + zero_point = ictx->_zero_point; + } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + preempt_disable(); + zero_point = ictx->_zero_point; + preempt_enable(); +#else + /* Pairs with smp_store_release() in netfs_write_zero_point() */ + zero_point = smp_load_acquire(&ictx->_zero_point); +#endif + return zero_point; +} + +/* + * netfs_write_zero_point - Set zero_point safely + * @inode: The inode to access + * @zero_point: The new value for the point beyond which the server has no data + * + * Set zero_point safely without the potential for tearing on 32-bit arches. + * + * Context: The caller must hold inode->i_lock. + * + * NOTE: unlike netfs_read_zero_point(), netfs_write_zero_point() does need + * locking around it (normally i_rwsem), otherwise on 32bit/SMP an update of + * i_size_seqcount can be lost, resulting in subsequent read calls spinning + * forever. + */ +static inline void netfs_write_zero_point(struct inode *inode, + unsigned long long zero_point) +{ + struct netfs_inode *ictx = netfs_inode(inode); + +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + write_seqcount_begin(&inode->i_size_seqcount); + ictx->_zero_point = zero_point; + write_seqcount_end(&inode->i_size_seqcount); +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + preempt_disable(); + ictx->_zero_point = zero_point; + preempt_enable(); +#else + /* + * Pairs with smp_load_acquire() in netfs_read_zero_point() to + * ensure changes related to inode size (such as page contents) are + * visible before we see the changed inode size. + */ + smp_store_release(&ictx->_zero_point, zero_point); +#endif +} + +/** + * netfs_read_sizes - Read remote_i_size and zero_point safely + * @inode: The inode to access + * @i_size: Where to return the local file size. + * @remote_i_size: Where to return the size of the file on the server + * @zero_point: Where to return the the point beyond which the server has no data + * + * Read remote_i_size and zero_point safely without the potential for tearing + * on 32-bit arches. + * + * NOTE: in a 32bit arch with a preemptable kernel and an UP compile the + * i_size_read/write must be atomic with respect to the local cpu (unlike with + * preempt disabled), but they don't need to be atomic with respect to other + * cpus like in true SMP (so they need either to either locally disable irq + * around the read or for example on x86 they can be still implemented as a + * cmpxchg8b without the need of the lock prefix). For SMP compiles and 64bit + * archs it makes no difference if preempt is enabled or not. + */ +static inline void netfs_read_sizes(const struct inode *inode, + unsigned long long *i_size, + unsigned long long *remote_i_size, + unsigned long long *zero_point) +{ + const struct netfs_inode *ictx = container_of(inode, struct netfs_inode, inode); +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + unsigned int seq; + + do { + seq = read_seqcount_begin(&inode->i_size_seqcount); + *i_size = inode->i_size; + *remote_i_size = ictx->_remote_i_size; + *zero_point = ictx->_zero_point; + } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + preempt_disable(); + *i_size = inode->i_size; + *remote_i_size = ictx->_remote_i_size; + *zero_point = ictx->_zero_point; + preempt_enable(); +#else + /* Pairs with smp_store_release() in i_size_write() */ + *i_size = smp_load_acquire(&inode->i_size); + /* Pairs with smp_store_release() in netfs_write_remote_i_size() */ + *remote_i_size = smp_load_acquire(&ictx->_remote_i_size); + /* Pairs with smp_store_release() in netfs_write_zero_point() */ + *zero_point = smp_load_acquire(&ictx->_zero_point); +#endif +} + +/* + * netfs_write_sizes - Set i_size, remote_i_size and zero_point safely + * @inode: The inode to access + * @i_size: The new value for the local size of the file + * @remote_i_size: The new value for the size of the file on the server + * @zero_point: The new value for the point beyond which the server has no data + * + * Set both remote_i_size and zero_point safely without the potential for + * tearing on 32-bit arches. + * + * Context: The caller must hold inode->i_lock. + * + * NOTE: unlike netfs_read_zero_point(), netfs_write_zero_point() does need + * locking around it (normally i_rwsem), otherwise on 32bit/SMP an update of + * i_size_seqcount can be lost, resulting in subsequent read calls spinning + * forever. + */ +static inline void netfs_write_sizes(struct inode *inode, + unsigned long long i_size, + unsigned long long remote_i_size, + unsigned long long zero_point) +{ + struct netfs_inode *ictx = netfs_inode(inode); + +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + write_seqcount_begin(&inode->i_size_seqcount); + inode->i_size = i_size; + ictx->_remote_i_size = remote_i_size; + ictx->_zero_point = zero_point; + write_seqcount_end(&inode->i_size_seqcount); +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + preempt_disable(); + inode->i_size = i_size; + ictx->_remote_i_size = remote_i_size; + ictx->_zero_point = zero_point; + preempt_enable(); +#else + /* + * Pairs with smp_load_acquire() in i_size_read(), + * netfs_read_remote_i_size() and netfs_read_zero_point() to ensure + * changes related to inode size (such as page contents) are visible + * before we see the changed inode size. + */ + smp_store_release(&inode->i_size, i_size); + smp_store_release(&ictx->_remote_i_size, remote_i_size); + smp_store_release(&ictx->_zero_point, zero_point); +#endif +} + +/** * netfs_inode_init - Initialise a netfslib inode context * @ctx: The netfs inode to initialise * @ops: The netfs's operations list @@ -488,8 +736,8 @@ static inline void netfs_inode_init(struct netfs_inode *ctx, bool use_zero_point) { ctx->ops = ops; - ctx->remote_i_size = i_size_read(&ctx->inode); - ctx->zero_point = LLONG_MAX; + ctx->_remote_i_size = i_size_read(&ctx->inode); + ctx->_zero_point = LLONG_MAX; ctx->flags = 0; atomic_set(&ctx->io_count, 0); #if IS_ENABLED(CONFIG_FSCACHE) @@ -498,7 +746,7 @@ static inline void netfs_inode_init(struct netfs_inode *ctx, mutex_init(&ctx->wb_lock); /* ->releasepage() drives zero_point */ if (use_zero_point) { - ctx->zero_point = ctx->remote_i_size; + ctx->_zero_point = ctx->_remote_i_size; mapping_set_release_always(ctx->inode.i_mapping); } } @@ -511,13 +759,40 @@ static inline void netfs_inode_init(struct netfs_inode *ctx, * * Inform the netfs lib that a file got resized so that it can adjust its state. */ -static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size, +static inline void netfs_resize_file(struct netfs_inode *ictx, + unsigned long long new_i_size, bool changed_on_server) { +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + struct inode *inode = &ictx->inode; + + preempt_disable(); + write_seqcount_begin(&inode->i_size_seqcount); + if (changed_on_server) + ictx->_remote_i_size = new_i_size; + if (new_i_size < ictx->_zero_point) + ictx->_zero_point = new_i_size; + write_seqcount_end(&inode->i_size_seqcount); + preempt_enable(); +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + preempt_disable(); if (changed_on_server) - ctx->remote_i_size = new_i_size; - if (new_i_size < ctx->zero_point) - ctx->zero_point = new_i_size; + ictx->_remote_i_size = new_i_size; + if (new_i_size < ictx->_zero_point) + ictx->_zero_point = new_i_size; + preempt_enable(); +#else + /* + * Pairs with smp_load_acquire() in netfs_read_remote_i_size and + * netfs_read_zero_point() to ensure changes related to inode size + * (such as page contents) are visible before we see the changed inode + * size. + */ + if (changed_on_server) + smp_store_release(&ictx->_remote_i_size, new_i_size); + if (new_i_size < ictx->_zero_point) + smp_store_release(&ictx->_zero_point, new_i_size); +#endif } /** diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h index 682f81046345..d674d8ab26e6 100644 --- a/include/linux/nvme-auth.h +++ b/include/linux/nvme-auth.h @@ -49,9 +49,9 @@ int nvme_auth_augmented_challenge(u8 hmac_id, const u8 *skey, size_t skey_len, int nvme_auth_gen_privkey(struct crypto_kpp *dh_tfm, u8 dh_gid); int nvme_auth_gen_pubkey(struct crypto_kpp *dh_tfm, u8 *host_key, size_t host_key_len); -int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm, - const u8 *ctrl_key, size_t ctrl_key_len, - u8 *sess_key, size_t sess_key_len); +int nvme_auth_gen_session_key(struct crypto_kpp *dh_tfm, + const u8 *public_key, size_t public_key_len, + u8 *sess_key, size_t sess_key_len, u8 hash_id); int nvme_auth_generate_psk(u8 hmac_id, const u8 *skey, size_t skey_len, const u8 *c1, const u8 *c2, size_t hash_len, u8 **ret_psk, size_t *ret_len); diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 2abba7552605..e3bc44225692 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -261,6 +261,35 @@ static inline void list_replace_rcu(struct list_head *old, old->prev = LIST_POISON2; } +static inline void __list_splice_rcu(struct list_head *list, + struct list_head *prev, + struct list_head *next) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + + last->next = next; + first->prev = prev; + next->prev = last; + rcu_assign_pointer(list_next_rcu(prev), first); +} + +/** + * list_splice_rcu - splice a non-RCU list into an RCU-protected list, + * designed for stacks. + * @list: the non RCU-protected list to splice + * @head: the place in the existing RCU-protected list to splice + * + * The list pointed to by @head can be RCU-read traversed concurrently with + * this function. + */ +static inline void list_splice_rcu(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) + __list_splice_rcu(list, head, head->next); +} + /** * __list_splice_init_rcu - join an RCU-protected list into an existing list. * @list: the RCU-protected list to splice diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h index 015c8298bebc..fc2f596a6df1 100644 --- a/include/linux/rhashtable-types.h +++ b/include/linux/rhashtable-types.h @@ -12,6 +12,7 @@ #include <linux/alloc_tag.h> #include <linux/atomic.h> #include <linux/compiler.h> +#include <linux/irq_work_types.h> #include <linux/mutex.h> #include <linux/workqueue_types.h> @@ -49,6 +50,7 @@ typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, * @head_offset: Offset of rhash_head in struct to be hashed * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking + * @insecure_elasticity: Set to true to disable chain length checks * @automatic_shrinking: Enable automatic shrinking of tables * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object @@ -61,6 +63,7 @@ struct rhashtable_params { u16 head_offset; unsigned int max_size; u16 min_size; + bool insecure_elasticity; bool automatic_shrinking; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; @@ -75,6 +78,7 @@ struct rhashtable_params { * @p: Configuration parameters * @rhlist: True if this is an rhltable * @run_work: Deferred worker to expand/shrink asynchronously + * @run_irq_work: Bounces the @run_work kick through hard IRQ context. * @mutex: Mutex to protect current/future table swapping * @lock: Spin lock to protect walker list * @nelems: Number of elements in table @@ -86,6 +90,7 @@ struct rhashtable { struct rhashtable_params p; bool rhlist; struct work_struct run_work; + struct irq_work run_irq_work; struct mutex mutex; spinlock_t lock; atomic_t nelems; diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 0480509a6339..ef5230cece36 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -20,6 +20,7 @@ #include <linux/err.h> #include <linux/errno.h> +#include <linux/irq_work.h> #include <linux/jhash.h> #include <linux/list_nulls.h> #include <linux/workqueue.h> @@ -821,14 +822,15 @@ slow_path: goto out; } - if (elasticity <= 0) + if (elasticity <= 0 && !params.insecure_elasticity) goto slow_path; data = ERR_PTR(-E2BIG); if (unlikely(rht_grow_above_max(ht, tbl))) goto out_unlock; - if (unlikely(rht_grow_above_100(ht, tbl))) + if (unlikely(rht_grow_above_100(ht, tbl)) && + !params.insecure_elasticity) goto slow_path; /* Inserting at head of list makes unlocking free. */ @@ -846,7 +848,7 @@ slow_path: rht_assign_unlock(tbl, bkt, obj, flags); if (rht_grow_above_75(ht, tbl)) - schedule_work(&ht->run_work); + irq_work_queue(&ht->run_irq_work); data = NULL; out: diff --git a/include/linux/rseq.h b/include/linux/rseq.h index b9d62fc2140d..7ef79b25e714 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -9,6 +9,11 @@ void __rseq_handle_slowpath(struct pt_regs *regs); +static __always_inline bool rseq_v2(struct task_struct *t) +{ + return IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY) && likely(t->rseq.event.has_rseq > 1); +} + /* Invoked from resume_user_mode_work() */ static inline void rseq_handle_slowpath(struct pt_regs *regs) { @@ -16,8 +21,7 @@ static inline void rseq_handle_slowpath(struct pt_regs *regs) if (current->rseq.event.slowpath) __rseq_handle_slowpath(regs); } else { - /* '&' is intentional to spare one conditional branch */ - if (current->rseq.event.sched_switch & current->rseq.event.has_rseq) + if (current->rseq.event.sched_switch && current->rseq.event.has_rseq) __rseq_handle_slowpath(regs); } } @@ -30,9 +34,9 @@ void __rseq_signal_deliver(int sig, struct pt_regs *regs); */ static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { - if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) { - /* '&' is intentional to spare one conditional branch */ - if (current->rseq.event.has_rseq & current->rseq.event.user_irq) + if (rseq_v2(current)) { + /* has_rseq is implied in rseq_v2() */ + if (current->rseq.event.user_irq) __rseq_signal_deliver(ksig->sig, regs); } else { if (current->rseq.event.has_rseq) @@ -50,15 +54,22 @@ static __always_inline void rseq_sched_switch_event(struct task_struct *t) { struct rseq_event *ev = &t->rseq.event; - if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) { + /* + * Only apply the user_irq optimization for RSEQ ABI V2 registrations. + * Legacy users like TCMalloc rely on the original ABI V1 behaviour + * which updates IDs on every context swtich. + */ + if (rseq_v2(t)) { /* - * Avoid a boat load of conditionals by using simple logic - * to determine whether NOTIFY_RESUME needs to be raised. + * Avoid a boat load of conditionals by using simple logic to + * determine whether TIF_NOTIFY_RESUME or TIF_RSEQ needs to be + * raised. * - * It's required when the CPU or MM CID has changed or - * the entry was from user space. + * It's required when the CPU or MM CID has changed or the entry + * was via interrupt from user space. ev->has_rseq does not have + * to be evaluated here because rseq_v2() implies has_rseq. */ - bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq; + bool raise = ev->user_irq | ev->ids_changed; if (raise) { ev->sched_switch = true; @@ -66,6 +77,7 @@ static __always_inline void rseq_sched_switch_event(struct task_struct *t) } } else { if (ev->has_rseq) { + t->rseq.event.ids_changed = true; t->rseq.event.sched_switch = true; rseq_raise_notify_resume(t); } @@ -119,6 +131,8 @@ static inline void rseq_virt_userspace_exit(void) static inline void rseq_reset(struct task_struct *t) { + /* Protect against preemption and membarrier IPI */ + guard(irqsave)(); memset(&t->rseq, 0, sizeof(t->rseq)); t->rseq.ids.cpu_id = RSEQ_CPU_ID_UNINITIALIZED; } @@ -159,6 +173,7 @@ static inline unsigned int rseq_alloc_align(void) } #else /* CONFIG_RSEQ */ +static inline bool rseq_v2(struct task_struct *t) { return false; } static inline void rseq_handle_slowpath(struct pt_regs *regs) { } static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { } static inline void rseq_sched_switch_event(struct task_struct *t) { } diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h index f11ebd34f8b9..63bc72086e75 100644 --- a/include/linux/rseq_entry.h +++ b/include/linux/rseq_entry.h @@ -111,6 +111,20 @@ static __always_inline void rseq_slice_clear_grant(struct task_struct *t) t->rseq.slice.state.granted = false; } +/* + * Open coded, so it can be invoked within a user access region. + * + * This clears the user space state of the time slice extensions field only when + * the task has registered the optimized RSEQ_ABI V2. Some legacy registrations, + * e.g. TCMalloc, have conflicting non-ABI fields in struct RSEQ, which would be + * overwritten by an unconditional write. + */ +#define rseq_slice_clear_user(rseq, efault) \ +do { \ + if (rseq_slice_extension_enabled()) \ + unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); \ +} while (0) + static __always_inline bool __rseq_grant_slice_extension(bool work_pending) { struct task_struct *curr = current; @@ -230,10 +244,10 @@ static __always_inline bool rseq_slice_extension_enabled(void) { return false; } static __always_inline bool rseq_arm_slice_extension_timer(void) { return false; } static __always_inline void rseq_slice_clear_grant(struct task_struct *t) { } static __always_inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask) { return false; } +#define rseq_slice_clear_user(rseq, efault) do { } while (0) #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */ bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr); -bool rseq_debug_validate_ids(struct task_struct *t); static __always_inline void rseq_note_user_irq_entry(void) { @@ -353,43 +367,6 @@ efault: return false; } -/* - * On debug kernels validate that user space did not mess with it if the - * debug branch is enabled. - */ -bool rseq_debug_validate_ids(struct task_struct *t) -{ - struct rseq __user *rseq = t->rseq.usrptr; - u32 cpu_id, uval, node_id; - - /* - * On the first exit after registering the rseq region CPU ID is - * RSEQ_CPU_ID_UNINITIALIZED and node_id in user space is 0! - */ - node_id = t->rseq.ids.cpu_id != RSEQ_CPU_ID_UNINITIALIZED ? - cpu_to_node(t->rseq.ids.cpu_id) : 0; - - scoped_user_read_access(rseq, efault) { - unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault); - if (cpu_id != t->rseq.ids.cpu_id) - goto die; - unsafe_get_user(uval, &rseq->cpu_id, efault); - if (uval != cpu_id) - goto die; - unsafe_get_user(uval, &rseq->node_id, efault); - if (uval != node_id) - goto die; - unsafe_get_user(uval, &rseq->mm_cid, efault); - if (uval != t->rseq.ids.mm_cid) - goto die; - } - return true; -die: - t->rseq.event.fatal = true; -efault: - return false; -} - #endif /* RSEQ_BUILD_SLOW_PATH */ /* @@ -499,37 +476,50 @@ efault: * faults in task context are fatal too. */ static rseq_inline -bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, - u32 node_id, u64 *csaddr) +bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, u64 *csaddr) { struct rseq __user *rseq = t->rseq.usrptr; - if (static_branch_unlikely(&rseq_debug_enabled)) { - if (!rseq_debug_validate_ids(t)) - return false; - } - scoped_user_rw_access(rseq, efault) { + /* Validate the R/O fields for debug and optimized mode */ + if (static_branch_unlikely(&rseq_debug_enabled) || rseq_v2(t)) { + u32 cpu_id, uval; + + unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault); + if (cpu_id != t->rseq.ids.cpu_id) + goto die; + unsafe_get_user(uval, &rseq->cpu_id, efault); + if (uval != cpu_id) + goto die; + unsafe_get_user(uval, &rseq->node_id, efault); + if (uval != t->rseq.ids.node_id) + goto die; + unsafe_get_user(uval, &rseq->mm_cid, efault); + if (uval != t->rseq.ids.mm_cid) + goto die; + } + unsafe_put_user(ids->cpu_id, &rseq->cpu_id_start, efault); unsafe_put_user(ids->cpu_id, &rseq->cpu_id, efault); - unsafe_put_user(node_id, &rseq->node_id, efault); + unsafe_put_user(ids->node_id, &rseq->node_id, efault); unsafe_put_user(ids->mm_cid, &rseq->mm_cid, efault); if (csaddr) unsafe_get_user(*csaddr, &rseq->rseq_cs, efault); - /* Open coded, so it's in the same user access region */ - if (rseq_slice_extension_enabled()) { - /* Unconditionally clear it, no point in conditionals */ - unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); - } + /* RSEQ ABI V2 only operations */ + if (rseq_v2(t)) + rseq_slice_clear_user(rseq, efault); } rseq_slice_clear_grant(t); /* Cache the new values */ - t->rseq.ids.cpu_cid = ids->cpu_cid; + t->rseq.ids = *ids; rseq_stat_inc(rseq_stats.ids); rseq_trace_update(t, ids); return true; + +die: + t->rseq.event.fatal = true; efault: return false; } @@ -539,11 +529,11 @@ efault: * is in a critical section. */ static rseq_inline bool rseq_update_usr(struct task_struct *t, struct pt_regs *regs, - struct rseq_ids *ids, u32 node_id) + struct rseq_ids *ids) { u64 csaddr; - if (!rseq_set_ids_get_csaddr(t, ids, node_id, &csaddr)) + if (!rseq_set_ids_get_csaddr(t, ids, &csaddr)) return false; /* @@ -612,6 +602,14 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t * interrupts disabled */ guard(pagefault)(); + /* + * This optimization is only valid when the task registered for the + * optimized RSEQ_ABI_V2 variant. Some legacy users rely on the original + * RSEQ implementation behaviour which unconditionally updated the IDs. + * rseq_sched_switch_event() ensures that legacy registrations always + * have both sched_switch and ids_changed set, which is compatible with + * the historical TIF_NOTIFY_RESUME behaviour. + */ if (likely(!t->rseq.event.ids_changed)) { struct rseq __user *rseq = t->rseq.usrptr; /* @@ -623,11 +621,9 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t scoped_user_rw_access(rseq, efault) { unsafe_get_user(csaddr, &rseq->rseq_cs, efault); - /* Open coded, so it's in the same user access region */ - if (rseq_slice_extension_enabled()) { - /* Unconditionally clear it, no point in conditionals */ - unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); - } + /* RSEQ ABI V2 only operations */ + if (rseq_v2(t)) + rseq_slice_clear_user(rseq, efault); } rseq_slice_clear_grant(t); @@ -640,12 +636,12 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t } struct rseq_ids ids = { - .cpu_id = task_cpu(t), - .mm_cid = task_mm_cid(t), + .cpu_id = task_cpu(t), + .mm_cid = task_mm_cid(t), + .node_id = cpu_to_node(ids.cpu_id), }; - u32 node_id = cpu_to_node(ids.cpu_id); - return rseq_update_usr(t, regs, &ids, node_id); + return rseq_update_usr(t, regs, &ids); efault: return false; } @@ -753,24 +749,6 @@ static __always_inline void rseq_irqentry_exit_to_user_mode(void) ev->events = 0; } -/* Required to keep ARM64 working */ -static __always_inline void rseq_exit_to_user_mode_legacy(void) -{ - struct rseq_event *ev = ¤t->rseq.event; - - rseq_stat_inc(rseq_stats.exit); - - if (static_branch_unlikely(&rseq_debug_enabled)) - WARN_ON_ONCE(ev->sched_switch); - - /* - * Ensure that event (especially user_irq) is cleared when the - * interrupt did not result in a schedule and therefore the - * rseq processing did not clear it. - */ - ev->events = 0; -} - void __rseq_debug_syscall_return(struct pt_regs *regs); static __always_inline void rseq_debug_syscall_return(struct pt_regs *regs) @@ -786,7 +764,6 @@ static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned } static inline void rseq_syscall_exit_to_user_mode(void) { } static inline void rseq_irqentry_exit_to_user_mode(void) { } -static inline void rseq_exit_to_user_mode_legacy(void) { } static inline void rseq_debug_syscall_return(struct pt_regs *regs) { } static inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask) { return false; } #endif /* !CONFIG_RSEQ */ diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h index 0b42045988db..85739a63e85e 100644 --- a/include/linux/rseq_types.h +++ b/include/linux/rseq_types.h @@ -9,6 +9,12 @@ #ifdef CONFIG_RSEQ struct rseq; +/* + * rseq_event::has_rseq contains the ABI version number so preserving it + * in AND operations requires a mask. + */ +#define RSEQ_HAS_RSEQ_VERSION_MASK 0xff + /** * struct rseq_event - Storage for rseq related event management * @all: Compound to initialize and clear the data efficiently @@ -17,7 +23,8 @@ struct rseq; * exit to user * @ids_changed: Indicator that IDs need to be updated * @user_irq: True on interrupt entry from user mode - * @has_rseq: True if the task has a rseq pointer installed + * @has_rseq: Greater than 0 if the task has a rseq pointer installed. + * Contains the RSEQ version number * @error: Compound error code for the slow path to analyze * @fatal: User space data corrupted or invalid * @slowpath: Indicator that slow path processing via TIF_NOTIFY_RESUME @@ -59,8 +66,9 @@ struct rseq_event { * compiler emit a single compare on 64-bit * @cpu_id: The CPU ID which was written last to user space * @mm_cid: The MM CID which was written last to user space + * @node_id: The node ID which was written last to user space * - * @cpu_id and @mm_cid are updated when the data is written to user space. + * @cpu_id, @mm_cid and @node_id are updated when the data is written to user space. */ struct rseq_ids { union { @@ -70,6 +78,7 @@ struct rseq_ids { u32 mm_cid; }; }; + u32 node_id; }; /** diff --git a/include/linux/sched.h b/include/linux/sched.h index 368c7b4d7cb5..ee06cba5c6f5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1002,6 +1002,9 @@ struct task_struct { unsigned sched_rt_mutex:1; #endif + /* Save user-dumpable when mm goes away */ + unsigned user_dumpable:1; + /* Bit to tell TOMOYO we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 1198138cb839..273538200a44 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -33,6 +33,15 @@ struct root_domain; extern void dl_add_task_root_domain(struct task_struct *p); extern void dl_clear_root_domain(struct root_domain *rd); extern void dl_clear_root_domain_cpu(int cpu); +/* + * Return whether moving DL task @p to @new_mask requires moving DL + * bandwidth accounting between root domains. This helper is specific to + * DL bandwidth move accounting semantics and is shared by + * cpuset_can_attach() and set_cpus_allowed_dl() so both paths use the + * same source root-domain test. + */ +extern bool dl_task_needs_bw_move(struct task_struct *p, + const struct cpumask *new_mask); extern u64 dl_cookie; extern bool dl_bw_visited(int cpu, u64 cookie); diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 1a3af2ea2a79..2129e18ada58 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -103,21 +103,25 @@ enum scx_ent_flags { SCX_TASK_IMMED = 1 << 5, /* task is on local DSQ with %SCX_ENQ_IMMED */ /* - * Bits 8 and 9 are used to carry task state: + * Bits 8 to 10 are used to carry task state: * * NONE ops.init_task() not called yet + * INIT_BEGIN ops.init_task() in flight; see sched_ext_dead() * INIT ops.init_task() succeeded, but task can be cancelled * READY fully initialized, but not in sched_ext * ENABLED fully initialized and in sched_ext + * DEAD terminal state set by sched_ext_dead() */ - SCX_TASK_STATE_SHIFT = 8, /* bits 8 and 9 are used to carry task state */ - SCX_TASK_STATE_BITS = 2, + SCX_TASK_STATE_SHIFT = 8, + SCX_TASK_STATE_BITS = 3, SCX_TASK_STATE_MASK = ((1 << SCX_TASK_STATE_BITS) - 1) << SCX_TASK_STATE_SHIFT, SCX_TASK_NONE = 0 << SCX_TASK_STATE_SHIFT, - SCX_TASK_INIT = 1 << SCX_TASK_STATE_SHIFT, - SCX_TASK_READY = 2 << SCX_TASK_STATE_SHIFT, - SCX_TASK_ENABLED = 3 << SCX_TASK_STATE_SHIFT, + SCX_TASK_INIT_BEGIN = 1 << SCX_TASK_STATE_SHIFT, + SCX_TASK_INIT = 2 << SCX_TASK_STATE_SHIFT, + SCX_TASK_READY = 3 << SCX_TASK_STATE_SHIFT, + SCX_TASK_ENABLED = 4 << SCX_TASK_STATE_SHIFT, + SCX_TASK_DEAD = 5 << SCX_TASK_STATE_SHIFT, /* * Bits 12 and 13 are used to carry reenqueue reason. In addition to diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index dc3975ff1b2e..cf0fd03dd7a2 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -21,6 +21,11 @@ enum hk_type { HK_TYPE_MAX, /* + * HK_TYPE_KTHREAD is now an alias of HK_TYPE_DOMAIN + */ + HK_TYPE_KTHREAD = HK_TYPE_DOMAIN, + + /* * The following housekeeping types are only set by the nohz_full * boot commandline option. So they can share the same value. */ @@ -29,7 +34,6 @@ enum hk_type { HK_TYPE_RCU = HK_TYPE_KERNEL_NOISE, HK_TYPE_MISC = HK_TYPE_KERNEL_NOISE, HK_TYPE_WQ = HK_TYPE_KERNEL_NOISE, - HK_TYPE_KTHREAD = HK_TYPE_KERNEL_NOISE }; #ifdef CONFIG_CPU_ISOLATION diff --git a/include/linux/slab.h b/include/linux/slab.h index 15a60b501b95..2b5ab488e96b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -1234,6 +1234,9 @@ void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long alig extern void kvfree(const void *addr); DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T)) +extern void kvfree_atomic(const void *addr); +DEFINE_FREE(kvfree_atomic, void *, if (!IS_ERR_OR_NULL(_T)) kvfree_atomic(_T)) + extern void kvfree_sensitive(const void *addr, size_t len); unsigned int kmem_cache_size(struct kmem_cache *s); diff --git a/include/linux/smbdirect.h b/include/linux/smbdirect.h new file mode 100644 index 000000000000..97f5ba730fa7 --- /dev/null +++ b/include/linux/smbdirect.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2025, Stefan Metzmacher + */ + +#ifndef __LINUX_SMBDIRECT_H__ +#define __LINUX_SMBDIRECT_H__ + +#include <linux/types.h> + +/* SMB-DIRECT buffer descriptor V1 structure [MS-SMBD] 2.2.3.1 */ +struct smbdirect_buffer_descriptor_v1 { + __le64 offset; + __le32 token; + __le32 length; +} __packed; + +/* + * Connection parameters mostly from [MS-SMBD] 3.1.1.1 + * + * These are setup and negotiated at the beginning of a + * connection and remain constant unless explicitly changed. + * + * Some values are important for the upper layer. + */ +struct smbdirect_socket_parameters { + __u64 flags; +#define SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB ((__u64)0x1) +#define SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW ((__u64)0x2) + __u32 resolve_addr_timeout_msec; + __u32 resolve_route_timeout_msec; + __u32 rdma_connect_timeout_msec; + __u32 negotiate_timeout_msec; + __u16 initiator_depth; /* limited to U8_MAX */ + __u16 responder_resources; /* limited to U8_MAX */ + __u16 recv_credit_max; + __u16 send_credit_target; + __u32 max_send_size; + __u32 max_fragmented_send_size; + __u32 max_recv_size; + __u32 max_fragmented_recv_size; + __u32 max_read_write_size; + __u32 max_frmr_depth; + __u32 keepalive_interval_msec; + __u32 keepalive_timeout_msec; +} __packed; + +#define SMBDIRECT_FLAG_PORT_RANGE_MASK ( \ + SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB | \ + SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW) + +struct smbdirect_socket; +struct smbdirect_send_batch; +struct smbdirect_mr_io; + +#include <rdma/rw.h> + +u8 smbdirect_netdev_rdma_capable_node_type(struct net_device *netdev); + +bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs); + +int smbdirect_socket_create_kern(struct net *net, struct smbdirect_socket **_sc); + +int smbdirect_socket_create_accepting(struct rdma_cm_id *id, struct smbdirect_socket **_sc); + +int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, + const struct smbdirect_socket_parameters *sp); + +const struct smbdirect_socket_parameters * +smbdirect_socket_get_current_parameters(struct smbdirect_socket *sc); + +int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, + enum ib_poll_context poll_ctx, + gfp_t gfp_mask); + +#define SMBDIRECT_LOG_ERR 0x0 +#define SMBDIRECT_LOG_INFO 0x1 + +#define SMBDIRECT_LOG_OUTGOING 0x1 +#define SMBDIRECT_LOG_INCOMING 0x2 +#define SMBDIRECT_LOG_READ 0x4 +#define SMBDIRECT_LOG_WRITE 0x8 +#define SMBDIRECT_LOG_RDMA_SEND 0x10 +#define SMBDIRECT_LOG_RDMA_RECV 0x20 +#define SMBDIRECT_LOG_KEEP_ALIVE 0x40 +#define SMBDIRECT_LOG_RDMA_EVENT 0x80 +#define SMBDIRECT_LOG_RDMA_MR 0x100 +#define SMBDIRECT_LOG_RDMA_RW 0x200 +#define SMBDIRECT_LOG_NEGOTIATE 0x400 +void smbdirect_socket_set_logging(struct smbdirect_socket *sc, + void *private_ptr, + bool (*needed)(struct smbdirect_socket *sc, + void *private_ptr, + unsigned int lvl, + unsigned int cls), + void (*vaprintf)(struct smbdirect_socket *sc, + const char *func, + unsigned int line, + void *private_ptr, + unsigned int lvl, + unsigned int cls, + struct va_format *vaf)); + +bool smbdirect_connection_is_connected(struct smbdirect_socket *sc); + +int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc); + +int smbdirect_socket_bind(struct smbdirect_socket *sc, struct sockaddr *addr); + +void smbdirect_socket_shutdown(struct smbdirect_socket *sc); + +void smbdirect_socket_release(struct smbdirect_socket *sc); + +int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc, + struct smbdirect_send_batch *batch, + bool is_last); + +/* + * This is only temporary and only needed + * as long as the client still requires + * to use smbdirect_connection_send_single_iter() + */ +struct smbdirect_send_batch_storage { + union { + struct list_head __msg_list; + __aligned_u64 __space[5]; + }; +}; + +struct smbdirect_send_batch * +smbdirect_init_send_batch_storage(struct smbdirect_send_batch_storage *storage, + bool need_invalidate_rkey, + unsigned int remote_key); + +int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, + struct smbdirect_send_batch *batch, + struct iov_iter *iter, + unsigned int flags, + u32 remaining_data_length); + +int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc); + +int smbdirect_connection_send_iter(struct smbdirect_socket *sc, + struct iov_iter *iter, + unsigned int flags, + bool need_invalidate, + unsigned int remote_key); + +int smbdirect_connection_recvmsg(struct smbdirect_socket *sc, + struct msghdr *msg, + unsigned int flags); + +int smbdirect_connect(struct smbdirect_socket *sc, + const struct sockaddr *dst); + +int smbdirect_connect_sync(struct smbdirect_socket *sc, + const struct sockaddr *dst); + +int smbdirect_socket_listen(struct smbdirect_socket *sc, int backlog); + +struct smbdirect_socket *smbdirect_socket_accept(struct smbdirect_socket *lsc, + long timeo, + struct proto_accept_arg *arg); + +int smbdirect_connection_rdma_xmit(struct smbdirect_socket *sc, + void *buf, size_t buf_len, + struct smbdirect_buffer_descriptor_v1 *desc, + size_t desc_len, + bool is_read); + +struct smbdirect_mr_io * +smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, + struct iov_iter *iter, + bool writing, + bool need_invalidate); + +void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, + struct smbdirect_buffer_descriptor_v1 *v1); + +void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr); + +void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket *sc, + unsigned int rdma_readwrite_threshold, + struct seq_file *m); + +#endif /* __LINUX_SMBDIRECT_H__ */ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 578e520b6ee6..763eea4d80d8 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -202,7 +202,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define TP_CONDITION(args...) args /* - * Individual subsystem my have a separate configuration to + * Individual subsystem may have a separate configuration to * enable their tracepoints. By default, this file will create * the tracepoints if CONFIG_TRACEPOINTS is defined. If a subsystem * wants to be able to disable its tracepoints from being created diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 2ebba746c18f..89165b769e5c 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -21,7 +21,7 @@ #define VFIO_PCI_CORE_H #define VFIO_PCI_OFFSET_SHIFT 40 -#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT) +#define VFIO_PCI_OFFSET_TO_INDEX(off) ((u64)(off) >> VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ab6cb70ca1a5..6177624539b3 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -534,8 +534,10 @@ alloc_workqueue_noprof(const char *fmt, unsigned int flags, int max_active, ...) * Pointer to the allocated workqueue on success, %NULL on failure. */ __printf(2, 5) struct workqueue_struct * -devm_alloc_workqueue(struct device *dev, const char *fmt, unsigned int flags, - int max_active, ...); +devm_alloc_workqueue_noprof(struct device *dev, const char *fmt, + unsigned int flags, int max_active, ...); +#define devm_alloc_workqueue(...) \ + alloc_hooks(devm_alloc_workqueue_noprof(__VA_ARGS__)) #ifdef CONFIG_LOCKDEP /** diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a7bffb908c1e..aa600fbf9a53 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2495,7 +2495,7 @@ void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type); int hci_abort_conn(struct hci_conn *conn, u8 reason); -u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, +void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, __u8 ltk[16], __u8 key_size); diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index c92d4a976246..05572c19e14b 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -243,7 +243,7 @@ typedef struct port { churn_state_t sm_churn_actor_state; churn_state_t sm_churn_partner_state; struct slave *slave; /* pointer to the bond slave that this port belongs to */ - struct aggregator *aggregator; /* pointer to an aggregator that this port related to */ + struct aggregator __rcu *aggregator; /* pointer to an aggregator that this port related to */ struct port *next_port_in_aggregator; /* Next port on the linked list of the parent aggregator */ u32 transaction_id; /* continuous number for identification of Marker PDU's; */ struct lacpdu lacpdu; /* the lacpdu that will be sent for this port */ diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index e0ca3904ff8e..2f312d1f67d6 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -99,6 +99,7 @@ FN(FRAG_TOO_FAR) \ FN(TCP_MINTTL) \ FN(IPV6_BAD_EXTHDR) \ + FN(IPV6_TOO_MANY_EXTHDRS) \ FN(IPV6_NDISC_FRAG) \ FN(IPV6_NDISC_HOP_LIMIT) \ FN(IPV6_NDISC_BAD_CODE) \ @@ -494,6 +495,11 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_MINTTL, /** @SKB_DROP_REASON_IPV6_BAD_EXTHDR: Bad IPv6 extension header. */ SKB_DROP_REASON_IPV6_BAD_EXTHDR, + /** + * @SKB_DROP_REASON_IPV6_TOO_MANY_EXTHDRS: Number of IPv6 extension + * headers in the packet exceeds IP6_MAX_EXT_HDRS_CNT. + */ + SKB_DROP_REASON_IPV6_TOO_MANY_EXTHDRS, /** @SKB_DROP_REASON_IPV6_NDISC_FRAG: invalid frag (suppress_frag_ndisc). */ SKB_DROP_REASON_IPV6_NDISC_FRAG, /** @SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT: invalid hop limit. */ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 7b84f2cef8b1..d70510ac31ab 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -489,8 +489,10 @@ genlmsg_multicast_netns_filtered(const struct genl_family *family, netlink_filter_fn filter, void *filter_data) { - if (WARN_ON_ONCE(group >= family->n_mcgrps)) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) { + nlmsg_free(skb); return -EINVAL; + } group = family->mcgrp_offset + group; return nlmsg_multicast_filtered(net->genl_sock, skb, portid, group, flags, filter, filter_data); diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 72d325c81313..02762ce73a0c 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -491,6 +491,7 @@ struct ip_vs_est_kt_data { DECLARE_BITMAP(avail, IPVS_EST_NTICKS); /* tick has space for ests */ unsigned long est_timer; /* estimation timer (jiffies) */ struct ip_vs_stats *calc_stats; /* Used for calculation */ + int needed; /* task is needed */ int tick_len[IPVS_EST_NTICKS]; /* est count */ int id; /* ktid per netns */ int chain_max; /* max ests per tick chain */ @@ -1411,7 +1412,7 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) return ipvs->sysctl_run_estimation; } -static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) +static inline const struct cpumask *__sysctl_est_cpulist(struct netns_ipvs *ipvs) { if (ipvs->est_cpulist_valid) return ipvs->sysctl_est_cpulist; @@ -1529,7 +1530,7 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) return 1; } -static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) +static inline const struct cpumask *__sysctl_est_cpulist(struct netns_ipvs *ipvs) { return housekeeping_cpumask(HK_TYPE_KTHREAD); } @@ -1564,6 +1565,18 @@ static inline int sysctl_svc_lfactor(struct netns_ipvs *ipvs) return READ_ONCE(ipvs->sysctl_svc_lfactor); } +static inline bool sysctl_est_cpulist_empty(struct netns_ipvs *ipvs) +{ + guard(rcu)(); + return cpumask_empty(__sysctl_est_cpulist(ipvs)); +} + +static inline unsigned int sysctl_est_cpulist_weight(struct netns_ipvs *ipvs) +{ + guard(rcu)(); + return cpumask_weight(__sysctl_est_cpulist(ipvs)); +} + /* IPVS core functions * (from ip_vs_core.c) */ @@ -1884,18 +1897,26 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats); void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats); void ip_vs_zero_estimator(struct ip_vs_stats *stats); void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats); -void ip_vs_est_reload_start(struct netns_ipvs *ipvs); +void ip_vs_est_reload_start(struct netns_ipvs *ipvs, bool restart); int ip_vs_est_kthread_start(struct netns_ipvs *ipvs, struct ip_vs_est_kt_data *kd); void ip_vs_est_kthread_stop(struct ip_vs_est_kt_data *kd); +static inline void ip_vs_stop_estimator_tot_stats(struct netns_ipvs *ipvs) +{ +#ifdef CONFIG_SYSCTL + ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s); + ipvs->tot_stats->s.est.ktid = -2; +#endif +} + static inline void ip_vs_est_stopped_recalc(struct netns_ipvs *ipvs) { #ifdef CONFIG_SYSCTL /* Stop tasks while cpulist is empty or if disabled with flag */ ipvs->est_stopped = !sysctl_run_estimation(ipvs) || (ipvs->est_cpulist_valid && - cpumask_empty(sysctl_est_cpulist(ipvs))); + sysctl_est_cpulist_empty(ipvs)); #endif } @@ -1911,7 +1932,7 @@ static inline bool ip_vs_est_stopped(struct netns_ipvs *ipvs) static inline int ip_vs_est_max_threads(struct netns_ipvs *ipvs) { unsigned int limit = IPVS_EST_CPU_KTHREADS * - cpumask_weight(sysctl_est_cpulist(ipvs)); + sysctl_est_cpulist_weight(ipvs); return max(1U, limit); } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d042afe7a245..1dec81faff28 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -90,6 +90,9 @@ struct ip_tunnel_info; #define IP6_DEFAULT_MAX_DST_OPTS_LEN INT_MAX /* No limit */ #define IP6_DEFAULT_MAX_HBH_OPTS_LEN INT_MAX /* No limit */ +/* Hard limit on traversed IPv6 extension headers */ +#define IP6_MAX_EXT_HDRS_CNT 12 + /* * Addr type * diff --git a/include/net/macsec.h b/include/net/macsec.h index bc7de5b53e54..d962093ee923 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -9,6 +9,7 @@ #include <linux/u64_stats_sync.h> #include <linux/if_vlan.h> +#include <linux/workqueue.h> #include <uapi/linux/if_link.h> #include <uapi/linux/if_macsec.h> @@ -123,6 +124,7 @@ struct macsec_dev_stats { * @key: key structure * @ssci: short secure channel identifier * @stats: per-SA stats + * @destroy_work: deferred work to free the SA in process context after RCU grace period */ struct macsec_rx_sa { struct macsec_key key; @@ -136,7 +138,7 @@ struct macsec_rx_sa { bool active; struct macsec_rx_sa_stats __percpu *stats; struct macsec_rx_sc *sc; - struct rcu_head rcu; + struct rcu_work destroy_work; }; struct pcpu_rx_sc_stats { @@ -174,6 +176,7 @@ struct macsec_rx_sc { * @key: key structure * @ssci: short secure channel identifier * @stats: per-SA stats + * @destroy_work: deferred work to free the SA in process context after RCU grace period */ struct macsec_tx_sa { struct macsec_key key; @@ -186,7 +189,7 @@ struct macsec_tx_sa { refcount_t refcnt; bool active; struct macsec_tx_sa_stats __percpu *stats; - struct rcu_head rcu; + struct rcu_work destroy_work; }; /** diff --git a/include/net/mana/shm_channel.h b/include/net/mana/shm_channel.h index 5199b41497ff..dbabcfb95daf 100644 --- a/include/net/mana/shm_channel.h +++ b/include/net/mana/shm_channel.h @@ -4,6 +4,12 @@ #ifndef _SHM_CHANNEL_H #define _SHM_CHANNEL_H +#define SMC_APERTURE_BITS 256 +#define SMC_BASIC_UNIT (sizeof(u32)) +#define SMC_APERTURE_DWORDS (SMC_APERTURE_BITS / (SMC_BASIC_UNIT * 8)) +#define SMC_LAST_DWORD (SMC_APERTURE_DWORDS - 1) +#define SMC_APERTURE_SIZE (SMC_APERTURE_BITS / 8) + struct shm_channel { struct device *dev; void __iomem *base; diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index e9a8350e7ccf..80f50fd0f7ad 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -45,9 +45,12 @@ struct nf_conntrack_expect { void (*expectfn)(struct nf_conn *new, struct nf_conntrack_expect *this); - /* Helper to assign to new connection */ + /* Helper that created this expectation */ struct nf_conntrack_helper __rcu *helper; + /* Helper to assign to new connection */ + struct nf_conntrack_helper __rcu *assign_helper; + /* The conntrack of the master connection */ struct nf_conn *master; diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h index b175d271aec9..609bcf422a9b 100644 --- a/include/net/netfilter/nf_dup_netdev.h +++ b/include/net/netfilter/nf_dup_netdev.h @@ -3,10 +3,23 @@ #define _NF_DUP_NETDEV_H_ #include <net/netfilter/nf_tables.h> +#include <linux/netdevice.h> +#include <linux/sched.h> void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif); void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif); +#define NF_RECURSION_LIMIT 2 + +static inline u8 *nf_get_nf_dup_skb_recursion(void) +{ +#ifndef CONFIG_PREEMPT_RT + return this_cpu_ptr(&softnet_data.xmit.nf_dup_skb_recursion); +#else + return ¤t->net_xmit.nf_dup_skb_recursion; +#endif +} + struct nft_offload_ctx; struct nft_flow_rule; diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index b09c11c048d5..7b23b245a5a8 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -148,9 +148,10 @@ struct flow_offload_tuple { /* All members above are keys for lookups, see flow_offload_hash(). */ struct { } __hash; - u8 dir:2, + u16 dir:2, xmit_type:3, encap_num:2, + needs_gso_segment:1, tun_num:2, in_vlan_ingress:2; u16 mtu; @@ -232,6 +233,7 @@ struct nf_flow_route { u32 hw_ifindex; u8 h_source[ETH_ALEN]; u8 h_dest[ETH_ALEN]; + u8 needs_gso_segment:1; } out; enum flow_offload_xmit_type xmit_type; } tuple[FLOW_OFFLOAD_DIR_MAX]; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2c0173d9309c..cff7b773e972 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1204,12 +1204,15 @@ struct nft_stats { struct u64_stats_sync syncp; }; +#define NFT_HOOK_REMOVE (1 << 0) + struct nft_hook { struct list_head list; struct list_head ops_list; struct rcu_head rcu; char ifname[IFNAMSIZ]; u8 ifnamelen; + u8 flags; }; struct nf_hook_ops *nft_hook_find_ops(const struct nft_hook *hook, @@ -1665,6 +1668,16 @@ struct nft_trans { }; /** + * struct nft_trans_hook - nf_tables hook update in transaction + * @list: used internally + * @hook: struct nft_hook with the device hook + */ +struct nft_trans_hook { + struct list_head list; + struct nft_hook *hook; +}; + +/** * struct nft_trans_binding - nf_tables object with binding support in transaction * @nft_trans: base structure, MUST be first member * @binding_list: list of objects with possible bindings diff --git a/include/net/netmem.h b/include/net/netmem.h index 507b74c9f52d..78fe51e5756b 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -127,6 +127,21 @@ static inline unsigned int net_iov_idx(const struct net_iov *niov) return niov - net_iov_owner(niov)->niovs; } +/* Initialize a niov: stamp the owning area, the memory provider type, + * and the page_type "no type" sentinel expected by the page-type API + * (see PAGE_TYPE_OPS in <linux/page-flags.h>) so that + * page_pool_set_pp_info() can later call __SetPageNetpp() on a niov + * cast to struct page. + */ +static inline void net_iov_init(struct net_iov *niov, + struct net_iov_area *owner, + enum net_iov_type type) +{ + niov->owner = owner; + niov->type = type; + niov->page_type = UINT_MAX; +} + /* netmem */ /** diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 80ccd4dda8e0..6e27c56514df 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -275,7 +275,7 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MROUTE #ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES - struct mr_table *mrt; + struct mr_table __rcu *mrt; #else struct list_head mr_tables; struct fib_rules_ops *mr_rules_ops; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 499e4288170f..875916d60bfe 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -119,6 +119,7 @@ struct netns_ipv6 { struct fib_notifier_ops *notifier_ops; struct fib_notifier_ops *ip6mr_notifier_ops; atomic_t ipmr_seq; + int flowlabel_count; struct { struct hlist_head head; spinlock_t lock; diff --git a/include/net/nsh.h b/include/net/nsh.h index 16a751093896..15a26c590815 100644 --- a/include/net/nsh.h +++ b/include/net/nsh.h @@ -247,10 +247,10 @@ struct nshhdr { #define NSH_M_TYPE1_LEN 24 /* NSH header maximum Length. */ -#define NSH_HDR_MAX_LEN 256 +#define NSH_HDR_MAX_LEN ((NSH_LEN_MASK >> NSH_LEN_SHIFT) * 4) /* NSH context headers maximum Length. */ -#define NSH_CTX_HDRS_MAX_LEN 248 +#define NSH_CTX_HDRS_MAX_LEN (NSH_HDR_MAX_LEN - NSH_BASE_HDR_LEN) static inline struct nshhdr *nsh_hdr(struct sk_buff *skb) { diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 8ad7a2d76c1d..ec1df8b94517 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -771,10 +771,8 @@ TRACE_EVENT(btrfs_sync_file, TP_fast_assign( struct dentry *dentry = file_dentry(file); struct inode *inode = file_inode(file); - struct dentry *parent = dget_parent(dentry); - struct inode *parent_inode = d_inode(parent); + struct inode *parent_inode = d_inode(dentry->d_parent); - dput(parent); TP_fast_assign_fsid(btrfs_sb(inode->i_sb)); __entry->ino = btrfs_ino(BTRFS_I(inode)); __entry->parent = btrfs_ino(BTRFS_I(parent_inode)); diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h index 24fc402ab3c8..7e25f4469b81 100644 --- a/include/trace/events/damon.h +++ b/include/trace/events/damon.h @@ -41,7 +41,7 @@ TRACE_EVENT(damos_stat_after_apply_interval, ), TP_printk("ctx_idx=%u scheme_idx=%u nr_tried=%lu sz_tried=%lu " - "nr_applied=%lu sz_tried=%lu sz_ops_filter_passed=%lu " + "nr_applied=%lu sz_applied=%lu sz_ops_filter_passed=%lu " "qt_exceeds=%lu nr_snapshots=%lu", __entry->context_idx, __entry->scheme_idx, __entry->nr_tried, __entry->sz_tried, diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 8c936fc575d5..082cb03c6131 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -177,7 +177,11 @@ EM(netfs_folio_is_uptodate, "mod-uptodate") \ EM(netfs_just_prefetch, "mod-prefetch") \ EM(netfs_whole_folio_modify, "mod-whole-f") \ + EM(netfs_whole_folio_modify_efault, "mod-whole-f!") \ + EM(netfs_whole_folio_modify_filled, "mod-whole-f+") \ + EM(netfs_whole_folio_modify_filled_efault, "mod-whole-f+!") \ EM(netfs_modify_and_clear, "mod-n-clear") \ + EM(netfs_modify_and_clear_rm_finfo, "mod-n-clear+") \ EM(netfs_streaming_write, "mod-streamw") \ EM(netfs_streaming_write_cont, "mod-streamw+") \ EM(netfs_flush_content, "flush") \ @@ -194,6 +198,10 @@ EM(netfs_folio_trace_copy_to_cache, "mark-copy") \ EM(netfs_folio_trace_end_copy, "end-copy") \ EM(netfs_folio_trace_filled_gaps, "filled-gaps") \ + EM(netfs_folio_trace_invalidate_all, "inval-all") \ + EM(netfs_folio_trace_invalidate_front, "inval-front") \ + EM(netfs_folio_trace_invalidate_middle, "inval-mid") \ + EM(netfs_folio_trace_invalidate_tail, "inval-tail") \ EM(netfs_folio_trace_kill, "kill") \ EM(netfs_folio_trace_kill_cc, "kill-cc") \ EM(netfs_folio_trace_kill_g, "kill-g") \ diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 17ac1b785440..909fb7aea638 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -905,7 +905,8 @@ struct io_uring_buf_reg { __u32 ring_entries; __u16 bgid; __u16 flags; - __u64 resv[3]; + __u32 min_left; + __u32 resv[5]; }; /* argument for IORING_REGISTER_PBUF_STATUS */ diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index f69344fe6c08..ca6fe1f9d05e 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -28,7 +28,7 @@ enum rseq_cs_flags_bit { RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, - /* (3) Intentional gap to put new bits into a separate byte */ + /* (3) Intentional gap to keep new bits separate */ /* User read only feature flags */ RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE_BIT = 4, @@ -161,6 +161,9 @@ struct rseq { * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE + * + * It is now used for feature status advertisement by the kernel. + * See: enum rseq_cs_flags_bit for further information. */ __u32 flags; diff --git a/include/ufs/unipro.h b/include/ufs/unipro.h index f849a2a101ae..9c168703b104 100644 --- a/include/ufs/unipro.h +++ b/include/ufs/unipro.h @@ -333,6 +333,11 @@ enum ufs_eom_eye_mask { #define DME_LocalTC0ReplayTimeOutVal 0xD042 #define DME_LocalAFC0ReqTimeOutVal 0xD043 +enum ufs_op_mode { + LS_MODE = 1, + HS_MODE = 2, +}; + /* PA power modes */ enum ufs_pa_pwr_mode { FAST_MODE = 1, diff --git a/include/video/imx-ipu-image-convert.h b/include/video/imx-ipu-image-convert.h index 003b3927ede5..6b77968a6a15 100644 --- a/include/video/imx-ipu-image-convert.h +++ b/include/video/imx-ipu-image-convert.h @@ -27,12 +27,13 @@ struct ipu_image_convert_run { int status; + /* private: */ /* internal to image converter, callers don't touch */ struct list_head list; }; /** - * ipu_image_convert_cb_t - conversion callback function prototype + * typedef ipu_image_convert_cb_t - conversion callback function prototype * * @run: the completed conversion run pointer * @ctx: a private context pointer for the callback @@ -60,7 +61,7 @@ void ipu_image_convert_adjust(struct ipu_image *in, struct ipu_image *out, * @out: output image format * @rot_mode: rotation mode * - * Returns 0 if the formats and rotation mode meet IPU restrictions, + * Returns: 0 if the formats and rotation mode meet IPU restrictions, * -EINVAL otherwise. */ int ipu_image_convert_verify(struct ipu_image *in, struct ipu_image *out, @@ -77,11 +78,11 @@ int ipu_image_convert_verify(struct ipu_image *in, struct ipu_image *out, * @complete: run completion callback * @complete_context: a context pointer for the completion callback * - * Returns an opaque conversion context pointer on success, error pointer + * In V4L2, drivers should call ipu_image_convert_prepare() at streamon. + * + * Returns: an opaque conversion context pointer on success, error pointer * on failure. The input/output formats and rotation mode must already meet * IPU retrictions. - * - * In V4L2, drivers should call ipu_image_convert_prepare() at streamon. */ struct ipu_image_convert_ctx * ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task, @@ -122,6 +123,8 @@ void ipu_image_convert_unprepare(struct ipu_image_convert_ctx *ctx); * In V4L2, drivers should call ipu_image_convert_queue() while * streaming to queue the conversion of a received input buffer. * For example mem2mem devices this would be called in .device_run. + * + * Returns: 0 on success or -errno on error. */ int ipu_image_convert_queue(struct ipu_image_convert_run *run); @@ -155,6 +158,9 @@ void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx); * On successful return the caller can queue more run requests if needed, using * the prepared context in run->ctx. The caller is responsible for unpreparing * the context when no more conversion requests are needed. + * + * Returns: pointer to the created &struct ipu_image_convert_run that has + * been queued on success; an ERR_PTR(errno) on error. */ struct ipu_image_convert_run * ipu_image_convert(struct ipu_soc *ipu, enum ipu_ic_task ic_task, diff --git a/include/video/udlfb.h b/include/video/udlfb.h index 58fb5732831a..ab34790d57ec 100644 --- a/include/video/udlfb.h +++ b/include/video/udlfb.h @@ -56,6 +56,7 @@ struct dlfb_data { spinlock_t damage_lock; struct work_struct damage_work; struct fb_ops ops; + atomic_t mmap_count; /* blit-only rendering path metrics, exposed through sysfs */ atomic_t bytes_rendered; /* raw pixel-bytes driver asked to render */ atomic_t bytes_identical; /* saved effort with backbuffer comparison */ diff --git a/include/xen/arm/interface.h b/include/xen/arm/interface.h index c3eada2642aa..61360b89da40 100644 --- a/include/xen/arm/interface.h +++ b/include/xen/arm/interface.h @@ -30,7 +30,7 @@ #define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Explicitly size integers that represent pfns in the interface with * Xen so that we can have one ABI that works for 32 and 64 bit guests. * Note that this means that the xen_pfn_t type may be capable of |
