From b71f9804f66c2592d4c3a2397b7374a4039005a5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 18 Apr 2025 22:46:52 -0700 Subject: timekeeping: Prevent coarse clocks going backwards Lei Chen raised an issue with CLOCK_MONOTONIC_COARSE seeing time inconsistencies. Lei tracked down that this was being caused by the adjustment: tk->tkr_mono.xtime_nsec -= offset; which is made to compensate for the unaccumulated cycles in offset when the multiplicator is adjusted forward, so that the non-_COARSE clockids don't see inconsistencies. However, the _COARSE clockid getter functions use the adjusted xtime_nsec value directly and do not compensate the negative offset via the clocksource delta multiplied with the new multiplicator. In that case the caller can observe time going backwards in consecutive calls. By design, this negative adjustment should be fine, because the logic run from timekeeping_adjust() is done after it accumulated approximately multiplicator * interval_cycles into xtime_nsec. The accumulated value is always larger then the mult_adj * offset value, which is subtracted from xtime_nsec. Both operations are done together under the tk_core.lock, so the net change to xtime_nsec is always always be positive. However, do_adjtimex() calls into timekeeping_advance() as well, to apply the NTP frequency adjustment immediately. In this case, timekeeping_advance() does not return early when the offset is smaller then interval_cycles. In that case there is no time accumulated into xtime_nsec. But the subsequent call into timekeeping_adjust(), which modifies the multiplicator, subtracts from xtime_nsec to correct for the new multiplicator. Here because there was no accumulation, xtime_nsec becomes smaller than before, which opens a window up to the next accumulation, where the _COARSE clockid getters, which don't compensate for the offset, can observe the inconsistency. This has been tried to be fixed by forwarding the timekeeper in the case that adjtimex() adjusts the multiplier, which resets the offset to zero: 757b000f7b93 ("timekeeping: Fix possible inconsistencies in _COARSE clockids") That works correctly, but unfortunately causes a regression on the adjtimex() side. There are two issues: 1) The forwarding of the base time moves the update out of the original period and establishes a new one. 2) The clearing of the accumulated NTP error is changing the behaviour as well. User-space expects that multiplier/frequency updates are in effect, when the syscall returns, so delaying the update to the next tick is not solving the problem either. Commit 757b000f7b93 was reverted so that the established expectations of user space implementations (ntpd, chronyd) are restored, but that obviously brought the inconsistencies back. One of the initial approaches to fix this was to establish a separate storage for the coarse time getter nanoseconds part by calculating it from the offset. That was dropped on the floor because not having yet another state to maintain was simpler. But given the result of the above exercise, this solution turns out to be the right one. Bring it back in a slightly modified form. Thus introduce timekeeper::coarse_nsec and store that nanoseconds part in it, switch the time getter functions and the VDSO update to use that value. coarse_nsec is set on operations which forward or initialize the timekeeper and after time was accumulated during a tick. If there is no accumulation the timestamp is unchanged. This leaves the adjtimex() behaviour unmodified and prevents coarse time from going backwards. [ jstultz: Simplified the coarse_nsec calculation and kept behavior so coarse clockids aren't adjusted on each inter-tick adjtimex call, slightly reworked the comments and commit message ] Fixes: da15cfdae033 ("time: Introduce CLOCK_REALTIME_COARSE") Reported-by: Lei Chen Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/20250419054706.2319105-1-jstultz@google.com Closes: https://lore.kernel.org/lkml/20250310030004.3705801-1-lei.chen@smartx.com/ --- include/linux/timekeeper_internal.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index e39d4d563b19..785048a3b3e6 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -51,7 +51,7 @@ struct tk_read_base { * @offs_real: Offset clock monotonic -> clock realtime * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai - * @tai_offset: The current UTC to TAI offset in seconds + * @coarse_nsec: The nanoseconds part for coarse time getters * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds * @clock_was_set_seq: The sequence number of clock was set events @@ -76,6 +76,7 @@ struct tk_read_base { * ntp shifted nano seconds. * @ntp_err_mult: Multiplication factor for scaled math conversion * @skip_second_overflow: Flag used to avoid updating NTP twice with same second + * @tai_offset: The current UTC to TAI offset in seconds * * Note: For timespec(64) based interfaces wall_to_monotonic is what * we need to add to xtime (or xtime corrected for sub jiffy times) @@ -100,7 +101,7 @@ struct tk_read_base { * which results in the following cacheline layout: * * 0: seqcount, tkr_mono - * 1: xtime_sec ... tai_offset + * 1: xtime_sec ... coarse_nsec * 2: tkr_raw, raw_sec * 3,4: Internal variables * @@ -121,7 +122,7 @@ struct timekeeper { ktime_t offs_real; ktime_t offs_boot; ktime_t offs_tai; - s32 tai_offset; + u32 coarse_nsec; /* Cacheline 2: */ struct tk_read_base tkr_raw; @@ -144,6 +145,7 @@ struct timekeeper { u32 ntp_error_shift; u32 ntp_err_mult; u32 skip_second_overflow; + s32 tai_offset; }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL -- cgit v1.2.3 From f31fe8165d365379d858c53bef43254c7d6d1cfd Mon Sep 17 00:00:00 2001 From: Naman Jain Date: Fri, 2 May 2025 13:18:10 +0530 Subject: uio_hv_generic: Fix sysfs creation path for ring buffer On regular bootup, devices get registered to VMBus first, so when uio_hv_generic driver for a particular device type is probed, the device is already initialized and added, so sysfs creation in hv_uio_probe() works fine. However, when the device is removed and brought back, the channel gets rescinded and the device again gets registered to VMBus. However this time, the uio_hv_generic driver is already registered to probe for that device and in this case sysfs creation is tried before the device's kobject gets initialized completely. Fix this by moving the core logic of sysfs creation of ring buffer, from uio_hv_generic to HyperV's VMBus driver, where the rest of the sysfs attributes for the channels are defined. While doing that, make use of attribute groups and macros, instead of creating sysfs directly, to ensure better error handling and code flow. Problematic path: vmbus_process_offer (A new offer comes for the VMBus device) vmbus_add_channel_work vmbus_device_register |-> device_register | |... | |-> hv_uio_probe | |... | |-> sysfs_create_bin_file (leads to a warning as | the primary channel's kobject, which is used to | create the sysfs file, is not yet initialized) |-> kset_create_and_add |-> vmbus_add_channel_kobj (initialization of the primary channel's kobject happens later) Above code flow is sequential and the warning is always reproducible in this path. Fixes: 9ab877a6ccf8 ("uio_hv_generic: make ring buffer attribute for primary channel") Cc: stable@kernel.org Suggested-by: Saurabh Sengar Suggested-by: Michael Kelley Reviewed-by: Michael Kelley Tested-by: Michael Kelley Reviewed-by: Dexuan Cui Signed-off-by: Naman Jain Link: https://lore.kernel.org/r/20250502074811.2022-2-namjain@linux.microsoft.com Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 675959fb97ba..d6ffe01962c2 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1002,6 +1002,12 @@ struct vmbus_channel { /* The max size of a packet on this channel */ u32 max_pkt_size; + + /* function to mmap ring buffer memory to the channel's sysfs ring attribute */ + int (*mmap_ring_buffer)(struct vmbus_channel *channel, struct vm_area_struct *vma); + + /* boolean to control visibility of sysfs for ring buffer */ + bool ring_sysfs_visible; }; #define lock_requestor(channel, flags) \ -- cgit v1.2.3 From 2bb04ea9e5b7a2ef583c042ed5f8111804606e9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 2 May 2025 15:01:01 +0200 Subject: drm/ttm: Fix ttm_backup kerneldoc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The docs were not properly updated from an earlier version of the code. Fixes: e7b5d23e5d47 ("drm/ttm: Provide a shmem backup implementation") Cc: Christian König Cc: Matthew Brost Cc: Matthew Auld Cc: dri-devel@lists.freedesktop.org Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250502130101.3185-1-thomas.hellstrom@linux.intel.com --- include/drm/ttm/ttm_backup.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_backup.h b/include/drm/ttm/ttm_backup.h index 24ad120b8827..574b932177cc 100644 --- a/include/drm/ttm/ttm_backup.h +++ b/include/drm/ttm/ttm_backup.h @@ -16,7 +16,7 @@ struct ttm_backup; * @handle: The handle to convert. * * Converts an opaque handle received from the - * struct ttm_backoup_ops::backup_page() function to an (invalid) + * ttm_backup_backup_page() function to an (invalid) * struct page pointer suitable for a struct page array. * * Return: An (invalid) struct page pointer. @@ -45,8 +45,8 @@ static inline bool ttm_backup_page_ptr_is_handle(const struct page *page) * * Return: The handle that was previously used in * ttm_backup_handle_to_page_ptr() to obtain a struct page pointer, suitable - * for use as argument in the struct ttm_backup_ops drop() or - * copy_backed_up_page() functions. + * for use as argument in the struct ttm_backup_drop() or + * ttm_backup_copy_page() functions. */ static inline unsigned long ttm_backup_page_ptr_to_handle(const struct page *page) -- cgit v1.2.3 From d4ad53adfe21df1464bae5ed916085a69d6ffb3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 2 May 2025 15:00:14 +0200 Subject: drm/ttm: Remove the struct ttm_backup abstraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The abstraction was previously added to support separate ttm_backup implementations. However with the current implementation casting from a struct file to a struct ttm_backup, we run into trouble since struct file may have randomized the layout and gcc complains. Remove the struct ttm_backup abstraction Cc: dri-devel@lists.freedesktop.org Cc: Matthew Brost Cc: Dave Airlie Cc: Christian König Cc: Matthew Auld Cc: Al Viro Reported-by: Kees Cook Closes: https://lore.kernel.org/dri-devel/9c8dbbafdaf9f3f089da2cde5a772d69579b3795.camel@linux.intel.com/T/#mb153ab9216cb813b92bdeb36f391ad4808c2ba29 Suggested-by: Christian König Fixes: 70d645deac98 ("drm/ttm: Add helpers for shrinking") Signed-off-by: Thomas Hellström Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250502130014.3156-1-thomas.hellstrom@linux.intel.com --- include/drm/ttm/ttm_backup.h | 12 +++++------- include/drm/ttm/ttm_tt.h | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_backup.h b/include/drm/ttm/ttm_backup.h index 574b932177cc..c33cba111171 100644 --- a/include/drm/ttm/ttm_backup.h +++ b/include/drm/ttm/ttm_backup.h @@ -9,8 +9,6 @@ #include #include -struct ttm_backup; - /** * ttm_backup_handle_to_page_ptr() - Convert handle to struct page pointer * @handle: The handle to convert. @@ -55,20 +53,20 @@ ttm_backup_page_ptr_to_handle(const struct page *page) return (unsigned long)page >> 1; } -void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle); +void ttm_backup_drop(struct file *backup, pgoff_t handle); -int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, +int ttm_backup_copy_page(struct file *backup, struct page *dst, pgoff_t handle, bool intr); s64 -ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, +ttm_backup_backup_page(struct file *backup, struct page *page, bool writeback, pgoff_t idx, gfp_t page_gfp, gfp_t alloc_gfp); -void ttm_backup_fini(struct ttm_backup *backup); +void ttm_backup_fini(struct file *backup); u64 ttm_backup_bytes_avail(void); -struct ttm_backup *ttm_backup_shmem_create(loff_t size); +struct file *ttm_backup_shmem_create(loff_t size); #endif diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 13cf47f3322f..406437ad674b 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -118,7 +118,7 @@ struct ttm_tt { * ttm_tt_create() callback is responsible for assigning * this field. */ - struct ttm_backup *backup; + struct file *backup; /** * @caching: The current caching state of the pages, see enum * ttm_caching. -- cgit v1.2.3 From e12a42f64fc3d74872b349eedd47f90c6676b78a Mon Sep 17 00:00:00 2001 From: Michael-CY Lee Date: Mon, 5 May 2025 16:19:46 +0800 Subject: wifi: mac80211: fix the type of status_code for negotiated TID to Link Mapping The status code should be type of __le16. Fixes: 83e897a961b8 ("wifi: ieee80211: add definitions for negotiated TID to Link map") Fixes: 8f500fbc6c65 ("wifi: mac80211: process and save negotiated TID to Link mapping request") Signed-off-by: Michael-CY Lee Link: https://patch.msgid.link/20250505081946.3927214-1-michael-cy.lee@mediatek.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 508d466de1cc..457b4fba88bd 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1526,7 +1526,7 @@ struct ieee80211_mgmt { struct { u8 action_code; u8 dialog_token; - u8 status_code; + __le16 status_code; u8 variable[]; } __packed ttlm_res; struct { -- cgit v1.2.3 From 78cd408356fe3edbac66598772fd347bf3e32c1f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 5 May 2025 18:19:19 -0700 Subject: net: add missing instance lock to dev_set_promiscuity Accidentally spotted while trying to understand what else needs to be renamed to netif_ prefix. Most of the calls to dev_set_promiscuity are adjacent to dev_set_allmulti or dev_disable_lro so it should be safe to add the lock. Note that new netif_set_promiscuity is currently unused, the locked paths call __dev_set_promiscuity directly. Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250506011919.2882313-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2d11d013cabe..7ea022750e4e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4972,6 +4972,7 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); +int netif_set_promiscuity(struct net_device *dev, int inc); int dev_set_promiscuity(struct net_device *dev, int inc); int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); -- cgit v1.2.3 From f5c79ffdc250bc8c90fd4fdf1e5d7ac4647912d5 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 5 May 2025 21:58:39 +0200 Subject: bpf: Clarify handling of mark and tstamp by redirect_peer When switching network namespaces with the bpf_redirect_peer helper, the skb->mark and skb->tstamp fields are not zeroed out like they can be on a typical netns switch. This patch clarifies that in the helper description. Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/ccc86af26d43c5c0b776bcba2601b7479c0d46d0.1746460653.git.paul.chaignon@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 28705ae67784..fd404729b115 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4968,6 +4968,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The -- cgit v1.2.3 From a0309faf1cb0622cac7c820150b7abf2024acff5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 25 Apr 2025 17:11:07 -0700 Subject: mm: vmalloc: support more granular vrealloc() sizing Introduce struct vm_struct::requested_size so that the requested (re)allocation size is retained separately from the allocated area size. This means that KASAN will correctly poison the correct spans of requested bytes. This also means we can support growing the usable portion of an allocation that can already be supported by the existing area's existing allocation. Link: https://lkml.kernel.org/r/20250426001105.it.679-kees@kernel.org Fixes: 3ddc2fefe6f3 ("mm: vmalloc: implement vrealloc()") Signed-off-by: Kees Cook Reported-by: Erhard Furtner Closes: https://lore.kernel.org/all/20250408192503.6149a816@outsider.home/ Reviewed-by: Danilo Krummrich Cc: Michal Hocko Cc: "Uladzislau Rezki (Sony)" Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/vmalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 31e9ffd936e3..5ca8d4dd149d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -61,6 +61,7 @@ struct vm_struct { unsigned int nr_pages; phys_addr_t phys_addr; const void *caller; + unsigned long requested_size; }; struct vmap_area { -- cgit v1.2.3 From 23fa6a23d97182d36ca3c71e43c804fa91e46a03 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 May 2025 17:32:20 -0700 Subject: net: export a helper for adding up queue stats Older drivers and drivers with lower queue counts often have a static array of queues, rather than allocating structs for each queue on demand. Add a helper for adding up qstats from a queue range. Expectation is that driver will pass a queue range [netdev->real_num_*x_queues, MAX). It was tempting to always use num_*x_queues as the end, but virtio seems to clamp its queue count after allocating the netdev. And this way we can trivaly reuse the helper for [0, real_..). Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20250507003221.823267-2-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/net/netdev_queues.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 825141d675e5..d01c82983b4d 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -103,6 +103,12 @@ struct netdev_stat_ops { struct netdev_queue_stats_tx *tx); }; +void netdev_stat_queue_sum(struct net_device *netdev, + int rx_start, int rx_end, + struct netdev_queue_stats_rx *rx_sum, + int tx_start, int tx_end, + struct netdev_queue_stats_tx *tx_sum); + /** * struct netdev_queue_mgmt_ops - netdev ops for queue management * -- cgit v1.2.3 From c82b6357a5465a3222780ac5d3edcdfb02208cc3 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 30 Apr 2025 15:07:03 -0400 Subject: Bluetooth: hci_event: Fix not using key encryption size when its known This fixes the regression introduced by 50c1241e6a8a ("Bluetooth: l2cap: Check encryption key size on incoming connection") introduced a check for l2cap_check_enc_key_size which checks for hcon->enc_key_size which may not be initialized if HCI_OP_READ_ENC_KEY_SIZE is still pending. If the key encryption size is known, due previously reading it using HCI_OP_READ_ENC_KEY_SIZE, then store it as part of link_key/smp_ltk structures so the next time the encryption is changed their values are used as conn->enc_key_size thus avoiding the racing against HCI_OP_READ_ENC_KEY_SIZE. Now that the enc_size is stored as part of key the information the code then attempts to check that there is no downgrade of security if HCI_OP_READ_ENC_KEY_SIZE returns a value smaller than what has been previously stored. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220061 Link: https://bugzilla.kernel.org/show_bug.cgi?id=220063 Fixes: 522e9ed157e3 ("Bluetooth: l2cap: Check encryption key size on incoming connection") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 522d837a23fa..54bfeeaa0995 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1798,6 +1798,7 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, void hci_uuids_clear(struct hci_dev *hdev); void hci_link_keys_clear(struct hci_dev *hdev); +u8 *hci_conn_key_enc_size(struct hci_conn *conn); struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr); struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, bdaddr_t *bdaddr, u8 *val, u8 type, -- cgit v1.2.3 From 2d3cbfd6d54a2c39ce3244f33f85c595844bd7b8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 6 May 2025 21:35:58 -0700 Subject: net_sched: Flush gso_skb list too during ->change() Previously, when reducing a qdisc's limit via the ->change() operation, only the main skb queue was trimmed, potentially leaving packets in the gso_skb list. This could result in NULL pointer dereference when we only check sch->limit against sch->q.qlen. This patch introduces a new helper, qdisc_dequeue_internal(), which ensures both the gso_skb list and the main queue are properly flushed when trimming excess packets. All relevant qdiscs (codel, fq, fq_codel, fq_pie, hhf, pie) are updated to use this helper in their ->change() routines. Fixes: 76e3cc126bb2 ("codel: Controlled Delay AQM") Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM") Fixes: afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler") Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler") Fixes: 10239edf86f1 ("net-qdisc-hhf: Heavy-Hitter Filter (HHF) qdisc") Fixes: d4b36210c2e6 ("net: pkt_sched: PIE AQM scheme") Reported-by: Will Reported-by: Savy Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/sch_generic.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d48c657191cd..1c05fed05f2b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1031,6 +1031,21 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) return skb; } +static inline struct sk_buff *qdisc_dequeue_internal(struct Qdisc *sch, bool direct) +{ + struct sk_buff *skb; + + skb = __skb_dequeue(&sch->gso_skb); + if (skb) { + sch->q.qlen--; + return skb; + } + if (direct) + return __qdisc_dequeue_head(&sch->q); + else + return sch->dequeue(sch); +} + static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) { struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); -- cgit v1.2.3 From f4818881c47fd91fcb6d62373c57c7844e3de1c0 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 21 Jun 2024 20:23:23 -0700 Subject: x86/its: Enable Indirect Target Selection mitigation Indirect Target Selection (ITS) is a bug in some pre-ADL Intel CPUs with eIBRS. It affects prediction of indirect branch and RETs in the lower half of cacheline. Due to ITS such branches may get wrongly predicted to a target of (direct or indirect) branch that is located in the upper half of the cacheline. Scope of impact =============== Guest/host isolation -------------------- When eIBRS is used for guest/host isolation, the indirect branches in the VMM may still be predicted with targets corresponding to branches in the guest. Intra-mode ---------- cBPF or other native gadgets can be used for intra-mode training and disclosure using ITS. User/kernel isolation --------------------- When eIBRS is enabled user/kernel isolation is not impacted. Indirect Branch Prediction Barrier (IBPB) ----------------------------------------- After an IBPB, indirect branches may be predicted with targets corresponding to direct branches which were executed prior to IBPB. This is mitigated by a microcode update. Add cmdline parameter indirect_target_selection=off|on|force to control the mitigation to relocate the affected branches to an ITS-safe thunk i.e. located in the upper half of cacheline. Also add the sysfs reporting. When retpoline mitigation is deployed, ITS safe-thunks are not needed, because retpoline sequence is already ITS-safe. Similarly, when call depth tracking (CDT) mitigation is deployed (retbleed=stuff), ITS safe return thunk is not used, as CDT prevents RSB-underflow. To not overcomplicate things, ITS mitigation is not supported with spectre-v2 lfence;jmp mitigation. Moreover, it is less practical to deploy lfence;jmp mitigation on ITS affected parts anyways. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e3049543008b..3aa955102b34 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -78,6 +78,8 @@ extern ssize_t cpu_show_gds(struct device *dev, extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_indirect_target_selection(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit v1.2.3 From d6d1e3e6580ca35071ad474381f053cbf1fb6414 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 Apr 2025 16:30:11 +0200 Subject: mm/execmem: Unify early execmem_cache behaviour Early kernel memory is RWX, only at the end of early boot (before SMP) do we mark things ROX. Have execmem_cache mirror this behaviour for early users. This avoids having to remember what code is execmem and what is not -- we can poke everything with impunity ;-) Also performance for not having to do endless text_poke_mm switches. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Alexandre Chartre --- include/linux/execmem.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 65655a5d1be2..089c71cc8ddf 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -53,7 +53,7 @@ enum execmem_range_flags { EXECMEM_ROX_CACHE = (1 << 1), }; -#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX +#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM) /** * execmem_fill_trapping_insns - set memory to contain instructions that * will trap @@ -93,9 +93,15 @@ int execmem_make_temp_rw(void *ptr, size_t size); * Return: 0 on success or negative error code on failure. */ int execmem_restore_rox(void *ptr, size_t size); + +/* + * Called from mark_readonly(), where the system transitions to ROX. + */ +void execmem_cache_make_ro(void); #else static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } +static inline void execmem_cache_make_ro(void) { } #endif /** -- cgit v1.2.3 From 872df34d7c51a79523820ea6a14860398c639b87 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Oct 2024 10:05:48 -0700 Subject: x86/its: Use dynamic thunks for indirect branches ITS mitigation moves the unsafe indirect branches to a safe thunk. This could degrade the prediction accuracy as the source address of indirect branches becomes same for different execution paths. To improve the predictions, and hence the performance, assign a separate thunk for each indirect callsite. This is also a defense-in-depth measure to avoid indirect branches aliasing with each other. As an example, 5000 dynamic thunks would utilize around 16 bits of the address space, thereby gaining entropy. For a BTB that uses 32 bits for indexing, dynamic thunks could provide better prediction accuracy over fixed thunks. Have ITS thunks be variable sized and use EXECMEM_MODULE_TEXT such that they are both more flexible (got to extend them later) and live in 2M TLBs, just like kernel code, avoiding undue TLB pressure. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Alexandre Chartre --- include/linux/execmem.h | 3 +++ include/linux/module.h | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 089c71cc8ddf..ca42d5e46ccc 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -4,6 +4,7 @@ #include #include +#include #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ !defined(CONFIG_KASAN_VMALLOC) @@ -176,6 +177,8 @@ void *execmem_alloc(enum execmem_type type, size_t size); */ void execmem_free(void *ptr); +DEFINE_FREE(execmem, void *, if (_T) execmem_free(_T)); + #ifdef CONFIG_MMU /** * execmem_vmap - create virtual mapping for EXECMEM_MODULE_DATA memory diff --git a/include/linux/module.h b/include/linux/module.h index b3329110d668..8050f77c3b64 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -586,6 +586,11 @@ struct module { atomic_t refcnt; #endif +#ifdef CONFIG_MITIGATION_ITS + int its_num_pages; + void **its_page_array; +#endif + #ifdef CONFIG_CONSTRUCTORS /* Constructor functions. */ ctor_fn_t *ctors; -- cgit v1.2.3 From 1f93d877f09d987f08baedd50597aaaa72a37be4 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 8 May 2025 21:12:07 +0300 Subject: ALSA/hda: intel-sdw-acpi: Correct sdw_intel_acpi_scan() function parameter The acpi_handle should be just a handle and not a pointer in sdw_intel_acpi_scan() parameter list. It is called with 'acpi_handle handle' as parameter and it is passing it to acpi_walk_namespace, which also expects acpi_handle and not acpi_handle* Signed-off-by: Peter Ujfalusi Reviewed-by: Bard Liao Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Link: https://patch.msgid.link/20250508181207.22113-1-peter.ujfalusi@linux.intel.com Signed-off-by: Takashi Iwai --- include/linux/soundwire/sdw_intel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 493d9de4e472..dc6ebaee3d18 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -365,7 +365,7 @@ struct sdw_intel_res { * on e.g. which machine driver to select (I2S mode, HDaudio or * SoundWire). */ -int sdw_intel_acpi_scan(acpi_handle *parent_handle, +int sdw_intel_acpi_scan(acpi_handle parent_handle, struct sdw_intel_acpi_info *info); void sdw_intel_process_wakeen_event(struct sdw_intel_ctx *ctx); -- cgit v1.2.3 From dd33993a9721ab1dae38bd37c9f665987d554239 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 11 May 2025 16:11:45 +0200 Subject: ALSA: ump: Fix a typo of snd_ump_stream_msg_device_info s/devince/device/ It's used only internally, so no any behavior changes. Fixes: 37e0e14128e0 ("ALSA: ump: Support UMP Endpoint and Function Block parsing") Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20250511141147.10246-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/ump_msg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/ump_msg.h b/include/sound/ump_msg.h index 72f60ddfea75..9556b4755a1e 100644 --- a/include/sound/ump_msg.h +++ b/include/sound/ump_msg.h @@ -604,7 +604,7 @@ struct snd_ump_stream_msg_ep_info { } __packed; /* UMP Stream Message: Device Info Notification (128bit) */ -struct snd_ump_stream_msg_devince_info { +struct snd_ump_stream_msg_device_info { #ifdef __BIG_ENDIAN_BITFIELD /* 0 */ u32 type:4; @@ -754,7 +754,7 @@ struct snd_ump_stream_msg_fb_name { union snd_ump_stream_msg { struct snd_ump_stream_msg_ep_discovery ep_discovery; struct snd_ump_stream_msg_ep_info ep_info; - struct snd_ump_stream_msg_devince_info device_info; + struct snd_ump_stream_msg_device_info device_info; struct snd_ump_stream_msg_stream_cfg stream_cfg; struct snd_ump_stream_msg_fb_discovery fb_discovery; struct snd_ump_stream_msg_fb_info fb_info; -- cgit v1.2.3 From 0ae0227fa31dda5bfc6b5a0145952d46fe57408b Mon Sep 17 00:00:00 2001 From: David Wang <00107082@163.com> Date: Tue, 6 May 2025 03:30:34 +0800 Subject: mm/codetag: move tag retrieval back upfront in __free_pages() Commit 51ff4d7486f0 ("mm: avoid extra mem_alloc_profiling_enabled() checks") introduces a possible use-after-free scenario, when page is non-compound, page[0] could be released by other thread right after put_page_testzero failed in current thread, pgalloc_tag_sub_pages afterwards would manipulate an invalid page for accounting remaining pages: [timeline] [thread1] [thread2] | alloc_page non-compound V | get_page, rf counter inc V | in ___free_pages | put_page_testzero fails V | put_page, page released V | in ___free_pages, | pgalloc_tag_sub_pages | manipulate an invalid page V Restore __free_pages() to its state before, retrieve alloc tag beforehand. Link: https://lkml.kernel.org/r/20250505193034.91682-1-00107082@163.com Fixes: 51ff4d7486f0 ("mm: avoid extra mem_alloc_profiling_enabled() checks") Signed-off-by: David Wang <00107082@163.com> Acked-by: Suren Baghdasaryan Reviewed-by: Vlastimil Babka Cc: Brendan Jackman Cc: Johannes Weiner Cc: Michal Hocko Cc: Shakeel Butt Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/pgalloc_tag.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index c74077977830..8a7f4f802c57 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -188,6 +188,13 @@ static inline struct alloc_tag *__pgalloc_tag_get(struct page *page) return tag; } +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) +{ + if (mem_alloc_profiling_enabled()) + return __pgalloc_tag_get(page); + return NULL; +} + void pgalloc_tag_split(struct folio *folio, int old_order, int new_order); void pgalloc_tag_swap(struct folio *new, struct folio *old); @@ -199,6 +206,7 @@ static inline void clear_page_tag_ref(struct page *page) {} static inline void alloc_tag_sec_init(void) {} static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {} static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {} +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } #endif /* CONFIG_MEM_ALLOC_PROFILING */ -- cgit v1.2.3 From e8007fad5457ea547ca63bb011fdb03213571c7e Mon Sep 17 00:00:00 2001 From: Steve Siwinski Date: Thu, 8 May 2025 16:01:22 -0400 Subject: scsi: sd_zbc: block: Respect bio vector limits for REPORT ZONES buffer The REPORT ZONES buffer size is currently limited by the HBA's maximum segment count to ensure the buffer can be mapped. However, the block layer further limits the number of iovec entries to 1024 when allocating a bio. To avoid allocation of buffers too large to be mapped, further restrict the maximum buffer size to BIO_MAX_INLINE_VECS. Replace the UIO_MAXIOV symbolic name with the more contextually appropriate BIO_MAX_INLINE_VECS. Fixes: b091ac616846 ("sd_zbc: Fix report zones buffer allocation") Cc: stable@vger.kernel.org Signed-off-by: Steve Siwinski Link: https://lore.kernel.org/r/20250508200122.243129-1-ssiwinski@atto.com Reviewed-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index cafc7c215de8..b786ec5bcc81 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -11,6 +11,7 @@ #include #define BIO_MAX_VECS 256U +#define BIO_MAX_INLINE_VECS UIO_MAXIOV struct queue_limits; -- cgit v1.2.3 From 8c619eb21b8e87ae95877e9cca9fcb0e3115776e Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Sun, 4 May 2025 10:14:34 +0200 Subject: net: phy: micrel: remove KSZ9477 EEE quirks now handled by phylink The KSZ9477 PHY driver contained workarounds for broken EEE capability advertisements by manually masking supported EEE modes and forcibly disabling EEE if MICREL_NO_EEE was set. With proper MAC-side EEE handling implemented via phylink, these quirks are no longer necessary. Remove MICREL_NO_EEE handling and the use of ksz9477_get_features(). This simplifies the PHY driver and avoids duplicated EEE management logic. Signed-off-by: Oleksij Rempel Cc: stable@vger.kernel.org # v6.14+ Link: https://patch.msgid.link/20250504081434.424489-3-o.rempel@pengutronix.de Signed-off-by: Paolo Abeni --- include/linux/micrel_phy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 591bf5b5e8dc..9af01bdd86d2 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -44,7 +44,6 @@ #define MICREL_PHY_50MHZ_CLK BIT(0) #define MICREL_PHY_FXEN BIT(1) #define MICREL_KSZ8_P1_ERRATA BIT(2) -#define MICREL_NO_EEE BIT(3) #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC -- cgit v1.2.3 From 7bd68ce21d39150d80806233965326359f517b78 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Mon, 12 May 2025 06:54:55 -0700 Subject: drm/gpusvm: Introduce devmem_only flag for allocation This commit adds a new flag, devmem_only, to the drm_gpusvm structure. The purpose of this flag is to ensure that the get_pages function allocates memory exclusively from the device's memory. If the allocation from device memory fails, the function will return an -EFAULT error. Required for shared CPU and GPU atomics on certain devices. v3: - s/vram_only/devmem_only/ Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250512135500.1405019-2-matthew.brost@intel.com (cherry picked from commit 8a9b978ebd47df9e0694c34748c2d6fa0c31eb4d) Signed-off-by: Lucas De Marchi --- include/drm/drm_gpusvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index df120b4d1f83..9fd25fc880a4 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -286,6 +286,7 @@ struct drm_gpusvm { * @in_notifier: entering from a MMU notifier * @read_only: operating on read-only memory * @devmem_possible: possible to use device memory + * @devmem_only: use only device memory * * Context that is DRM GPUSVM is operating in (i.e. user arguments). */ @@ -294,6 +295,7 @@ struct drm_gpusvm_ctx { unsigned int in_notifier :1; unsigned int read_only :1; unsigned int devmem_possible :1; + unsigned int devmem_only :1; }; int drm_gpusvm_init(struct drm_gpusvm *gpusvm, -- cgit v1.2.3 From 794f5493f518916380578f14d999de92b930b609 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 12 May 2025 06:54:56 -0700 Subject: drm/xe: Strict migration policy for atomic SVM faults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixing GPU and CPU atomics does not work unless a strict migration policy of GPU atomics must be device memory. Enforce a policy of must be in VRAM with a retry loop of 3 attempts, if retry loop fails abort fault. Removing always_migrate_to_vram modparam as we now have real migration policy. v2: - Only retry migration on atomics - Drop alway migrate modparam v3: - Only set vram_only on DGFX (Himal) - Bail on get_pages failure if vram_only and retry count exceeded (Himal) - s/vram_only/devmem_only - Update xe_svm_range_is_valid to accept devmem_only argument v4: - Fix logic bug get_pages failure v5: - Fix commit message (Himal) - Mention removing always_migrate_to_vram in commit message (Lucas) - Fix xe_svm_range_is_valid to check for devmem pages - Bail on devmem_only && !migrate_devmem (Thomas) v6: - Add READ_ONCE barriers for opportunistic checks (Thomas) - Pair READ_ONCE with WRITE_ONCE (Thomas) v7: - Adjust comments (Thomas) Fixes: 2f118c949160 ("drm/xe: Add SVM VRAM migration") Cc: stable@vger.kernel.org Signed-off-by: Himal Prasad Ghimiray Signed-off-by: Matthew Brost Acked-by: Himal Prasad Ghimiray Reviewed-by: Thomas Hellström Link: https://lore.kernel.org/r/20250512135500.1405019-3-matthew.brost@intel.com (cherry picked from commit a9ac0fa455b050d03e3032501368048fb284d318) Signed-off-by: Lucas De Marchi --- include/drm/drm_gpusvm.h | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index 9fd25fc880a4..653d48dbe1c1 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -185,6 +185,31 @@ struct drm_gpusvm_notifier { } flags; }; +/** + * struct drm_gpusvm_range_flags - Structure representing a GPU SVM range flags + * + * @migrate_devmem: Flag indicating whether the range can be migrated to device memory + * @unmapped: Flag indicating if the range has been unmapped + * @partial_unmap: Flag indicating if the range has been partially unmapped + * @has_devmem_pages: Flag indicating if the range has devmem pages + * @has_dma_mapping: Flag indicating if the range has a DMA mapping + * @__flags: Flags for range in u16 form (used for READ_ONCE) + */ +struct drm_gpusvm_range_flags { + union { + struct { + /* All flags below must be set upon creation */ + u16 migrate_devmem : 1; + /* All flags below must be set / cleared under notifier lock */ + u16 unmapped : 1; + u16 partial_unmap : 1; + u16 has_devmem_pages : 1; + u16 has_dma_mapping : 1; + }; + u16 __flags; + }; +}; + /** * struct drm_gpusvm_range - Structure representing a GPU SVM range * @@ -198,11 +223,6 @@ struct drm_gpusvm_notifier { * @dpagemap: The struct drm_pagemap of the device pages we're dma-mapping. * Note this is assuming only one drm_pagemap per range is allowed. * @flags: Flags for range - * @flags.migrate_devmem: Flag indicating whether the range can be migrated to device memory - * @flags.unmapped: Flag indicating if the range has been unmapped - * @flags.partial_unmap: Flag indicating if the range has been partially unmapped - * @flags.has_devmem_pages: Flag indicating if the range has devmem pages - * @flags.has_dma_mapping: Flag indicating if the range has a DMA mapping * * This structure represents a GPU SVM range used for tracking memory ranges * mapped in a DRM device. @@ -216,15 +236,7 @@ struct drm_gpusvm_range { unsigned long notifier_seq; struct drm_pagemap_device_addr *dma_addr; struct drm_pagemap *dpagemap; - struct { - /* All flags below must be set upon creation */ - u16 migrate_devmem : 1; - /* All flags below must be set / cleared under notifier lock */ - u16 unmapped : 1; - u16 partial_unmap : 1; - u16 has_devmem_pages : 1; - u16 has_dma_mapping : 1; - } flags; + struct drm_gpusvm_range_flags flags; }; /** -- cgit v1.2.3 From df8c37810b5a4a5c8bff1f880f59163c800a4985 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 12 May 2025 06:54:57 -0700 Subject: drm/gpusvm: Add timeslicing support to GPU SVM Add timeslicing support to GPU SVM which will guarantee the GPU a minimum execution time on piece of physical memory before migration back to CPU. Intended to implement strict migration policies which require memory to be in a certain placement for correct execution. Required for shared CPU and GPU atomics on certain devices. Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20250512135500.1405019-4-matthew.brost@intel.com (cherry picked from commit 8dc1812b5b3a42311d28eb385eed88e2053ad3cb) Signed-off-by: Lucas De Marchi --- include/drm/drm_gpusvm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index 653d48dbe1c1..eaf704d3d05e 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -89,6 +89,7 @@ struct drm_gpusvm_devmem_ops { * @ops: Pointer to the operations structure for GPU SVM device memory * @dpagemap: The struct drm_pagemap of the pages this allocation belongs to. * @size: Size of device memory allocation + * @timeslice_expiration: Timeslice expiration in jiffies */ struct drm_gpusvm_devmem { struct device *dev; @@ -97,6 +98,7 @@ struct drm_gpusvm_devmem { const struct drm_gpusvm_devmem_ops *ops; struct drm_pagemap *dpagemap; size_t size; + u64 timeslice_expiration; }; /** @@ -295,6 +297,8 @@ struct drm_gpusvm { * @check_pages_threshold: Check CPU pages for present if chunk is less than or * equal to threshold. If not present, reduce chunk * size. + * @timeslice_ms: The timeslice MS which in minimum time a piece of memory + * remains with either exclusive GPU or CPU access. * @in_notifier: entering from a MMU notifier * @read_only: operating on read-only memory * @devmem_possible: possible to use device memory @@ -304,6 +308,7 @@ struct drm_gpusvm { */ struct drm_gpusvm_ctx { unsigned long check_pages_threshold; + unsigned long timeslice_ms; unsigned int in_notifier :1; unsigned int read_only :1; unsigned int devmem_possible :1; -- cgit v1.2.3 From 539fbab37881e32ba6a708a100de6db19e1e7e7d Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 7 Apr 2025 15:28:05 +0300 Subject: tpm: Mask TPM RC in tpm2_start_auth_session() tpm2_start_auth_session() does not mask TPM RC correctly from the callers: [ 28.766528] tpm tpm0: A TPM error (2307) occurred start auth session Process TPM RCs inside tpm2_start_auth_session(), and map them to POSIX error codes. Cc: stable@vger.kernel.org # v6.10+ Fixes: 699e3efd6c64 ("tpm: Add HMAC session start and end functions") Reported-by: Herbert Xu Closes: https://lore.kernel.org/linux-integrity/Z_NgdRHuTKP6JK--@gondor.apana.org.au/ Reviewed-by: Stefano Garzarella Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 6c3125300c00..9ac9768cc8f7 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -257,6 +257,7 @@ enum tpm2_return_codes { TPM2_RC_TESTING = 0x090A, /* RC_WARN */ TPM2_RC_REFERENCE_H0 = 0x0910, TPM2_RC_RETRY = 0x0922, + TPM2_RC_SESSION_MEMORY = 0x0903, }; enum tpm2_command_codes { @@ -437,6 +438,24 @@ static inline u32 tpm2_rc_value(u32 rc) return (rc & BIT(7)) ? rc & 0xbf : rc; } +/* + * Convert a return value from tpm_transmit_cmd() to POSIX error code. + */ +static inline ssize_t tpm_ret_to_err(ssize_t ret) +{ + if (ret < 0) + return ret; + + switch (tpm2_rc_value(ret)) { + case TPM2_RC_SUCCESS: + return 0; + case TPM2_RC_SESSION_MEMORY: + return -ENOMEM; + default: + return -EFAULT; + } +} + #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_is_tpm2(struct tpm_chip *chip); -- cgit v1.2.3 From 2f661f71fda1fc0c42b7746ca5b7da529eb6b5be Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 4 Apr 2025 10:23:14 +0200 Subject: tpm: tis: Double the timeout B to 4s With some Infineon chips the timeouts in tpm_tis_send_data (both B and C) can reach up to about 2250 ms. Timeout C is retried since commit de9e33df7762 ("tpm, tpm_tis: Workaround failed command reception on Infineon devices") Timeout B still needs to be extended. The problem is most commonly encountered with context related operation such as load context/save context. These are issued directly by the kernel, and there is no retry logic for them. When a filesystem is set up to use the TPM for unlocking the boot fails, and restarting the userspace service is ineffective. This is likely because ignoring a load context/save context result puts the real TPM state and the TPM state expected by the kernel out of sync. Chips known to be affected: tpm_tis IFX1522:00: 2.0 TPM (device-id 0x1D, rev-id 54) Description: SLB9672 Firmware Revision: 15.22 tpm_tis MSFT0101:00: 2.0 TPM (device-id 0x1B, rev-id 22) Firmware Revision: 7.83 tpm_tis MSFT0101:00: 2.0 TPM (device-id 0x1A, rev-id 16) Firmware Revision: 5.63 Link: https://lore.kernel.org/linux-integrity/Z5pI07m0Muapyu9w@kitsune.suse.cz/ Signed-off-by: Michal Suchanek Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 9ac9768cc8f7..a3d8305e88a5 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -224,7 +224,7 @@ enum tpm2_const { enum tpm2_timeouts { TPM2_TIMEOUT_A = 750, - TPM2_TIMEOUT_B = 2000, + TPM2_TIMEOUT_B = 4000, TPM2_TIMEOUT_C = 200, TPM2_TIMEOUT_D = 30, TPM2_DURATION_SHORT = 20, -- cgit v1.2.3 From 45a442fe369e6c4e0b4aa9f63b31c3f2f9e2090e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 12 May 2025 17:06:04 -0700 Subject: Drivers: hv: vmbus: Remove vmbus_sendpacket_pagebuffer() With the netvsc driver changed to use vmbus_sendpacket_mpb_desc() instead of vmbus_sendpacket_pagebuffer(), the latter has no remaining callers. Remove it. Cc: # 6.1.x Signed-off-by: Michael Kelley Link: https://patch.msgid.link/20250513000604.1396-6-mhklinux@outlook.com Signed-off-by: Jakub Kicinski --- include/linux/hyperv.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 675959fb97ba..ea806cef4a0d 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1161,13 +1161,6 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel, enum vmbus_packet_type type, u32 flags); -extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, - struct hv_page_buffer pagebuffers[], - u32 pagecount, - void *buffer, - u32 bufferlen, - u64 requestid); - extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *mpb, u32 desc_size, -- cgit v1.2.3 From dcd21b609d4abc7303f8683bce4f35d78d7d6830 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 27 Apr 2025 18:21:06 -0400 Subject: NFS: Avoid flushing data while holding directory locks in nfs_rename() The Linux client assumes that all filehandles are non-volatile for renames within the same directory (otherwise sillyrename cannot work). However, the existence of the Linux 'subtree_check' export option has meant that nfs_rename() has always assumed it needs to flush writes before attempting to rename. Since NFSv4 does allow the client to query whether or not the server exhibits this behaviour, and since knfsd does actually set the appropriate flag when 'subtree_check' is enabled on an export, it should be OK to optimise away the write flushing behaviour in the cases where it is clearly not needed. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton --- include/linux/nfs_fs_sb.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 71319637a84e..ee03f3cef30c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -213,6 +213,15 @@ struct nfs_server { char *fscache_uniq; /* Uniquifier (or NULL) */ #endif + /* The following #defines numerically match the NFSv4 equivalents */ +#define NFS_FH_NOEXPIRE_WITH_OPEN (0x1) +#define NFS_FH_VOLATILE_ANY (0x2) +#define NFS_FH_VOL_MIGRATION (0x4) +#define NFS_FH_VOL_RENAME (0x8) +#define NFS_FH_RENAME_UNSAFE (NFS_FH_VOLATILE_ANY | NFS_FH_VOL_RENAME) + u32 fh_expire_type; /* V4 bitmask representing file + handle volatility type for + this filesystem */ u32 pnfs_blksize; /* layout_blksize attr */ #if IS_ENABLED(CONFIG_NFS_V4) u32 attr_bitmask[3];/* V4 bitmask representing the set @@ -236,9 +245,6 @@ struct nfs_server { u32 acl_bitmask; /* V4 bitmask representing the ACEs that are supported on this filesystem */ - u32 fh_expire_type; /* V4 bitmask representing file - handle volatility type for - this filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; void *pnfs_ld_data; /* per mount point data */ -- cgit v1.2.3