From da142f3d373a6ddaca0119615a8db2175ddc4121 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 5 Dec 2025 15:26:55 -0800
Subject: KVM: Remove subtle "struct kvm_stats_desc" pseudo-overlay

Remove KVM's internal pseudo-overlay of kvm_stats_desc, which subtly
aliases the flexible name[] in the uAPI definition with a fixed-size array
of the same name.  The unusual embedded structure results in compiler
warnings due to -Wflex-array-member-not-at-end, and also necessitates an
extra level of dereferencing in KVM.  To avoid the "overlay", define the
uAPI structure to have a fixed-size name when building for the kernel.

Opportunistically clean up the indentation for the stats macros, and
replace spaces with tabs.

No functional change intended.

Reported-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Closes: https://lore.kernel.org/all/aPfNKRpLfhmhYqfP@kspp
Acked-by: Marc Zyngier <maz@kernel.org>
Acked-by: Christian Borntraeger <borntraeger@linux.ibm.com>
[..]
Acked-by: Anup Patel <anup@brainfault.org>
Reviewed-by: Bibo Mao <maobibo@loongson.cn>
Acked-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://patch.msgid.link/20251205232655.445294-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/kvm_host.h | 83 ++++++++++++++++++++----------------------------
 include/uapi/linux/kvm.h |  8 +++++
 2 files changed, 43 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d93f75b05ae2..7428d9949382 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1927,56 +1927,43 @@ enum kvm_stat_kind {
 
 struct kvm_stat_data {
 	struct kvm *kvm;
-	const struct _kvm_stats_desc *desc;
+	const struct kvm_stats_desc *desc;
 	enum kvm_stat_kind kind;
 };
 
-struct _kvm_stats_desc {
-	struct kvm_stats_desc desc;
-	char name[KVM_STATS_NAME_SIZE];
-};
-
-#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz)		       \
-	.flags = type | unit | base |					       \
-		 BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) |	       \
-		 BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) |	       \
-		 BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK),	       \
-	.exponent = exp,						       \
-	.size = sz,							       \
+#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz)		\
+	.flags = type | unit | base |					\
+		 BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) |       \
+		 BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) |	\
+		 BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK),	\
+	.exponent = exp,						\
+	.size = sz,							\
 	.bucket_size = bsz
 
-#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz)	       \
-	{								       \
-		{							       \
-			STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),     \
-			.offset = offsetof(struct kvm_vm_stat, generic.stat)   \
-		},							       \
-		.name = #stat,						       \
-	}
-#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz)	       \
-	{								       \
-		{							       \
-			STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),     \
-			.offset = offsetof(struct kvm_vcpu_stat, generic.stat) \
-		},							       \
-		.name = #stat,						       \
-	}
-#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz)		       \
-	{								       \
-		{							       \
-			STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),     \
-			.offset = offsetof(struct kvm_vm_stat, stat)	       \
-		},							       \
-		.name = #stat,						       \
-	}
-#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz)		       \
-	{								       \
-		{							       \
-			STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),     \
-			.offset = offsetof(struct kvm_vcpu_stat, stat)	       \
-		},							       \
-		.name = #stat,						       \
-	}
+#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz)	\
+{									\
+	STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),		\
+	.offset = offsetof(struct kvm_vm_stat, generic.stat),		\
+	.name = #stat,							\
+}
+#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz)	\
+{									\
+	STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),		\
+	.offset = offsetof(struct kvm_vcpu_stat, generic.stat),		\
+	.name = #stat,							\
+}
+#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz)		\
+{									\
+	STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),		\
+	.offset = offsetof(struct kvm_vm_stat, stat),			\
+	.name = #stat,							\
+}
+#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz)		\
+{									\
+	STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),		\
+	.offset = offsetof(struct kvm_vcpu_stat, stat),			\
+	.name = #stat,							\
+}
 /* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */
 #define STATS_DESC(SCOPE, stat, type, unit, base, exp, sz, bsz)		       \
 	SCOPE##_STATS_DESC(stat, type, unit, base, exp, sz, bsz)
@@ -2053,7 +2040,7 @@ struct _kvm_stats_desc {
 	STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking)
 
 ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
-		       const struct _kvm_stats_desc *desc,
+		       const struct kvm_stats_desc *desc,
 		       void *stats, size_t size_stats,
 		       char __user *user_buffer, size_t size, loff_t *offset);
 
@@ -2098,9 +2085,9 @@ static inline void kvm_stats_log_hist_update(u64 *data, size_t size, u64 value)
 
 
 extern const struct kvm_stats_header kvm_vm_stats_header;
-extern const struct _kvm_stats_desc kvm_vm_stats_desc[];
+extern const struct kvm_stats_desc kvm_vm_stats_desc[];
 extern const struct kvm_stats_header kvm_vcpu_stats_header;
-extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[];
+extern const struct kvm_stats_desc kvm_vcpu_stats_desc[];
 
 #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
 static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index dddb781b0507..76bd54848b11 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -14,6 +14,10 @@
 #include <linux/ioctl.h>
 #include <asm/kvm.h>
 
+#ifdef __KERNEL__
+#include <linux/kvm_types.h>
+#endif
+
 #define KVM_API_VERSION 12
 
 /*
@@ -1579,7 +1583,11 @@ struct kvm_stats_desc {
 	__u16 size;
 	__u32 offset;
 	__u32 bucket_size;
+#ifdef __KERNEL__
+	char name[KVM_STATS_NAME_SIZE];
+#else
 	char name[];
+#endif
 };
 
 #define KVM_GET_STATS_FD  _IO(KVMIO,  0xce)
-- 
cgit v1.2.3


From b777b5e09eabeefc6ba80f4296366a4742701103 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 10 Feb 2026 17:02:25 +0000
Subject: time/jiffies: Inline jiffies_to_msecs() and jiffies_to_usecs()

For common cases (HZ=100, 250 or 1000), these helpers are at most one
multiply, so there is no point calling a tiny function.

Keep them out of line for HZ=300 and others.

This saves cycles in TCP fast path, among other things.

$ scripts/bloat-o-meter -t vmlinux.old vmlinux.new
add/remove: 0/8 grow/shrink: 25/89 up/down: 530/-3474 (-2944)
...
nla_put_msecs                                193       -    -193
message_stats_print                         2131     920   -1211
Total: Before=25365208, After=25362264, chg -0.01%

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260210170226.57209-1-edumazet@google.com
---
 include/linux/jiffies.h | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index fdef2c155c27..d1c3d4941854 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -434,8 +434,44 @@ extern unsigned long preset_lpj;
 /*
  * Convert various time units to each other:
  */
-extern unsigned int jiffies_to_msecs(const unsigned long j);
-extern unsigned int jiffies_to_usecs(const unsigned long j);
+
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+/**
+ * jiffies_to_msecs - Convert jiffies to milliseconds
+ * @j: jiffies value
+ *
+ * This inline version takes care of HZ in {100,250,1000}.
+ *
+ * Return: milliseconds value
+ */
+static inline unsigned int jiffies_to_msecs(const unsigned long j)
+{
+	return (MSEC_PER_SEC / HZ) * j;
+}
+#else
+unsigned int jiffies_to_msecs(const unsigned long j);
+#endif
+
+#if !(USEC_PER_SEC % HZ)
+/**
+ * jiffies_to_usecs - Convert jiffies to microseconds
+ * @j: jiffies value
+ *
+ * Return: microseconds value
+ */
+static inline unsigned int jiffies_to_usecs(const unsigned long j)
+{
+	/*
+	 * Hz usually doesn't go much further MSEC_PER_SEC.
+	 * jiffies_to_usecs() and usecs_to_jiffies() depend on that.
+	 */
+	BUILD_BUG_ON(HZ > USEC_PER_SEC);
+
+	return (USEC_PER_SEC / HZ) * j;
+}
+#else
+unsigned int jiffies_to_usecs(const unsigned long j);
+#endif
 
 /**
  * jiffies_to_nsecs - Convert jiffies to nanoseconds
-- 
cgit v1.2.3


From e9e0b48cd15b46dcb2bbc165f6b0fee698b855d6 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Sun, 8 Feb 2026 22:47:26 +0000
Subject: drm/fourcc: fix plane order for 10/12/16-bit YCbCr formats

The short comments had the correct order, but the long comments
had the planes reversed.

Fixes: 2271e0a20ef7 ("drm: drm_fourcc: add 10/12/16bit software decoder YCbCr formats")
Signed-off-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Daniel Stone <daniels@collabora.com>
Reviewed-by: Robert Mader <robert.mader@collabora.com>
Link: https://patch.msgid.link/20260208224718.57199-1-contact@emersion.fr
---
 include/uapi/drm/drm_fourcc.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index e527b24bd824..c89aede3cb12 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -401,8 +401,8 @@ extern "C" {
  * implementation can multiply the values by 2^6=64. For that reason the padding
  * must only contain zeros.
  * index 0 = Y plane, [15:0] z:Y [6:10] little endian
- * index 1 = Cr plane, [15:0] z:Cr [6:10] little endian
- * index 2 = Cb plane, [15:0] z:Cb [6:10] little endian
+ * index 1 = Cb plane, [15:0] z:Cb [6:10] little endian
+ * index 2 = Cr plane, [15:0] z:Cr [6:10] little endian
  */
 #define DRM_FORMAT_S010	fourcc_code('S', '0', '1', '0') /* 2x2 subsampled Cb (1) and Cr (2) planes 10 bits per channel */
 #define DRM_FORMAT_S210	fourcc_code('S', '2', '1', '0') /* 2x1 subsampled Cb (1) and Cr (2) planes 10 bits per channel */
@@ -414,8 +414,8 @@ extern "C" {
  * implementation can multiply the values by 2^4=16. For that reason the padding
  * must only contain zeros.
  * index 0 = Y plane, [15:0] z:Y [4:12] little endian
- * index 1 = Cr plane, [15:0] z:Cr [4:12] little endian
- * index 2 = Cb plane, [15:0] z:Cb [4:12] little endian
+ * index 1 = Cb plane, [15:0] z:Cb [4:12] little endian
+ * index 2 = Cr plane, [15:0] z:Cr [4:12] little endian
  */
 #define DRM_FORMAT_S012	fourcc_code('S', '0', '1', '2') /* 2x2 subsampled Cb (1) and Cr (2) planes 12 bits per channel */
 #define DRM_FORMAT_S212	fourcc_code('S', '2', '1', '2') /* 2x1 subsampled Cb (1) and Cr (2) planes 12 bits per channel */
@@ -424,8 +424,8 @@ extern "C" {
 /*
  * 3 plane YCbCr
  * index 0 = Y plane, [15:0] Y little endian
- * index 1 = Cr plane, [15:0] Cr little endian
- * index 2 = Cb plane, [15:0] Cb little endian
+ * index 1 = Cb plane, [15:0] Cb little endian
+ * index 2 = Cr plane, [15:0] Cr little endian
  */
 #define DRM_FORMAT_S016	fourcc_code('S', '0', '1', '6') /* 2x2 subsampled Cb (1) and Cr (2) planes 16 bits per channel */
 #define DRM_FORMAT_S216	fourcc_code('S', '2', '1', '6') /* 2x1 subsampled Cb (1) and Cr (2) planes 16 bits per channel */
-- 
cgit v1.2.3


From ce9e40a9a5e5cff0b1b0d2fa582b3d71a8ce68e8 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 6 Feb 2026 15:48:16 +0000
Subject: irqchip/gic-v3-its: Limit number of per-device MSIs to the range the
 ITS supports

The ITS driver blindly assumes that EventIDs are in abundant supply, to the
point where it never checks how many the hardware actually supports.

It turns out that some pretty esoteric integrations make it so that only a
few bits are available, all the way down to a single bit.

Enforce the advertised limitation at the point of allocating the device
structure, and hope that the endpoint driver can deal with such limitation.

Fixes: 84a6a2e7fc18d ("irqchip: GICv3: ITS: device allocation and configuration")
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Zenghui Yu <zenghui.yu@linux.dev>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260206154816.3582887-1-maz@kernel.org
---
 include/linux/irqchip/arm-gic-v3.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 70c0948f978e..0225121f3013 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -394,6 +394,7 @@
 #define GITS_TYPER_VLPIS		(1UL << 1)
 #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT	4
 #define GITS_TYPER_ITT_ENTRY_SIZE	GENMASK_ULL(7, 4)
+#define GITS_TYPER_IDBITS		GENMASK_ULL(12, 8)
 #define GITS_TYPER_IDBITS_SHIFT		8
 #define GITS_TYPER_DEVBITS_SHIFT	13
 #define GITS_TYPER_DEVBITS		GENMASK_ULL(17, 13)
-- 
cgit v1.2.3


From 249013e673fce3506c61063c7cbedd75b4c668d8 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 18 Feb 2026 22:09:21 -0800
Subject: fsnotify: drop unused helper

Remove this helper now that all users have been converted to
fserror_report_metadata as of 7.0-rc1.

Cc: jack@suse.cz
Cc: amir73il@gmail.com
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Link: https://patch.msgid.link/177148129543.716249.980530449513340111.stgit@frogsfrogsfrogs
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fsnotify.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 28a9cb13fbfa..079c18bcdbde 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -495,19 +495,6 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 		fsnotify_dentry(dentry, mask);
 }
 
-static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode,
-				    int error)
-{
-	struct fs_error_report report = {
-		.error = error,
-		.inode = inode,
-		.sb = sb,
-	};
-
-	return fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR,
-			NULL, NULL, NULL, 0);
-}
-
 static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt)
 {
 	fsnotify_mnt(FS_MNT_ATTACH, ns, mnt);
-- 
cgit v1.2.3


From 6b3e458806e34f1142592f786d3eb0ebac209cc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=BCnther=20Noack?= <gnoack@google.com>
Date: Thu, 19 Feb 2026 16:43:35 +0100
Subject: HID: Document memory allocation properties of report_fixup()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The memory pointer returned by the report_fixup() hook does not get
freed by the caller.  Instead, report_fixup() must return (in return
value and *rsize) a memory buffer with at least the same lifetime as
the input buffer (defined by rdesc and original *rsize).

This is usually achieved using one of the following techniques:

* Returning a pointer and size to a sub-portion of the input buffer
* Returning a pointer to a static buffer
* Allocating a buffer with a devm_*() function,
  which will automatically get freed when the device is removed.

Signed-off-by: Günther Noack <gnoack@google.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index dce862cafbbd..2990b9f94cb5 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -836,6 +836,12 @@ struct hid_usage_id {
  * raw_event and event should return negative on error, any other value will
  * pass the event on to .event() typically return 0 for success.
  *
+ * report_fixup must return a report descriptor pointer whose lifetime is at
+ * least that of the input rdesc.  This is usually done by mutating the input
+ * rdesc and returning it or a sub-portion of it.  In case a new buffer is
+ * allocated and returned, the implementation of report_fixup is responsible for
+ * freeing it later.
+ *
  * input_mapping shall return a negative value to completely ignore this usage
  * (e.g. doubled or invalid usage), zero to continue with parsing of this
  * usage by generic code (no special handling needed) or positive to skip
-- 
cgit v1.2.3


From 858d2a4f67ff69e645a43487ef7ea7f28f06deae Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 17 Feb 2026 16:12:05 +0000
Subject: tcp: fix potential race in tcp_v6_syn_recv_sock()

Code in tcp_v6_syn_recv_sock() after the call to tcp_v4_syn_recv_sock()
is done too late.

After tcp_v4_syn_recv_sock(), the child socket is already visible
from TCP ehash table and other cpus might use it.

Since newinet->pinet6 is still pointing to the listener ipv6_pinfo
bad things can happen as syzbot found.

Move the problematic code in tcp_v6_mapped_child_init()
and call this new helper from tcp_v4_syn_recv_sock() before
the ehash insertion.

This allows the removal of one tcp_sync_mss(), since
tcp_v4_syn_recv_sock() will call it with the correct
context.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot+937b5bbb6a815b3e5d0b@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/69949275.050a0220.2eeac1.0145.GAE@google.com/
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260217161205.2079883-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet_connection_sock.h | 4 +++-
 include/net/tcp.h                  | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index ecb362025c4e..5cb3056d6ddc 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -42,7 +42,9 @@ struct inet_connection_sock_af_ops {
 				      struct request_sock *req,
 				      struct dst_entry *dst,
 				      struct request_sock *req_unhash,
-				      bool *own_req);
+				      bool *own_req,
+				      void (*opt_child_init)(struct sock *newsk,
+							     const struct sock *sk));
 	u16	    net_header_len;
 	int	    (*setsockopt)(struct sock *sk, int level, int optname,
 				  sockptr_t optval, unsigned int optlen);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 40e72b9cb85f..eb8bf63fdafc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -544,7 +544,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 				  struct request_sock *req,
 				  struct dst_entry *dst,
 				  struct request_sock *req_unhash,
-				  bool *own_req);
+				  bool *own_req,
+				  void (*opt_child_init)(struct sock *newsk,
+							 const struct sock *sk));
 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
 int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len);
 int tcp_connect(struct sock *sk);
-- 
cgit v1.2.3


From 6e3c0a4e1ad1e0455b7880fad02b3ee179f56c09 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 22 Apr 2025 12:16:28 +0200
Subject: sched/fair: Fix lag clamp

Vincent reported that he was seeing undue lag clamping in a mixed
slice workload. Implement the max_slice tracking as per the todo
comment.

Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy")
Reported-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: Shubhang Kaushik <shubhang@os.amperecomputing.com>
Link: https://patch.msgid.link/20250422101628.GA33555@noisy.programming.kicks-ass.net
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 074ad4ef3d81..a7b4a980eb2f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -579,6 +579,7 @@ struct sched_entity {
 	u64				deadline;
 	u64				min_vruntime;
 	u64				min_slice;
+	u64				max_slice;
 
 	struct list_head		group_node;
 	unsigned char			on_rq;
-- 
cgit v1.2.3


From 4c652a47722f69c6f2685f05b17490ea97f643a8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 6 Feb 2026 08:41:13 +0100
Subject: rseq: Mark rseq_arm_slice_extension_timer() __always_inline

objtool warns about this function being called inside of a uaccess
section:

kernel/entry/common.o: warning: objtool: irqentry_exit+0x1dc: call to rseq_arm_slice_extension_timer() with UACCESS enabled

Interestingly, this happens with CONFIG_RSEQ_SLICE_EXTENSION disabled,
so this is an empty function, as the normal implementation is
already marked __always_inline.

I could reproduce this multiple times with gcc-11 but not with gcc-15,
so the compiler probably got better at identifying the trivial function.

Mark all the empty helpers for !RSEQ_SLICE_EXTENSION as __always_inline
for consistency, avoiding this warning.

Fixes: 0ac3b5c3dc45 ("rseq: Implement time slice extension enforcement timer")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260206074122.709580-1-arnd@kernel.org
---
 include/linux/rseq_entry.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h
index cbc4a791618b..c6831c93cd6e 100644
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -216,10 +216,10 @@ efault:
 }
 
 #else /* CONFIG_RSEQ_SLICE_EXTENSION */
-static inline bool rseq_slice_extension_enabled(void) { return false; }
-static inline bool rseq_arm_slice_extension_timer(void) { return false; }
-static inline void rseq_slice_clear_grant(struct task_struct *t) { }
-static inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
+static __always_inline bool rseq_slice_extension_enabled(void) { return false; }
+static __always_inline bool rseq_arm_slice_extension_timer(void) { return false; }
+static __always_inline void rseq_slice_clear_grant(struct task_struct *t) { }
+static __always_inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
 #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
 
 bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
-- 
cgit v1.2.3


From 3b68df978133ac3d46d570af065a73debbb68248 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Fri, 20 Feb 2026 15:06:41 -0500
Subject: rseq: slice ext: Ensure rseq feature size differs from original rseq
 size

Before rseq became extensible, its original size was 32 bytes even
though the active rseq area was only 20 bytes. This had the following
impact in terms of userspace ecosystem evolution:

* The GNU libc between 2.35 and 2.39 expose a __rseq_size symbol set
  to 32, even though the size of the active rseq area is really 20.
* The GNU libc 2.40 changes this __rseq_size to 20, thus making it
  express the active rseq area.
* Starting from glibc 2.41, __rseq_size corresponds to the
  AT_RSEQ_FEATURE_SIZE from getauxval(3).

This means that users of __rseq_size can always expect it to
correspond to the active rseq area, except for the value 32, for
which the active rseq area is 20 bytes.

Exposing a 32 bytes feature size would make life needlessly painful
for userspace. Therefore, add a reserved field at the end of the
rseq area to bump the feature size to 33 bytes. This reserved field
is expected to be replaced with whatever field will come next,
expecting that this field will be larger than 1 byte.

The effect of this change is to increase the size from 32 to 64 bytes
before we actually have fields using that memory.

Clarify the allocation size and alignment requirements in the struct
rseq uapi comment.

Change the value returned by getauxval(AT_RSEQ_ALIGN) to return the
value of the active rseq area size rounded up to next power of 2, which
guarantees that the rseq structure will always be aligned on the nearest
power of two large enough to contain it, even as it grows. Change the
alignment check in the rseq registration accordingly.

This will minimize the amount of ABI corner-cases we need to document
and require userspace to play games with. The rule stays simple when
__rseq_size != 32:

  #define rseq_field_available(field)	(__rseq_size >= offsetofend(struct rseq_abi, field))

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260220200642.1317826-3-mathieu.desnoyers@efficios.com
---
 include/linux/rseq.h      | 12 ++++++++++++
 include/uapi/linux/rseq.h | 26 ++++++++++++++++++++++----
 2 files changed, 34 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index 7a01a0760405..b9d62fc2140d 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -146,6 +146,18 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
 		t->rseq = current->rseq;
 }
 
+/*
+ * Value returned by getauxval(AT_RSEQ_ALIGN) and expected by rseq
+ * registration. This is the active rseq area size rounded up to next
+ * power of 2, which guarantees that the rseq structure will always be
+ * aligned on the nearest power of two large enough to contain it, even
+ * as it grows.
+ */
+static inline unsigned int rseq_alloc_align(void)
+{
+	return 1U << get_count_order(offsetof(struct rseq, end));
+}
+
 #else /* CONFIG_RSEQ */
 static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
 static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index 863c4a00a66b..f69344fe6c08 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -87,10 +87,17 @@ struct rseq_slice_ctrl {
 };
 
 /*
- * struct rseq is aligned on 4 * 8 bytes to ensure it is always
- * contained within a single cache-line.
+ * The original size and alignment of the allocation for struct rseq is
+ * 32 bytes.
  *
- * A single struct rseq per thread is allowed.
+ * The allocation size needs to be greater or equal to
+ * max(getauxval(AT_RSEQ_FEATURE_SIZE), 32), and the allocation needs to
+ * be aligned on max(getauxval(AT_RSEQ_ALIGN), 32).
+ *
+ * As an alternative, userspace is allowed to use both the original size
+ * and alignment of 32 bytes for backward compatibility.
+ *
+ * A single active struct rseq registration per thread is allowed.
  */
 struct rseq {
 	/*
@@ -180,10 +187,21 @@ struct rseq {
 	 */
 	struct rseq_slice_ctrl slice_ctrl;
 
+	/*
+	 * Before rseq became extensible, its original size was 32 bytes even
+	 * though the active rseq area was only 20 bytes.
+	 * Exposing a 32 bytes feature size would make life needlessly painful
+	 * for userspace. Therefore, add a reserved byte after byte 32
+	 * to bump the rseq feature size from 32 to 33.
+	 * The next field to be added to the rseq area will be larger
+	 * than one byte, and will replace this reserved byte.
+	 */
+	__u8 __reserved;
+
 	/*
 	 * Flexible array member at end of structure, after last feature field.
 	 */
 	char end[];
-} __attribute__((aligned(4 * sizeof(__u64))));
+} __attribute__((aligned(32)));
 
 #endif /* _UAPI_LINUX_RSEQ_H */
-- 
cgit v1.2.3


From 901084c51a0a8fb42a3f37d2e9c62083c495f824 Mon Sep 17 00:00:00 2001
From: Penghe Geng <pgeng@nvidia.com>
Date: Thu, 19 Feb 2026 15:29:54 -0500
Subject: mmc: core: Avoid bitfield RMW for claim/retune flags

Move claimed and retune control flags out of the bitfield word to
avoid unrelated RMW side effects in asynchronous contexts.

The host->claimed bit shared a word with retune flags. Writes to claimed
in __mmc_claim_host() or retune_now in mmc_mq_queue_rq() can overwrite
other bits when concurrent updates happen in other contexts, triggering
spurious WARN_ON(!host->claimed). Convert claimed, can_retune,
retune_now and retune_paused to bool to remove shared-word coupling.

Fixes: 6c0cedd1ef952 ("mmc: core: Introduce host claiming by context")
Fixes: 1e8e55b67030c ("mmc: block: Add CQE support")
Cc: stable@vger.kernel.org
Suggested-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Penghe Geng <pgeng@nvidia.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/host.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index e0e2c265e5d1..ba84f02c2a10 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -486,14 +486,12 @@ struct mmc_host {
 
 	struct mmc_ios		ios;		/* current io bus settings */
 
+	bool			claimed;	/* host exclusively claimed */
+
 	/* group bitfields together to minimize padding */
 	unsigned int		use_spi_crc:1;
-	unsigned int		claimed:1;	/* host exclusively claimed */
 	unsigned int		doing_init_tune:1; /* initial tuning in progress */
-	unsigned int		can_retune:1;	/* re-tuning can be used */
 	unsigned int		doing_retune:1;	/* re-tuning in progress */
-	unsigned int		retune_now:1;	/* do re-tuning at next req */
-	unsigned int		retune_paused:1; /* re-tuning is temporarily disabled */
 	unsigned int		retune_crc_disable:1; /* don't trigger retune upon crc */
 	unsigned int		can_dma_map_merge:1; /* merging can be used */
 	unsigned int		vqmmc_enabled:1; /* vqmmc regulator is enabled */
@@ -508,6 +506,9 @@ struct mmc_host {
 	int			rescan_disable;	/* disable card detection */
 	int			rescan_entered;	/* used with nonremovable devices */
 
+	bool			can_retune;	/* re-tuning can be used */
+	bool			retune_now;	/* do re-tuning at next req */
+	bool			retune_paused;	/* re-tuning is temporarily disabled */
 	int			need_retune;	/* re-tuning is needed */
 	int			hold_retune;	/* hold off re-tuning */
 	unsigned int		retune_period;	/* re-tuning period in secs */
-- 
cgit v1.2.3


From 3afd8df024339c7da1a5a0302f3987866dd16e40 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 22 Dec 2025 21:36:25 +0100
Subject: PM: runtime: Change pm_runtime_put() return type to void

The primary role of pm_runtime_put() is to decrement the runtime PM
usage counter of the given device.  It always does that regardless of
the value returned by it later.

In addition, if the runtime PM usage counter after decrementation turns
out to be zero, a work item is queued up to check whether or not the
device can be suspended.  This is not guaranteed to succeed though and
even if it is successful, the device may still not be suspended going
forward.

There are multiple valid reasons why pm_runtime_put() may not decide to
queue up the work item mentioned above, including, but not limited to,
the case when user space has written "on" to the device's runtime PM
"control" file in sysfs.  In all of those cases, pm_runtime_put()
returns a negative error code (even though the device's runtime PM
usage counter has been successfully decremented by it) which is very
confusing.  In fact, its return value should only be used for debug
purposes and care should be taken when doing it even in that case.

Accordingly, to avoid the confusion mentioned above, change the return
type of pm_runtime_put() to void.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Link: https://patch.msgid.link/14387202.RDIVbhacDa@rafael.j.wysocki
---
 include/linux/pm_runtime.h | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 41037c513f06..64921b10ac74 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -545,22 +545,10 @@ static inline int pm_runtime_resume_and_get(struct device *dev)
  *
  * Decrement the runtime PM usage counter of @dev and if it turns out to be
  * equal to 0, queue up a work item for @dev like in pm_request_idle().
- *
- * Return:
- * * 1: Success. Usage counter dropped to zero, but device was already suspended.
- * * 0: Success.
- * * -EINVAL: Runtime PM error.
- * * -EACCES: Runtime PM disabled.
- * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status
- *            change ongoing.
- * * -EBUSY: Runtime PM child_count non-zero.
- * * -EPERM: Device PM QoS resume latency 0.
- * * -EINPROGRESS: Suspend already in progress.
- * * -ENOSYS: CONFIG_PM not enabled.
  */
-static inline int pm_runtime_put(struct device *dev)
+static inline void pm_runtime_put(struct device *dev)
 {
-	return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC);
+	__pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC);
 }
 
 /**
-- 
cgit v1.2.3


From 2a7b7652b1bb3fadc3bd47d622bfb127a93ab6b0 Mon Sep 17 00:00:00 2001
From: Leif Skunberg <diamondback@cohunt.app>
Date: Tue, 10 Feb 2026 14:21:29 +0100
Subject: platform/x86: int3472: Handle GPIO type 0x10 (DOVDD)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Lenovo ThinkPad X1 Fold 16 Gen 1 has an OV5675 sensor (ACPI HID
OVTI5675) behind an INT3472 discrete PMIC controller. The INT3472
_DSM returns GPIO type 0x10 for one of the pins, which controls the
DOVDD (digital I/O power) regulator enable.

Type 0x10 is not currently handled by the driver, causing the GPIO to
be ignored with a warning. Add INT3472_GPIO_TYPE_DOVDD (0x10) and
handle it as a regulator with con_id "dovdd" to match the supply name
used by sensor drivers (e.g. ov5675).

Also increase GPIO_SUPPLY_NAME_LENGTH from 5 to 6 to accommodate
the "dovdd" name (5 chars + null terminator).

Signed-off-by: Leif Skunberg <diamondback@cohunt.app>
Reviewed-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
Link: https://patch.msgid.link/20260210132129.17943-1-diamondback@cohunt.app
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/platform_data/x86/int3472.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h
index b1b837583d54..dbe745dc88d5 100644
--- a/include/linux/platform_data/x86/int3472.h
+++ b/include/linux/platform_data/x86/int3472.h
@@ -26,6 +26,7 @@
 #define INT3472_GPIO_TYPE_POWER_ENABLE				0x0b
 #define INT3472_GPIO_TYPE_CLK_ENABLE				0x0c
 #define INT3472_GPIO_TYPE_PRIVACY_LED				0x0d
+#define INT3472_GPIO_TYPE_DOVDD					0x10
 #define INT3472_GPIO_TYPE_HANDSHAKE				0x12
 #define INT3472_GPIO_TYPE_HOTPLUG_DETECT			0x13
 
@@ -33,8 +34,8 @@
 #define INT3472_MAX_SENSOR_GPIOS				3
 #define INT3472_MAX_REGULATORS					3
 
-/* E.g. "avdd\0" */
-#define GPIO_SUPPLY_NAME_LENGTH				5
+/* E.g. "dovdd\0" */
+#define GPIO_SUPPLY_NAME_LENGTH				6
 /* 12 chars for acpi_dev_name() + "-", e.g. "ABCD1234:00-" */
 #define GPIO_REGULATOR_NAME_LENGTH				(12 + GPIO_SUPPLY_NAME_LENGTH)
 /* lower- and upper-case mapping */
-- 
cgit v1.2.3


From 551d44200152cb26f75d2ef990aeb6185b7e37fd Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 23 Feb 2026 09:33:08 -0800
Subject: default_gfp(): avoid using the "newfangled" __VA_OPT__ trick

The default_gfp() helper that I added is not wrong, but it turns out
that it causes unnecessary headaches for 'sparse' which doesn't support
the use of __VA_OPT__ (introduced in C++20 and C23, and supported by gcc
and clang for a long time).

We do already use __VA_OPT__ in some other cases in the kernel (drm/xe
and btrfs), but it has been fairly limited.  Now it triggers for pretty
much everything, and sparse ends up not working at all.

We can use the traditional gcc ',##__VA_ARGS__' syntax instead: it may
not be the "C standard" way and is slightly less natural in this
context, but it is the traditional model for this and avoids the sparse
problem.

Reported-and-tested-by: Ricardo Ribalda <ribalda@chromium.org>
Reported-and-tested-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Reported-by: Ben Dooks <ben.dooks@codethink.co.uk>
Fixes: e19e1b480ac7 ("add default_gfp() helper macro and use it in the new *alloc_obj() helpers")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2b30a0529d48..90536b2bc42e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -14,8 +14,8 @@ struct vm_area_struct;
 struct mempolicy;
 
 /* Helper macro to avoid gfp flags if they are the default one */
-#define __default_gfp(a,...) a
-#define default_gfp(...) __default_gfp(__VA_ARGS__ __VA_OPT__(,) GFP_KERNEL)
+#define __default_gfp(a,b,...) b
+#define default_gfp(...) __default_gfp(,##__VA_ARGS__,GFP_KERNEL)
 
 /* Convert GFP flags to their corresponding migrate type */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
-- 
cgit v1.2.3


From 7accb1c4321acb617faf934af59d928b0b047e2b Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Tue, 3 Feb 2026 15:16:16 -0500
Subject: Bluetooth: L2CAP: Fix invalid response to L2CAP_ECRED_RECONF_REQ

This fixes responding with an invalid result caused by checking the
wrong size of CID which should have been (cmd_len - sizeof(*req)) and
on top of it the wrong result was use L2CAP_CR_LE_INVALID_PARAMS which
is invalid/reserved for reconf when running test like L2CAP/ECFC/BI-03-C:

> ACL Data RX: Handle 64 flags 0x02 dlen 14
      LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 6
        MTU: 64
        MPS: 64
        Source CID: 64
< ACL Data TX: Handle 64 flags 0x00 dlen 10
      LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2
!        Result: Reserved (0x000c)
         Result: Reconfiguration failed - one or more Destination CIDs invalid (0x0003)

Fiix L2CAP/ECFC/BI-04-C which expects L2CAP_RECONF_INVALID_MPS (0x0002)
when more than one channel gets its MPS reduced:

> ACL Data RX: Handle 64 flags 0x02 dlen 16
      LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 8
        MTU: 264
        MPS: 99
        Source CID: 64
!       Source CID: 65
< ACL Data TX: Handle 64 flags 0x00 dlen 10
      LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2
!        Result: Reconfiguration successful (0x0000)
         Result: Reconfiguration failed - reduction in size of MPS not allowed for more than one channel at a time (0x0002)

Fix L2CAP/ECFC/BI-05-C when SCID is invalid (85 unconnected):

> ACL Data RX: Handle 64 flags 0x02 dlen 14
      LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 6
        MTU: 65
        MPS: 64
!        Source CID: 85
< ACL Data TX: Handle 64 flags 0x00 dlen 10
      LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2
!        Result: Reconfiguration successful (0x0000)
         Result: Reconfiguration failed - one or more Destination CIDs invalid (0x0003)

Fix L2CAP/ECFC/BI-06-C when MPS < L2CAP_ECRED_MIN_MPS (64):

> ACL Data RX: Handle 64 flags 0x02 dlen 14
      LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 6
        MTU: 672
!       MPS: 63
        Source CID: 64
< ACL Data TX: Handle 64 flags 0x00 dlen 10
      LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2
!       Result: Reconfiguration failed - reduction in size of MPS not allowed for more than one channel at a time (0x0002)
        Result: Reconfiguration failed - other unacceptable parameters (0x0004)

Fix L2CAP/ECFC/BI-07-C when MPS reduced for more than one channel:

> ACL Data RX: Handle 64 flags 0x02 dlen 16
      LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 3 len 8
        MTU: 84
!       MPS: 71
        Source CID: 64
!        Source CID: 65
< ACL Data TX: Handle 64 flags 0x00 dlen 10
      LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2
!       Result: Reconfiguration successful (0x0000)
        Result: Reconfiguration failed - reduction in size of MPS not allowed for more than one channel at a time (0x0002)

Link: https://github.com/bluez/bluez/issues/1865
Fixes: 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/l2cap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index ec3af01e4db9..6f9cf7a05986 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -493,6 +493,8 @@ struct l2cap_ecred_reconf_req {
 #define L2CAP_RECONF_SUCCESS		0x0000
 #define L2CAP_RECONF_INVALID_MTU	0x0001
 #define L2CAP_RECONF_INVALID_MPS	0x0002
+#define L2CAP_RECONF_INVALID_CID	0x0003
+#define L2CAP_RECONF_INVALID_PARAMS	0x0004
 
 struct l2cap_ecred_reconf_rsp {
 	__le16 result;
-- 
cgit v1.2.3


From c28d2bff70444a85b3b86aaf241ece9408c7858c Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Thu, 5 Feb 2026 15:11:34 -0500
Subject: Bluetooth: L2CAP: Fix result of L2CAP_ECRED_CONN_RSP when MTU is too
 short

Test L2CAP/ECFC/BV-26-C expect the response to L2CAP_ECRED_CONN_REQ with
and MTU value < L2CAP_ECRED_MIN_MTU (64) to be L2CAP_CR_LE_INVALID_PARAMS
rather than L2CAP_CR_LE_UNACCEPT_PARAMS.

Also fix not including the correct number of CIDs in the response since
the spec requires all CIDs being rejected to be included in the
response.

Link: https://github.com/bluez/bluez/issues/1868
Fixes: 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/l2cap.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 6f9cf7a05986..010f1a8fd15f 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -284,9 +284,9 @@ struct l2cap_conn_rsp {
 #define L2CAP_CR_LE_BAD_KEY_SIZE	0x0007
 #define L2CAP_CR_LE_ENCRYPTION		0x0008
 #define L2CAP_CR_LE_INVALID_SCID	0x0009
-#define L2CAP_CR_LE_SCID_IN_USE		0X000A
-#define L2CAP_CR_LE_UNACCEPT_PARAMS	0X000B
-#define L2CAP_CR_LE_INVALID_PARAMS	0X000C
+#define L2CAP_CR_LE_SCID_IN_USE		0x000A
+#define L2CAP_CR_LE_UNACCEPT_PARAMS	0x000B
+#define L2CAP_CR_LE_INVALID_PARAMS	0x000C
 
 /* connect/create channel status */
 #define L2CAP_CS_NO_INFO	0x0000
-- 
cgit v1.2.3


From 983512f3a87fd8dc4c94dfa6b596b6e57df5aad7 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 20 Feb 2026 19:38:58 +0100
Subject: net: Drop the lock in skb_may_tx_timestamp()

skb_may_tx_timestamp() may acquire sock::sk_callback_lock. The lock must
not be taken in IRQ context, only softirq is okay. A few drivers receive
the timestamp via a dedicated interrupt and complete the TX timestamp
from that handler. This will lead to a deadlock if the lock is already
write-locked on the same CPU.

Taking the lock can be avoided. The socket (pointed by the skb) will
remain valid until the skb is released. The ->sk_socket and ->file
member will be set to NULL once the user closes the socket which may
happen before the timestamp arrives.
If we happen to observe the pointer while the socket is closing but
before the pointer is set to NULL then we may use it because both
pointer (and the file's cred member) are RCU freed.

Drop the lock. Use READ_ONCE() to obtain the individual pointer. Add a
matching WRITE_ONCE() where the pointer are cleared.

Link: https://lore.kernel.org/all/20260205145104.iWinkXHv@linutronix.de
Fixes: b245be1f4db1a ("net-timestamp: no-payload only sysctl")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260220183858.N4ERjFW6@linutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 66b56288c1d3..6c9a83016e95 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2098,7 +2098,7 @@ static inline int sk_rx_queue_get(const struct sock *sk)
 
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 {
-	sk->sk_socket = sock;
+	WRITE_ONCE(sk->sk_socket, sock);
 	if (sock) {
 		WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid);
 		WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino);
-- 
cgit v1.2.3


From eb9549346f7578eda3755683ac2cfb4d94c0675f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 16 Feb 2026 13:17:44 +0100
Subject: mm: change vma_alloc_folio_noprof() macro to inline function

In a few rare configurations with extra warnings eanbled, the new
drm_pagemap_migrate_populate_ram_pfn() calls vma_alloc_folio_noprof() but
that does not use all the arguments, leading to a harmless warning:

drivers/gpu/drm/drm_pagemap.c: In function 'drm_pagemap_migrate_populate_ram_pfn':
drivers/gpu/drm/drm_pagemap.c:701:63: error: parameter 'addr' set but not used [-Werror=unused-but-set-parameter=]
  701 |                                                 unsigned long addr)
      |                                                 ~~~~~~~~~~~~~~^~~~

Replace the macro with an inline function so the compiler can see how the
argument would be used, but is still able to optimize out the assignments.

Link: https://lkml.kernel.org/r/20260216121751.2378374-1-arnd@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/gfp.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2b30a0529d48..f82d74a77cad 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -339,8 +339,11 @@ static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int orde
 {
 	return folio_alloc_noprof(gfp, order);
 }
-#define vma_alloc_folio_noprof(gfp, order, vma, addr)		\
-	folio_alloc_noprof(gfp, order)
+static inline struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order,
+		struct vm_area_struct *vma, unsigned long addr)
+{
+	return folio_alloc_noprof(gfp, order);
+}
 #endif
 
 #define alloc_pages(...)			alloc_hooks(alloc_pages_noprof(__VA_ARGS__))
-- 
cgit v1.2.3


From f85b1c6af5bc3872f994df0a5688c1162de07a62 Mon Sep 17 00:00:00 2001
From: "Pratyush Yadav (Google)" <pratyush@kernel.org>
Date: Mon, 16 Feb 2026 14:22:19 +0100
Subject: liveupdate: luo_file: remember retrieve() status

LUO keeps track of successful retrieve attempts on a LUO file.  It does so
to avoid multiple retrievals of the same file.  Multiple retrievals cause
problems because once the file is retrieved, the serialized data
structures are likely freed and the file is likely in a very different
state from what the code expects.

The retrieve boolean in struct luo_file keeps track of this, and is passed
to the finish callback so it knows what work was already done and what it
has left to do.

All this works well when retrieve succeeds.  When it fails,
luo_retrieve_file() returns the error immediately, without ever storing
anywhere that a retrieve was attempted or what its error code was.  This
results in an errored LIVEUPDATE_SESSION_RETRIEVE_FD ioctl to userspace,
but nothing prevents it from trying this again.

The retry is problematic for much of the same reasons listed above.  The
file is likely in a very different state than what the retrieve logic
normally expects, and it might even have freed some serialization data
structures.  Attempting to access them or free them again is going to
break things.

For example, if memfd managed to restore 8 of its 10 folios, but fails on
the 9th, a subsequent retrieve attempt will try to call
kho_restore_folio() on the first folio again, and that will fail with a
warning since it is an invalid operation.

Apart from the retry, finish() also breaks.  Since on failure the
retrieved bool in luo_file is never touched, the finish() call on session
close will tell the file handler that retrieve was never attempted, and it
will try to access or free the data structures that might not exist, much
in the same way as the retry attempt.

There is no sane way of attempting the retrieve again.  Remember the error
retrieve returned and directly return it on a retry.  Also pass this
status code to finish() so it can make the right decision on the work it
needs to do.

This is done by changing the bool to an integer.  A value of 0 means
retrieve was never attempted, a positive value means it succeeded, and a
negative value means it failed and the error code is the value.

Link: https://lkml.kernel.org/r/20260216132221.987987-1-pratyush@kernel.org
Fixes: 7c722a7f44e0 ("liveupdate: luo_file: implement file systems callbacks")
Signed-off-by: Pratyush Yadav (Google) <pratyush@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/liveupdate.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h
index fe82a6c3005f..dd11fdc76a5f 100644
--- a/include/linux/liveupdate.h
+++ b/include/linux/liveupdate.h
@@ -23,8 +23,11 @@ struct file;
 /**
  * struct liveupdate_file_op_args - Arguments for file operation callbacks.
  * @handler:          The file handler being called.
- * @retrieved:        The retrieve status for the 'can_finish / finish'
- *                    operation.
+ * @retrieve_status:  The retrieve status for the 'can_finish / finish'
+ *                    operation. A value of 0 means the retrieve has not been
+ *                    attempted, a positive value means the retrieve was
+ *                    successful, and a negative value means the retrieve failed,
+ *                    and the value is the error code of the call.
  * @file:             The file object. For retrieve: [OUT] The callback sets
  *                    this to the new file. For other ops: [IN] The caller sets
  *                    this to the file being operated on.
@@ -40,7 +43,7 @@ struct file;
  */
 struct liveupdate_file_op_args {
 	struct liveupdate_file_handler *handler;
-	bool retrieved;
+	int retrieve_status;
 	struct file *file;
 	u64 serialized_data;
 	void *private_data;
-- 
cgit v1.2.3


From 079c24d5690262e83ee476e2a548e416f3237511 Mon Sep 17 00:00:00 2001
From: Kalesh Singh <kaleshsingh@google.com>
Date: Thu, 19 Feb 2026 15:36:56 -0800
Subject: mm/tracing: rss_stat: ensure curr is false from kthread context

The rss_stat trace event allows userspace tools, like Perfetto [1], to
inspect per-process RSS metric changes over time.

The curr field was introduced to rss_stat in commit e4dcad204d3a
("rss_stat: add support to detect RSS updates of external mm").  Its
intent is to indicate whether the RSS update is for the mm_struct of the
current execution context; and is set to false when operating on a remote
mm_struct (e.g., via kswapd or a direct reclaimer).

However, an issue arises when a kernel thread temporarily adopts a user
process's mm_struct.  Kernel threads do not have their own mm_struct and
normally have current->mm set to NULL.  To operate on user memory, they
can "borrow" a memory context using kthread_use_mm(), which sets
current->mm to the user process's mm.

This can be observed, for example, in the USB Function Filesystem (FFS)
driver.  The ffs_user_copy_worker() handles AIO completions and uses
kthread_use_mm() to copy data to a user-space buffer.  If a page fault
occurs during this copy, the fault handler executes in the kthread's
context.

At this point, current is the kthread, but current->mm points to the user
process's mm.  Since the rss_stat event (from the page fault) is for that
same mm, the condition current->mm == mm becomes true, causing curr to be
incorrectly set to true when the trace event is emitted.

This is misleading because it suggests the mm belongs to the kthread,
confusing userspace tools that track per-process RSS changes and
corrupting their mm_id-to-process association.

Fix this by ensuring curr is always false when the trace event is emitted
from a kthread context by checking for the PF_KTHREAD flag.

Link: https://lkml.kernel.org/r/20260219233708.1971199-1-kaleshsingh@google.com
Link: https://perfetto.dev/ [1]
Fixes: e4dcad204d3a ("rss_stat: add support to detect RSS updates of external mm")
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Acked-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Cc: "David Hildenbrand (Arm)" <david@kernel.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: <stable@vger.kernel.org>	[5.10+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/trace/events/kmem.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 7f93e754da5c..cd7920c81f85 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -440,7 +440,13 @@ TRACE_EVENT(rss_stat,
 
 	TP_fast_assign(
 		__entry->mm_id = mm_ptr_to_hash(mm);
-		__entry->curr = !!(current->mm == mm);
+		/*
+		 * curr is true if the mm matches the current task's mm_struct.
+		 * Since kthreads (PF_KTHREAD) have no mm_struct of their own
+		 * but can borrow one via kthread_use_mm(), we must filter them
+		 * out to avoid incorrectly attributing the RSS update to them.
+		 */
+		__entry->curr = current->mm == mm && !(current->flags & PF_KTHREAD);
 		__entry->member = member;
 		__entry->size = (percpu_counter_sum_positive(&mm->rss_stat[member])
 							    << PAGE_SHIFT);
-- 
cgit v1.2.3


From 201ceb94aa1def0024a7c18ce643e5f65026be06 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@kernel.org>
Date: Mon, 23 Feb 2026 19:37:51 -0800
Subject: kunit: irq: Ensure timer doesn't fire too frequently

Fix a bug where kunit_run_irq_test() could hang if the system is too
slow.  This was noticed with the crypto library tests in certain VMs.

Specifically, if kunit_irq_test_timer_func() and the associated hrtimer
code took over 5us to run, then the CPU would spend all its time
executing that code in hardirq context.  As a result, the task executing
kunit_run_irq_test() never had a chance to run, exit the loop, and
cancel the timer.

To fix it, make kunit_irq_test_timer_func() increase the timer interval
when the other contexts aren't having a chance to run.

Fixes: 950a81224e8b ("lib/crypto: tests: Add hash-test-template.h and gen-hash-testvecs.py")
Cc: stable@vger.kernel.org
Reviewed-by: David Gow <david@davidgow.net>
Link: https://lore.kernel.org/r/20260224033751.97615-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 include/kunit/run-in-irq-context.h | 44 ++++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/kunit/run-in-irq-context.h b/include/kunit/run-in-irq-context.h
index c89b1b1b12dd..bfe60d6cf28d 100644
--- a/include/kunit/run-in-irq-context.h
+++ b/include/kunit/run-in-irq-context.h
@@ -12,16 +12,16 @@
 #include <linux/hrtimer.h>
 #include <linux/workqueue.h>
 
-#define KUNIT_IRQ_TEST_HRTIMER_INTERVAL us_to_ktime(5)
-
 struct kunit_irq_test_state {
 	bool (*func)(void *test_specific_state);
 	void *test_specific_state;
 	bool task_func_reported_failure;
 	bool hardirq_func_reported_failure;
 	bool softirq_func_reported_failure;
+	atomic_t task_func_calls;
 	atomic_t hardirq_func_calls;
 	atomic_t softirq_func_calls;
+	ktime_t interval;
 	struct hrtimer timer;
 	struct work_struct bh_work;
 };
@@ -30,14 +30,25 @@ static enum hrtimer_restart kunit_irq_test_timer_func(struct hrtimer *timer)
 {
 	struct kunit_irq_test_state *state =
 		container_of(timer, typeof(*state), timer);
+	int task_calls, hardirq_calls, softirq_calls;
 
 	WARN_ON_ONCE(!in_hardirq());
-	atomic_inc(&state->hardirq_func_calls);
+	task_calls = atomic_read(&state->task_func_calls);
+	hardirq_calls = atomic_inc_return(&state->hardirq_func_calls);
+	softirq_calls = atomic_read(&state->softirq_func_calls);
+
+	/*
+	 * If the timer is firing too often for the softirq or task to ever have
+	 * a chance to run, increase the timer interval.  This is needed on very
+	 * slow systems.
+	 */
+	if (hardirq_calls >= 20 && (softirq_calls == 0 || task_calls == 0))
+		state->interval = ktime_add_ns(state->interval, 250);
 
 	if (!state->func(state->test_specific_state))
 		state->hardirq_func_reported_failure = true;
 
-	hrtimer_forward_now(&state->timer, KUNIT_IRQ_TEST_HRTIMER_INTERVAL);
+	hrtimer_forward_now(&state->timer, state->interval);
 	queue_work(system_bh_wq, &state->bh_work);
 	return HRTIMER_RESTART;
 }
@@ -86,10 +97,14 @@ static inline void kunit_run_irq_test(struct kunit *test, bool (*func)(void *),
 	struct kunit_irq_test_state state = {
 		.func = func,
 		.test_specific_state = test_specific_state,
+		/*
+		 * Start with a 5us timer interval.  If the system can't keep
+		 * up, kunit_irq_test_timer_func() will increase it.
+		 */
+		.interval = us_to_ktime(5),
 	};
 	unsigned long end_jiffies;
-	int hardirq_calls, softirq_calls;
-	bool allctx = false;
+	int task_calls, hardirq_calls, softirq_calls;
 
 	/*
 	 * Set up a hrtimer (the way we access hardirq context) and a work
@@ -104,21 +119,18 @@ static inline void kunit_run_irq_test(struct kunit *test, bool (*func)(void *),
 	 * and hardirq), or 1 second, whichever comes first.
 	 */
 	end_jiffies = jiffies + HZ;
-	hrtimer_start(&state.timer, KUNIT_IRQ_TEST_HRTIMER_INTERVAL,
-		      HRTIMER_MODE_REL_HARD);
-	for (int task_calls = 0, calls = 0;
-	     ((calls < max_iterations) || !allctx) &&
-	     !time_after(jiffies, end_jiffies);
-	     task_calls++) {
+	hrtimer_start(&state.timer, state.interval, HRTIMER_MODE_REL_HARD);
+	do {
 		if (!func(test_specific_state))
 			state.task_func_reported_failure = true;
 
+		task_calls = atomic_inc_return(&state.task_func_calls);
 		hardirq_calls = atomic_read(&state.hardirq_func_calls);
 		softirq_calls = atomic_read(&state.softirq_func_calls);
-		calls = task_calls + hardirq_calls + softirq_calls;
-		allctx = (task_calls > 0) && (hardirq_calls > 0) &&
-			 (softirq_calls > 0);
-	}
+	} while ((task_calls + hardirq_calls + softirq_calls < max_iterations ||
+		  (task_calls == 0 || hardirq_calls == 0 ||
+		   softirq_calls == 0)) &&
+		 !time_after(jiffies, end_jiffies));
 
 	/* Cancel the timer and work. */
 	hrtimer_cancel(&state.timer);
-- 
cgit v1.2.3


From 4b44cbb264d0ed3f2f2bc2659db6ce45882f4670 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Tue, 24 Feb 2026 15:24:52 -0800
Subject: overflow: Make sure size helpers are always inlined

With kmalloc_obj() performing implicit size calculations, the embedded
size_mul() calls, while marked inline, were not always being inlined.
I noticed a couple places where allocations were making a call out for
things that would otherwise be compile-time calculated. Force the
compilers to always inline these calculations.

Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://patch.msgid.link/20260224232451.work.614-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/overflow.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index eddd987a8513..a8cb6319b4fb 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -42,7 +42,7 @@
  * both the type-agnostic benefits of the macros while also being able to
  * enforce that the return value is, in fact, checked.
  */
-static inline bool __must_check __must_check_overflow(bool overflow)
+static __always_inline bool __must_check __must_check_overflow(bool overflow)
 {
 	return unlikely(overflow);
 }
@@ -327,7 +327,7 @@ static inline bool __must_check __must_check_overflow(bool overflow)
  * with any overflow causing the return value to be SIZE_MAX. The
  * lvalue must be size_t to avoid implicit type conversion.
  */
-static inline size_t __must_check size_mul(size_t factor1, size_t factor2)
+static __always_inline size_t __must_check size_mul(size_t factor1, size_t factor2)
 {
 	size_t bytes;
 
@@ -346,7 +346,7 @@ static inline size_t __must_check size_mul(size_t factor1, size_t factor2)
  * with any overflow causing the return value to be SIZE_MAX. The
  * lvalue must be size_t to avoid implicit type conversion.
  */
-static inline size_t __must_check size_add(size_t addend1, size_t addend2)
+static __always_inline size_t __must_check size_add(size_t addend1, size_t addend2)
 {
 	size_t bytes;
 
@@ -367,7 +367,7 @@ static inline size_t __must_check size_add(size_t addend1, size_t addend2)
  * argument may be SIZE_MAX (or the result with be forced to SIZE_MAX).
  * The lvalue must be size_t to avoid implicit type conversion.
  */
-static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
+static __always_inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
 {
 	size_t bytes;
 
-- 
cgit v1.2.3


From 93a4a9b732fbb479f95d327aa867d094aed3f712 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze@samba.org>
Date: Tue, 24 Feb 2026 17:59:52 +0100
Subject: RDMA/core: Check id_priv->restricted_node_type in cma_listen_on_dev()

When listening on wildcard addresses we have a global list for the application
layer rdma_cm_id and for any existing device or any device added in future we
try to listen on any wildcard listener.

When the listener has a restricted_node_type we should prevent listening on
devices with a different node type.

While there fix the documentation comment of rdma_restrict_node_type()
to include rdma_resolve_addr() instead of having rdma_bind_addr() twice.

Fixes: a760e80e90f5 ("RDMA/core: introduce rdma_restrict_node_type()")
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Steve French <smfrench@gmail.com>
Cc: Namjae Jeon <linkinjeon@kernel.org>
Cc: Tom Talpey <tom@talpey.com>
Cc: Long Li <longli@microsoft.com>
Cc: linux-rdma@vger.kernel.org
Cc: linux-cifs@vger.kernel.org
Cc: samba-technical@lists.samba.org
Signed-off-by: Stefan Metzmacher <metze@samba.org>
Link: https://patch.msgid.link/20260224165951.3582093-2-metze@samba.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/rdma/rdma_cm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 6de6fd8bd15e..d639ff889e64 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -181,7 +181,7 @@ void rdma_destroy_id(struct rdma_cm_id *id);
  *
  * It needs to be called before the RDMA identifier is bound
  * to an device, which mean it should be called before
- * rdma_bind_addr(), rdma_bind_addr() and rdma_listen().
+ * rdma_bind_addr(), rdma_resolve_addr() and rdma_listen().
  */
 int rdma_restrict_node_type(struct rdma_cm_id *id, u8 node_type);
 
-- 
cgit v1.2.3


From 28aaa9c39945b7925a1cc1d513c8f21ed38f5e4f Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Thu, 26 Feb 2026 10:43:55 +0100
Subject: kthread: consolidate kthread exit paths to prevent use-after-free

Guillaume reported crashes via corrupted RCU callback function pointers
during KUnit testing. The crash was traced back to the pidfs rhashtable
conversion which replaced the 24-byte rb_node with an 8-byte rhash_head
in struct pid, shrinking it from 160 to 144 bytes.

struct kthread (without CONFIG_BLK_CGROUP) is also 144 bytes. With
CONFIG_SLAB_MERGE_DEFAULT and SLAB_HWCACHE_ALIGN both round up to
192 bytes and share the same slab cache. struct pid.rcu.func and
struct kthread.affinity_node both sit at offset 0x78.

When a kthread exits via make_task_dead() it bypasses kthread_exit() and
misses the affinity_node cleanup. free_kthread_struct() frees the memory
while the node is still linked into the global kthread_affinity_list. A
subsequent list_del() by another kthread writes through dangling list
pointers into the freed and reused memory, corrupting the pid's
rcu.func pointer.

Instead of patching free_kthread_struct() to handle the missed cleanup,
consolidate all kthread exit paths. Turn kthread_exit() into a macro
that calls do_exit() and add kthread_do_exit() which is called from
do_exit() for any task with PF_KTHREAD set. This guarantees that
kthread-specific cleanup always happens regardless of the exit path -
make_task_dead(), direct do_exit(), or kthread_exit().

Replace __to_kthread() with a new tsk_is_kthread() accessor in the
public header. Export do_exit() since module code using the
kthread_exit() macro now needs it directly.

Reported-by: Guillaume Tucker <gtucker@gtucker.io>
Tested-by: Guillaume Tucker <gtucker@gtucker.io>
Tested-by: Mark Brown <broonie@kernel.org>
Tested-by: David Gow <davidgow@google.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/all/20260224-mittlerweile-besessen-2738831ae7f6@brauner
Co-developed-by: Linus Torvalds <torvalds@linux-foundation.org>
Fixes: 4d13f4304fa4 ("kthread: Implement preferred affinity")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/kthread.h | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index c92c1149ee6e..a01a474719a7 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -7,6 +7,24 @@
 
 struct mm_struct;
 
+/* opaque kthread data */
+struct kthread;
+
+/*
+ * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will
+ * always remain a kthread.  For kthreads p->worker_private always
+ * points to a struct kthread.  For tasks that are not kthreads
+ * p->worker_private is used to point to other things.
+ *
+ * Return NULL for any task that is not a kthread.
+ */
+static inline struct kthread *tsk_is_kthread(struct task_struct *p)
+{
+	if (p->flags & PF_KTHREAD)
+		return p->worker_private;
+	return NULL;
+}
+
 __printf(4, 5)
 struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 					   void *data,
@@ -98,9 +116,10 @@ void *kthread_probe_data(struct task_struct *k);
 int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
-void kthread_exit(long result) __noreturn;
+#define kthread_exit(result) do_exit(result)
 void kthread_complete_and_exit(struct completion *, long) __noreturn;
 int kthreads_update_housekeeping(void);
+void kthread_do_exit(struct kthread *, long);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
-- 
cgit v1.2.3


From 102eab95f025b4d3f3a6c0a858400aca2af2fe52 Mon Sep 17 00:00:00 2001
From: Bobby Eshleman <bobbyeshleman@meta.com>
Date: Mon, 23 Feb 2026 14:38:33 -0800
Subject: vsock: lock down child_ns_mode as write-once

Two administrator processes may race when setting child_ns_mode as one
process sets child_ns_mode to "local" and then creates a namespace, but
another process changes child_ns_mode to "global" between the write and
the namespace creation. The first process ends up with a namespace in
"global" mode instead of "local". While this can be detected after the
fact by reading ns_mode and retrying, it is fragile and error-prone.

Make child_ns_mode write-once so that a namespace manager can set it
once and be sure it won't change. Writing a different value after the
first write returns -EBUSY. This applies to all namespaces, including
init_net, where an init process can write "local" to lock all future
namespaces into local mode.

Fixes: eafb64f40ca4 ("vsock: add netns to vsock core")
Suggested-by: Daan De Meyer <daan.j.demeyer@gmail.com>
Suggested-by: Stefano Garzarella <sgarzare@redhat.com>
Co-developed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Bobby Eshleman <bobbyeshleman@meta.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://patch.msgid.link/20260223-vsock-ns-write-once-v3-2-c0cde6959923@meta.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/af_vsock.h    | 13 +++++++++++--
 include/net/netns/vsock.h |  3 +++
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index d3ff48a2fbe0..533d8e75f7bb 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -276,10 +276,19 @@ static inline bool vsock_net_mode_global(struct vsock_sock *vsk)
 	return vsock_net_mode(sock_net(sk_vsock(vsk))) == VSOCK_NET_MODE_GLOBAL;
 }
 
-static inline void vsock_net_set_child_mode(struct net *net,
+static inline bool vsock_net_set_child_mode(struct net *net,
 					    enum vsock_net_mode mode)
 {
-	WRITE_ONCE(net->vsock.child_ns_mode, mode);
+	int new_locked = mode + 1;
+	int old_locked = 0; /* unlocked */
+
+	if (try_cmpxchg(&net->vsock.child_ns_mode_locked,
+			&old_locked, new_locked)) {
+		WRITE_ONCE(net->vsock.child_ns_mode, mode);
+		return true;
+	}
+
+	return old_locked == new_locked;
 }
 
 static inline enum vsock_net_mode vsock_net_child_mode(struct net *net)
diff --git a/include/net/netns/vsock.h b/include/net/netns/vsock.h
index b34d69a22fa8..dc8cbe45f406 100644
--- a/include/net/netns/vsock.h
+++ b/include/net/netns/vsock.h
@@ -17,5 +17,8 @@ struct netns_vsock {
 
 	enum vsock_net_mode mode;
 	enum vsock_net_mode child_ns_mode;
+
+	/* 0 = unlocked, 1 = locked to global, 2 = locked to local */
+	int child_ns_mode_locked;
 };
 #endif /* __NET_NET_NAMESPACE_VSOCK_H */
-- 
cgit v1.2.3


From a0b4c7a49137ed21279f354eb59f49ddae8dffc2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Feb 2026 13:32:33 +0000
Subject: netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict
 sequence

Fix netfslib such that when it's making an unbuffered or DIO write, to make
sure that it sends each subrequest strictly sequentially, waiting till the
previous one is 'committed' before sending the next so that we don't have
pieces landing out of order and potentially leaving a hole if an error
occurs (ENOSPC for example).

This is done by copying in just those bits of issuing, collecting and
retrying subrequests that are necessary to do one subrequest at a time.
Retrying, in particular, is simpler because if the current subrequest needs
retrying, the source iterator can just be copied again and the subrequest
prepped and issued again without needing to be concerned about whether it
needs merging with the previous or next in the sequence.

Note that the issuing loop waits for a subrequest to complete right after
issuing it, but this wait could be moved elsewhere allowing preparatory
steps to be performed whilst the subrequest is in progress.  In particular,
once content encryption is available in netfslib, that could be done whilst
waiting, as could cleanup of buffers that have been completed.

Fixes: 153a9961b551 ("netfs: Implement unbuffered/DIO write support")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://patch.msgid.link/58526.1772112753@warthog.procyon.org.uk
Tested-by: Steve French <sfrench@samba.org>
Reviewed-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/trace/events/netfs.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 64a382fbc31a..2d366be46a1c 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -57,6 +57,7 @@
 	EM(netfs_rreq_trace_done,		"DONE   ")	\
 	EM(netfs_rreq_trace_end_copy_to_cache,	"END-C2C")	\
 	EM(netfs_rreq_trace_free,		"FREE   ")	\
+	EM(netfs_rreq_trace_intr,		"INTR   ")	\
 	EM(netfs_rreq_trace_ki_complete,	"KI-CMPL")	\
 	EM(netfs_rreq_trace_recollect,		"RECLLCT")	\
 	EM(netfs_rreq_trace_redirty,		"REDIRTY")	\
@@ -169,7 +170,8 @@
 	EM(netfs_sreq_trace_put_oom,		"PUT OOM    ")	\
 	EM(netfs_sreq_trace_put_wip,		"PUT WIP    ")	\
 	EM(netfs_sreq_trace_put_work,		"PUT WORK   ")	\
-	E_(netfs_sreq_trace_put_terminated,	"PUT TERM   ")
+	EM(netfs_sreq_trace_put_terminated,	"PUT TERM   ")	\
+	E_(netfs_sreq_trace_see_failed,		"SEE FAILED ")
 
 #define netfs_folio_traces					\
 	EM(netfs_folio_is_uptodate,		"mod-uptodate")	\
-- 
cgit v1.2.3


From f3ec502b6755a3bfb12c1c47025ef989ff9efc72 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Wed, 25 Feb 2026 08:34:07 -0800
Subject: mm/slab: mark alloc tags empty for sheaves allocated with
 __GFP_NO_OBJ_EXT

alloc_empty_sheaf() allocates sheaves from SLAB_KMALLOC caches using
__GFP_NO_OBJ_EXT to avoid recursion, however it does not mark their
allocation tags empty before freeing, which results in a warning when
CONFIG_MEM_ALLOC_PROFILING_DEBUG is set. Fix this by marking allocation
tags for such sheaves as empty.

The problem was technically introduced in commit 4c0a17e28340 but only
becomes possible to hit with commit 913ffd3a1bf5.

Fixes: 4c0a17e28340 ("slab: prevent recursive kmalloc() in alloc_empty_sheaf()")
Fixes: 913ffd3a1bf5 ("slab: handle kmalloc sheaves bootstrap")
Reported-by: David Wang <00107082@163.com>
Closes: https://lore.kernel.org/all/20260223155128.3849-1-00107082@163.com/
Analyzed-by: Harry Yoo <harry.yoo@oracle.com>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Tested-by: Harry Yoo <harry.yoo@oracle.com>
Tested-by: David Wang <00107082@163.com>
Link: https://patch.msgid.link/20260225163407.2218712-1-surenb@google.com
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
---
 include/linux/gfp_types.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h
index 814bb2892f99..6c75df30a281 100644
--- a/include/linux/gfp_types.h
+++ b/include/linux/gfp_types.h
@@ -139,6 +139,8 @@ enum {
  * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
  *
  * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension.
+ * mark_obj_codetag_empty() should be called upon freeing for objects allocated
+ * with this flag to indicate that their NULL tags are expected and normal.
  */
 #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)
-- 
cgit v1.2.3


From 2b351ea42820a7ecc2e8305724536512984f4419 Mon Sep 17 00:00:00 2001
From: Sanjay Chitroda <sanjayembeddedse@gmail.com>
Date: Thu, 26 Feb 2026 11:17:12 +0530
Subject: mm/slub: drop duplicate kernel-doc for ksize()

The implementation of ksize() was updated with kernel-doc by commit
fab0694646d7 ("mm/slab: move [__]ksize and slab_ksize() to mm/slub.c")
However, the public header still contains a kernel-doc comment
attached to the ksize() prototype.

Having documentation both in the header and next to the implementation
causes Sphinx to treat the function as being documented twice,
resulting in the warning:

  WARNING: Duplicate C declaration, also defined at core-api/mm-api:521
  Declaration is '.. c:function:: size_t ksize(const void *objp)'

Kernel-doc guidelines recommend keeping the documentation with the
function implementation. Therefore remove the redundant kernel-doc
block from include/linux/slab.h so that the implementation in slub.c
remains the canonical source for documentation.

No functional change.

Fixes: fab0694646d7 ("mm/slab: move [__]ksize and slab_ksize() to mm/slub.c")
Signed-off-by: Sanjay Chitroda <sanjayembeddedse@gmail.com>
Link: https://patch.msgid.link/20260226054712.3610744-1-sanjayembedded@gmail.com
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
---
 include/linux/slab.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index a5a5e4108ae5..15a60b501b95 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -517,18 +517,6 @@ void kfree_sensitive(const void *objp);
 DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T))
 DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T))
 
-/**
- * ksize - Report actual allocation size of associated object
- *
- * @objp: Pointer returned from a prior kmalloc()-family allocation.
- *
- * This should not be used for writing beyond the originally requested
- * allocation size. Either use krealloc() or round up the allocation size
- * with kmalloc_size_roundup() prior to allocation. If this is used to
- * access beyond the originally requested allocation size, UBSAN_BOUNDS
- * and/or FORTIFY_SOURCE may trip, since they only know about the
- * originally allocated size via the __alloc_size attribute.
- */
 size_t ksize(const void *objp);
 
 #ifdef CONFIG_PRINTK
-- 
cgit v1.2.3


From 8678591b47469fe16357234efef9b260317b8be4 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Wed, 25 Feb 2026 15:02:51 -0700
Subject: kbuild: Split .modinfo out from ELF_DETAILS

Commit 3e86e4d74c04 ("kbuild: keep .modinfo section in
vmlinux.unstripped") added .modinfo to ELF_DETAILS while removing it
from COMMON_DISCARDS, as it was needed in vmlinux.unstripped and
ELF_DETAILS was present in all architecture specific vmlinux linker
scripts. While this shuffle is fine for vmlinux, ELF_DETAILS and
COMMON_DISCARDS may be used by other linker scripts, such as the s390
and x86 compressed boot images, which may not expect to have a .modinfo
section. In certain circumstances, this could result in a bootloader
failing to load the compressed kernel [1].

Commit ddc6cbef3ef1 ("s390/boot/vmlinux.lds.S: Ensure bzImage ends with
SecureBoot trailer") recently addressed this for the s390 bzImage but
the same bug remains for arm, parisc, and x86. The presence of .modinfo
in the x86 bzImage was the root cause of the issue worked around with
commit d50f21091358 ("kbuild: align modinfo section for Secureboot
Authenticode EDK2 compat"). misc.c in arch/x86/boot/compressed includes
lib/decompress_unzstd.c, which in turn includes lib/xxhash.c and its
MODULE_LICENSE / MODULE_DESCRIPTION macros due to the STATIC definition.

Split .modinfo out from ELF_DETAILS into its own macro and handle it in
all vmlinux linker scripts. Discard .modinfo in the places where it was
previously being discarded from being in COMMON_DISCARDS, as it has
never been necessary in those uses.

Cc: stable@vger.kernel.org
Fixes: 3e86e4d74c04 ("kbuild: keep .modinfo section in vmlinux.unstripped")
Reported-by: Ed W <lists@wildgooses.com>
Closes: https://lore.kernel.org/587f25e0-a80e-46a5-9f01-87cb40cfa377@wildgooses.com/ [1]
Tested-by: Ed W <lists@wildgooses.com> # x86_64
Link: https://patch.msgid.link/20260225-separate-modinfo-from-elf-details-v1-1-387ced6baf4b@kernel.org
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
---
 include/asm-generic/vmlinux.lds.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index eeb070f330bd..1e1580febe4b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -848,12 +848,14 @@
 
 /* Required sections not related to debugging. */
 #define ELF_DETAILS							\
-		.modinfo : { *(.modinfo) . = ALIGN(8); }		\
 		.comment 0 : { *(.comment) }				\
 		.symtab 0 : { *(.symtab) }				\
 		.strtab 0 : { *(.strtab) }				\
 		.shstrtab 0 : { *(.shstrtab) }
 
+#define MODINFO								\
+		.modinfo : { *(.modinfo) . = ALIGN(8); }
+
 #ifdef CONFIG_GENERIC_BUG
 #define BUG_TABLE							\
 	. = ALIGN(8);							\
-- 
cgit v1.2.3


From 3350c2b3f2b8a3b985a020a4ef4f2f050a4b6a1d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 25 Feb 2026 21:12:29 -0800
Subject: platform_data/mlxreg: mlxreg.h: fix all kernel-doc warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the correct kernel-doc format & notation to eliminate
kernel-doc warnings:

Warning: include/linux/platform_data/mlxreg.h:24 Enum value
 'MLX_WDT_TYPE1' not described in enum 'mlxreg_wdt_type'
Warning: include/linux/platform_data/mlxreg.h:24 Enum value
 'MLX_WDT_TYPE2' not described in enum 'mlxreg_wdt_type'
Warning: include/linux/platform_data/mlxreg.h:24 Enum value
 'MLX_WDT_TYPE3' not described in enum 'mlxreg_wdt_type'
Warning: include/linux/platform_data/mlxreg.h:37 bad line:
 PHYs ready / unready state;
Warning: include/linux/platform_data/mlxreg.h:153 struct member 'np'
 not described in 'mlxreg_core_data'
Warning: include/linux/platform_data/mlxreg.h:153 struct member 'hpdev'
 not described in 'mlxreg_core_data'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260226051232.549537-1-rdunlap@infradead.org
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/platform_data/mlxreg.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h
index f6cca7a035c7..50b6be57da66 100644
--- a/include/linux/platform_data/mlxreg.h
+++ b/include/linux/platform_data/mlxreg.h
@@ -13,10 +13,10 @@
 /**
  * enum mlxreg_wdt_type - type of HW watchdog
  *
- * TYPE1 HW watchdog implementation exist in old systems.
- * All new systems have TYPE2 HW watchdog.
- * TYPE3 HW watchdog can exist on all systems with new CPLD.
- * TYPE3 is selected by WD capability bit.
+ * @MLX_WDT_TYPE1: HW watchdog implementation in old systems.
+ * @MLX_WDT_TYPE2: All new systems have TYPE2 HW watchdog.
+ * @MLX_WDT_TYPE3: HW watchdog that can exist on all systems with new CPLD.
+ *   TYPE3 is selected by WD capability bit.
  */
 enum mlxreg_wdt_type {
 	MLX_WDT_TYPE1,
@@ -35,7 +35,7 @@ enum mlxreg_wdt_type {
  * @MLXREG_HOTPLUG_LC_SYNCED: entry for line card synchronization events, coming
  *			      after hardware-firmware synchronization handshake;
  * @MLXREG_HOTPLUG_LC_READY: entry for line card ready events, indicating line card
-			     PHYs ready / unready state;
+ *			     PHYs ready / unready state;
  * @MLXREG_HOTPLUG_LC_ACTIVE: entry for line card active events, indicating firmware
  *			      availability / unavailability for the ports on line card;
  * @MLXREG_HOTPLUG_LC_THERMAL: entry for line card thermal shutdown events, positive
@@ -123,8 +123,8 @@ struct mlxreg_hotplug_device {
  * @reg_pwr: attribute power register;
  * @reg_ena: attribute enable register;
  * @mode: access mode;
- * @np - pointer to node platform associated with attribute;
- * @hpdev - hotplug device data;
+ * @np: pointer to node platform associated with attribute;
+ * @hpdev: hotplug device data;
  * @notifier: pointer to event notifier block;
  * @health_cntr: dynamic device health indication counter;
  * @attached: true if device has been attached after good health indication;
-- 
cgit v1.2.3


From 39195990e4c093c9eecf88f29811c6de29265214 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 27 Feb 2026 06:10:08 -0600
Subject: PCI: Correct PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fb82437fdd8c ("PCI: Change capability register offsets to hex") incorrectly
converted the PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 value from decimal 52 to hex
0x32:

  -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52      /* v2 endpoints with link end here */
  +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32    /* end of v2 EPs w/ link */

This broke PCI capabilities in a VMM because subsequent ones weren't
DWORD-aligned.

Change PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 to the correct value of 0x34.

fb82437fdd8c was from Baruch Siach <baruch@tkos.co.il>, but this was not
Baruch's fault; it's a mistake I made when applying the patch.

Fixes: fb82437fdd8c ("PCI: Change capability register offsets to hex")
Reported-by: David Woodhouse <dwmw2@infradead.org>
Closes: https://lore.kernel.org/all/3ae392a0158e9d9ab09a1d42150429dd8ca42791.camel@infradead.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
---
 include/uapi/linux/pci_regs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index ec1c54b5a310..14f634ab9350 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -712,7 +712,7 @@
 #define  PCI_EXP_LNKCTL2_HASD		0x0020 /* HW Autonomous Speed Disable */
 #define PCI_EXP_LNKSTA2		0x32	/* Link Status 2 */
 #define  PCI_EXP_LNKSTA2_FLIT		0x0400 /* Flit Mode Status */
-#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2	0x32	/* end of v2 EPs w/ link */
+#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2	0x34	/* end of v2 EPs w/ link */
 #define PCI_EXP_SLTCAP2		0x34	/* Slot Capabilities 2 */
 #define  PCI_EXP_SLTCAP2_IBPD	0x00000001 /* In-band PD Disable Supported */
 #define PCI_EXP_SLTCTL2		0x38	/* Slot Control 2 */
-- 
cgit v1.2.3


From e6b899f08066e744f89df16ceb782e06868bd148 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Thu, 26 Feb 2026 14:50:09 +0100
Subject: nsfs: tighten permission checks for ns iteration ioctls

Even privileged services should not necessarily be able to see other
privileged service's namespaces so they can't leak information to each
other. Use may_see_all_namespaces() helper that centralizes this policy
until the nstree adapts.

Link: https://patch.msgid.link/20260226-work-visibility-fixes-v1-1-d2c2853313bd@kernel.org
Fixes: a1d220d9dafa ("nsfs: iterate through mount namespaces")
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Cc: stable@kernel.org # v6.12+
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/ns_common.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h
index 825f5865bfc5..c8e227a3f9e2 100644
--- a/include/linux/ns_common.h
+++ b/include/linux/ns_common.h
@@ -55,6 +55,8 @@ static __always_inline bool is_ns_init_id(const struct ns_common *ns)
 
 #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns)))
 
+bool may_see_all_namespaces(void);
+
 static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns)
 {
 	return atomic_read(&ns->__ns_ref_active);
-- 
cgit v1.2.3


From 1df97a7453eec80c1912c2d0360290a3970a7671 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Fri, 27 Feb 2026 14:48:01 -0800
Subject: bpf: Register dtor for freeing special fields

There is a race window where BPF hash map elements can leak special
fields if the program with access to the map value recreates these
special fields between the check_and_free_fields done on the map value
and its eventual return to the memory allocator.

Several ways were explored prior to this patch, most notably [0] tried
to use a poison value to reject attempts to recreate special fields for
map values that have been logically deleted but still accessible to BPF
programs (either while sitting in the free list or when reused). While
this approach works well for task work, timers, wq, etc., it is harder
to apply the idea to kptrs, which have a similar race and failure mode.

Instead, we change bpf_mem_alloc to allow registering destructor for
allocated elements, such that when they are returned to the allocator,
any special fields created while they were accessible to programs in the
mean time will be freed. If these values get reused, we do not free the
fields again before handing the element back. The special fields thus
may remain initialized while the map value sits in a free list.

When bpf_mem_alloc is retired in the future, a similar concept can be
introduced to kmalloc_nolock-backed kmem_cache, paired with the existing
idea of a constructor.

Note that the destructor registration happens in map_check_btf, after
the BTF record is populated and (at that point) avaiable for inspection
and duplication. Duplication is necessary since the freeing of embedded
bpf_mem_alloc can be decoupled from actual map lifetime due to logic
introduced to reduce the cost of rcu_barrier()s in mem alloc free path in
9f2c6e96c65e ("bpf: Optimize rcu_barrier usage between hash map and bpf_mem_alloc.").

As such, once all callbacks are done, we must also free the duplicated
record. To remove dependency on the bpf_map itself, also stash the key
size of the map to obtain value from htab_elem long after the map is
gone.

  [0]: https://lore.kernel.org/bpf/20260216131341.1285427-1-mykyta.yatsenko5@gmail.com

Fixes: 14a324f6a67e ("bpf: Wire up freeing of referenced kptr")
Fixes: 1bfbc267ec91 ("bpf: Enable bpf_timer and bpf_wq in any context")
Reported-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: syzbot@syzkaller.appspotmail.com
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20260227224806.646888-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_mem_alloc.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index e45162ef59bb..4ce0d27f8ea2 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -14,6 +14,8 @@ struct bpf_mem_alloc {
 	struct obj_cgroup *objcg;
 	bool percpu;
 	struct work_struct work;
+	void (*dtor_ctx_free)(void *ctx);
+	void *dtor_ctx;
 };
 
 /* 'size != 0' is for bpf_mem_alloc which manages fixed-size objects.
@@ -32,6 +34,10 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg
 /* The percpu allocation with a specific unit size. */
 int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size);
 void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
+void bpf_mem_alloc_set_dtor(struct bpf_mem_alloc *ma,
+			    void (*dtor)(void *obj, void *ctx),
+			    void (*dtor_ctx_free)(void *ctx),
+			    void *ctx);
 
 /* Check the allocation size for kmalloc equivalent allocator */
 int bpf_mem_alloc_check_size(bool percpu, size_t size);
-- 
cgit v1.2.3


From ae51772b1e94ba1d76db19085957dbccac189c1c Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Fri, 27 Feb 2026 14:48:02 -0800
Subject: bpf: Lose const-ness of map in map_check_btf()

BPF hash map may now use the map_check_btf() callback to decide whether
to set a dtor on its bpf_mem_alloc or not. Unlike C++ where members can
opt out of const-ness using mutable, we must lose the const qualifier on
the callback such that we can avoid the ugly cast. Make the change and
adjust all existing users, and lose the comment in hashtab.c.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20260227224806.646888-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h               | 4 ++--
 include/linux/bpf_local_storage.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b78b53198a2e..05b34a6355b0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -124,7 +124,7 @@ struct bpf_map_ops {
 	u32 (*map_fd_sys_lookup_elem)(void *ptr);
 	void (*map_seq_show_elem)(struct bpf_map *map, void *key,
 				  struct seq_file *m);
-	int (*map_check_btf)(const struct bpf_map *map,
+	int (*map_check_btf)(struct bpf_map *map,
 			     const struct btf *btf,
 			     const struct btf_type *key_type,
 			     const struct btf_type *value_type);
@@ -656,7 +656,7 @@ static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
 		map->ops->map_seq_show_elem;
 }
 
-int map_check_no_btf(const struct bpf_map *map,
+int map_check_no_btf(struct bpf_map *map,
 		     const struct btf *btf,
 		     const struct btf_type *key_type,
 		     const struct btf_type *value_type);
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 85efa9772530..8157e8da61d4 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -176,7 +176,7 @@ u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage);
 void bpf_local_storage_map_free(struct bpf_map *map,
 				struct bpf_local_storage_cache *cache);
 
-int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+int bpf_local_storage_map_check_btf(struct bpf_map *map,
 				    const struct btf *btf,
 				    const struct btf_type *key_type,
 				    const struct btf_type *value_type);
-- 
cgit v1.2.3


From 62413a9c3cb183afb9bb6e94dd68caf4e4145f4c Mon Sep 17 00:00:00 2001
From: Paul Moses <p@1g4.org>
Date: Mon, 23 Feb 2026 15:05:44 +0000
Subject: net/sched: act_gate: snapshot parameters with RCU on replace

The gate action can be replaced while the hrtimer callback or dump path is
walking the schedule list.

Convert the parameters to an RCU-protected snapshot and swap updates under
tcf_lock, freeing the previous snapshot via call_rcu(). When REPLACE omits
the entry list, preserve the existing schedule so the effective state is
unchanged.

Fixes: a51c328df310 ("net: qos: introduce a gate control flow action")
Cc: stable@vger.kernel.org
Signed-off-by: Paul Moses <p@1g4.org>
Tested-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Link: https://patch.msgid.link/20260223150512.2251594-2-p@1g4.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tc_act/tc_gate.h | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h
index b147a3bb1a46..e0fded18e18c 100644
--- a/include/net/tc_act/tc_gate.h
+++ b/include/net/tc_act/tc_gate.h
@@ -32,6 +32,7 @@ struct tcf_gate_params {
 	s32			tcfg_clockid;
 	size_t			num_entries;
 	struct list_head	entries;
+	struct rcu_head		rcu;
 };
 
 #define GATE_ACT_GATE_OPEN	BIT(0)
@@ -39,7 +40,7 @@ struct tcf_gate_params {
 
 struct tcf_gate {
 	struct tc_action	common;
-	struct tcf_gate_params	param;
+	struct tcf_gate_params __rcu *param;
 	u8			current_gate_status;
 	ktime_t			current_close_time;
 	u32			current_entry_octets;
@@ -51,47 +52,65 @@ struct tcf_gate {
 
 #define to_gate(a) ((struct tcf_gate *)a)
 
+static inline struct tcf_gate_params *tcf_gate_params_locked(const struct tc_action *a)
+{
+	struct tcf_gate *gact = to_gate(a);
+
+	return rcu_dereference_protected(gact->param,
+					 lockdep_is_held(&gact->tcf_lock));
+}
+
 static inline s32 tcf_gate_prio(const struct tc_action *a)
 {
+	struct tcf_gate_params *p;
 	s32 tcfg_prio;
 
-	tcfg_prio = to_gate(a)->param.tcfg_priority;
+	p = tcf_gate_params_locked(a);
+	tcfg_prio = p->tcfg_priority;
 
 	return tcfg_prio;
 }
 
 static inline u64 tcf_gate_basetime(const struct tc_action *a)
 {
+	struct tcf_gate_params *p;
 	u64 tcfg_basetime;
 
-	tcfg_basetime = to_gate(a)->param.tcfg_basetime;
+	p = tcf_gate_params_locked(a);
+	tcfg_basetime = p->tcfg_basetime;
 
 	return tcfg_basetime;
 }
 
 static inline u64 tcf_gate_cycletime(const struct tc_action *a)
 {
+	struct tcf_gate_params *p;
 	u64 tcfg_cycletime;
 
-	tcfg_cycletime = to_gate(a)->param.tcfg_cycletime;
+	p = tcf_gate_params_locked(a);
+	tcfg_cycletime = p->tcfg_cycletime;
 
 	return tcfg_cycletime;
 }
 
 static inline u64 tcf_gate_cycletimeext(const struct tc_action *a)
 {
+	struct tcf_gate_params *p;
 	u64 tcfg_cycletimeext;
 
-	tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext;
+	p = tcf_gate_params_locked(a);
+	tcfg_cycletimeext = p->tcfg_cycletime_ext;
 
 	return tcfg_cycletimeext;
 }
 
 static inline u32 tcf_gate_num_entries(const struct tc_action *a)
 {
+	struct tcf_gate_params *p;
 	u32 num_entries;
 
-	num_entries = to_gate(a)->param.num_entries;
+	p = tcf_gate_params_locked(a);
+	num_entries = p->num_entries;
 
 	return num_entries;
 }
@@ -105,7 +124,7 @@ static inline struct action_gate_entry
 	u32 num_entries;
 	int i = 0;
 
-	p = &to_gate(a)->param;
+	p = tcf_gate_params_locked(a);
 	num_entries = p->num_entries;
 
 	list_for_each_entry(entry, &p->entries, list)
-- 
cgit v1.2.3


From 76e954155b45294c502e3d3a9e15757c858ca55e Mon Sep 17 00:00:00 2001
From: Harishankar Vishwanathan <harishankar.vishwanathan@gmail.com>
Date: Fri, 27 Feb 2026 22:32:21 +0100
Subject: bpf: Introduce tnum_step to step through tnum's members

This commit introduces tnum_step(), a function that, when given t, and a
number z returns the smallest member of t larger than z. The number z
must be greater or equal to the smallest member of t and less than the
largest member of t.

The first step is to compute j, a number that keeps all of t's known
bits, and matches all unknown bits to z's bits. Since j is a member of
the t, it is already a candidate for result. However, we want our result
to be (minimally) greater than z.

There are only two possible cases:

(1) Case j <= z. In this case, we want to increase the value of j and
make it > z.
(2) Case j > z. In this case, we want to decrease the value of j while
keeping it > z.

(Case 1) j <= z

t = xx11x0x0
z = 10111101 (189)
j = 10111000 (184)
         ^
         k

(Case 1.1) Let's first consider the case where j < z. We will address j
== z later.

Since z > j, there had to be a bit position that was 1 in z and a 0 in
j, beyond which all positions of higher significance are equal in j and
z. Further, this position could not have been unknown in a, because the
unknown positions of a match z. This position had to be a 1 in z and
known 0 in t.

Let k be position of the most significant 1-to-0 flip. In our example, k
= 3 (starting the count at 1 at the least significant bit).  Setting (to
1) the unknown bits of t in positions of significance smaller than
k will not produce a result > z. Hence, we must set/unset the unknown
bits at positions of significance higher than k. Specifically, we look
for the next larger combination of 1s and 0s to place in those
positions, relative to the combination that exists in z. We can achieve
this by concatenating bits at unknown positions of t into an integer,
adding 1, and writing the bits of that result back into the
corresponding bit positions previously extracted from z.

>From our example, considering only positions of significance greater
than k:

t =  xx..x
z =  10..1
    +    1
     -----
     11..0

This is the exact combination 1s and 0s we need at the unknown bits of t
in positions of significance greater than k. Further, our result must
only increase the value minimally above z. Hence, unknown bits in
positions of significance smaller than k should remain 0. We finally
have,

result = 11110000 (240)

(Case 1.2) Now consider the case when j = z, for example

t = 1x1x0xxx
z = 10110100 (180)
j = 10110100 (180)

Matching the unknown bits of the t to the bits of z yielded exactly z.
To produce a number greater than z, we must set/unset the unknown bits
in t, and *all* the unknown bits of t candidates for being set/unset. We
can do this similar to Case 1.1, by adding 1 to the bits extracted from
the masked bit positions of z. Essentially, this case is equivalent to
Case 1.1, with k = 0.

t =  1x1x0xxx
z =  .0.1.100
    +       1
    ---------
     .0.1.101

This is the exact combination of bits needed in the unknown positions of
t. After recalling the known positions of t, we get

result = 10110101 (181)

(Case 2) j > z

t = x00010x1
z = 10000010 (130)
j = 10001011 (139)
	^
	k

Since j > z, there had to be a bit position which was 0 in z, and a 1 in
j, beyond which all positions of higher significance are equal in j and
z. This position had to be a 0 in z and known 1 in t. Let k be the
position of the most significant 0-to-1 flip. In our example, k = 4.

Because of the 0-to-1 flip at position k, a member of t can become
greater than z if the bits in positions greater than k are themselves >=
to z. To make that member *minimally* greater than z, the bits in
positions greater than k must be exactly = z. Hence, we simply match all
of t's unknown bits in positions more significant than k to z's bits. In
positions less significant than k, we set all t's unknown bits to 0
to retain minimality.

In our example, in positions of greater significance than k (=4),
t=x000. These positions are matched with z (1000) to produce 1000. In
positions of lower significance than k, t=10x1. All unknown bits are set
to 0 to produce 1001. The final result is:

result = 10001001 (137)

This concludes the computation for a result > z that is a member of t.

The procedure for tnum_step() in this commit implements the idea
described above. As a proof of correctness, we verified the algorithm
against a logical specification of tnum_step. The specification asserts
the following about the inputs t, z and output res that:

1. res is a member of t, and
2. res is strictly greater than z, and
3. there does not exist another value res2 such that
	3a. res2 is also a member of t, and
	3b. res2 is greater than z
	3c. res2 is smaller than res

We checked the implementation against this logical specification using
an SMT solver. The verification formula in SMTLIB format is available
at [1]. The verification returned an "unsat": indicating that no input
assignment exists for which the implementation and the specification
produce different outputs.

In addition, we also automatically generated the logical encoding of the
C implementation using Agni [2] and verified it against the same
specification. This verification also returned an "unsat", confirming
that the implementation is equivalent to the specification. The formula
for this check is also available at [3].

Link: https://pastebin.com/raw/2eRWbiit [1]
Link: https://github.com/bpfverif/agni [2]
Link: https://pastebin.com/raw/EztVbBJ2 [3]
Co-developed-by: Srinivas Narayana <srinivas.narayana@rutgers.edu>
Signed-off-by: Srinivas Narayana <srinivas.narayana@rutgers.edu>
Co-developed-by: Santosh Nagarakatte <santosh.nagarakatte@rutgers.edu>
Signed-off-by: Santosh Nagarakatte <santosh.nagarakatte@rutgers.edu>
Signed-off-by: Harishankar Vishwanathan <harishankar.vishwanathan@gmail.com>
Link: https://lore.kernel.org/r/93fdf71910411c0f19e282ba6d03b4c65f9c5d73.1772225741.git.paul.chaignon@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/tnum.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index fa4654ffb621..ca2cfec8de08 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -131,4 +131,7 @@ static inline bool tnum_subreg_is_const(struct tnum a)
 	return !(tnum_subreg(a)).mask;
 }
 
+/* Returns the smallest member of t larger than z */
+u64 tnum_step(struct tnum t, u64 z);
+
 #endif /* _LINUX_TNUM_H */
-- 
cgit v1.2.3


From 29252397bcc1e0a1f85e5c3bee59c325f5c26341 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 25 Feb 2026 20:35:45 +0000
Subject: inet: annotate data-races around isk->inet_num

UDP/TCP lookups are using RCU, thus isk->inet_num accesses
should use READ_ONCE() and WRITE_ONCE() where needed.

Fixes: 3ab5aee7fe84 ("net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260225203545.1512417-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet6_hashtables.h | 2 +-
 include/net/inet_hashtables.h  | 2 +-
 include/net/ip.h               | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 282e29237d93..c16de5b7963f 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -175,7 +175,7 @@ static inline bool inet6_match(const struct net *net, const struct sock *sk,
 {
 	if (!net_eq(sock_net(sk), net) ||
 	    sk->sk_family != AF_INET6 ||
-	    sk->sk_portpair != ports ||
+	    READ_ONCE(sk->sk_portpair) != ports ||
 	    !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) ||
 	    !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
 		return false;
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index ac05a52d9e13..5a979dcab538 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -345,7 +345,7 @@ static inline bool inet_match(const struct net *net, const struct sock *sk,
 			      int dif, int sdif)
 {
 	if (!net_eq(sock_net(sk), net) ||
-	    sk->sk_portpair != ports ||
+	    READ_ONCE(sk->sk_portpair) != ports ||
 	    sk->sk_addrpair != cookie)
 	        return false;
 
diff --git a/include/net/ip.h b/include/net/ip.h
index 69d5cef46004..7f9abd457e01 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -101,7 +101,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
 
 	ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
 	ipcm->addr = inet->inet_saddr;
-	ipcm->protocol = inet->inet_num;
+	ipcm->protocol = READ_ONCE(inet->inet_num);
 }
 
 #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
-- 
cgit v1.2.3


From 11cb63b0d1a0685e0831ae3c77223e002ef18189 Mon Sep 17 00:00:00 2001
From: Victor Nogueira <victor@mojatatu.com>
Date: Wed, 25 Feb 2026 10:43:48 -0300
Subject: net/sched: Only allow act_ct to bind to clsact/ingress qdiscs and
 shared blocks

As Paolo said earlier [1]:

"Since the blamed commit below, classify can return TC_ACT_CONSUMED while
the current skb being held by the defragmentation engine. As reported by
GangMin Kim, if such packet is that may cause a UaF when the defrag engine
later on tries to tuch again such packet."

act_ct was never meant to be used in the egress path, however some users
are attaching it to egress today [2]. Attempting to reach a middle
ground, we noticed that, while most qdiscs are not handling
TC_ACT_CONSUMED, clsact/ingress qdiscs are. With that in mind, we
address the issue by only allowing act_ct to bind to clsact/ingress
qdiscs and shared blocks. That way it's still possible to attach act_ct to
egress (albeit only with clsact).

[1] https://lore.kernel.org/netdev/674b8cbfc385c6f37fb29a1de08d8fe5c2b0fbee.1771321118.git.pabeni@redhat.com/
[2] https://lore.kernel.org/netdev/cc6bfb4a-4a2b-42d8-b9ce-7ef6644fb22b@ovn.org/

Reported-by: GangMin Kim <km.kim1503@gmail.com>
Fixes: 3f14b377d01d ("net/sched: act_ct: fix skb leak and crash on ooo frags")
CC: stable@vger.kernel.org
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260225134349.1287037-1-victor@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/act_api.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index e1e8f0f7dacb..d11b79107930 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -70,6 +70,7 @@ struct tc_action {
 #define TCA_ACT_FLAGS_REPLACE	(1U << (TCA_ACT_FLAGS_USER_BITS + 2))
 #define TCA_ACT_FLAGS_NO_RTNL	(1U << (TCA_ACT_FLAGS_USER_BITS + 3))
 #define TCA_ACT_FLAGS_AT_INGRESS	(1U << (TCA_ACT_FLAGS_USER_BITS + 4))
+#define TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT	(1U << (TCA_ACT_FLAGS_USER_BITS + 5))
 
 /* Update lastuse only if needed, to avoid dirtying a cache line.
  * We use a temp variable to avoid fetching jiffies twice.
-- 
cgit v1.2.3


From 15fba71533bcdfaa8eeba69a5a5a2927afdf664a Mon Sep 17 00:00:00 2001
From: Valentin Spreckels <valentin@spreckels.dev>
Date: Thu, 26 Feb 2026 20:54:09 +0100
Subject: net: usb: r8152: add TRENDnet TUC-ET2G

The TRENDnet TUC-ET2G is a RTL8156 based usb ethernet adapter. Add its
vendor and product IDs.

Signed-off-by: Valentin Spreckels <valentin@spreckels.dev>
Link: https://patch.msgid.link/20260226195409.7891-2-valentin@spreckels.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/usb/r8152.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h
index 2ca60828f28b..1502b2a355f9 100644
--- a/include/linux/usb/r8152.h
+++ b/include/linux/usb/r8152.h
@@ -32,6 +32,7 @@
 #define VENDOR_ID_DLINK			0x2001
 #define VENDOR_ID_DELL			0x413c
 #define VENDOR_ID_ASUS			0x0b05
+#define VENDOR_ID_TRENDNET		0x20f4
 
 #if IS_REACHABLE(CONFIG_USB_RTL8152)
 extern u8 rtl8152_get_version(struct usb_interface *intf);
-- 
cgit v1.2.3


From 05ac3846ffe53fc63e454eb195ce8a6bab7a6a88 Mon Sep 17 00:00:00 2001
From: Shenghao Ding <shenghao-ding@ti.com>
Date: Fri, 27 Feb 2026 22:46:40 +0800
Subject: ALSA: hda/tas2781: A workaround solution to lower-vol issue among
 lower calibrated-impedance micro-speaker on TAS2781

On TAS2781, if the Speaker calibrated impedance is lower than default
value hard-coded inside the TAS2781, it will cuase vol lower than
normal. In order to fix this issue, the parameter of SineGainI need
updating.

Signed-off-by: Shenghao Ding <shenghao-ding@ti.com>
Tested-by: Matthew Schwartz <matthew.schwartz@linux.dev>
Link: https://patch.msgid.link/20260227144641.1243-1-shenghao-ding@ti.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/tas2781.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h
index 7c03bdc951bb..e847cf51878c 100644
--- a/include/sound/tas2781.h
+++ b/include/sound/tas2781.h
@@ -151,6 +151,7 @@ struct tasdevice {
 	struct bulk_reg_val *cali_data_backup;
 	struct bulk_reg_val alp_cali_bckp;
 	struct tasdevice_fw *cali_data_fmw;
+	void *cali_specific;
 	unsigned int dev_addr;
 	unsigned int err_code;
 	unsigned char cur_book;
-- 
cgit v1.2.3


From 0ed2e8bf61d6d5df1d78f4e24b682dff4c394e17 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 28 Feb 2026 04:56:20 -0700
Subject: io_uring: correct comment for IORING_SETUP_TASKRUN_FLAG

Sync with a recent liburing fix, which corrects the comment explaining
when the IORING_SETUP_TASKRUN_FLAG setup flag is valid to use. May be
use with COOP_TASKRUN or DEFER_TASKRUN, not useful without either of
this task_work mechanisms being used.

Link: https://github.com/axboe/liburing/pull/1543
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 6750c383a2ab..1ff16141c8a5 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -188,7 +188,8 @@ enum io_uring_sqe_flags_bit {
 /*
  * If COOP_TASKRUN is set, get notified if task work is available for
  * running and a kernel transition would be needed to run it. This sets
- * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN.
+ * IORING_SQ_TASKRUN in the sq ring flags. Not valid without COOP_TASKRUN
+ * or DEFER_TASKRUN.
  */
 #define IORING_SETUP_TASKRUN_FLAG	(1U << 9)
 #define IORING_SETUP_SQE128		(1U << 10) /* SQEs are 128 byte */
-- 
cgit v1.2.3


From 407fd8b8d8cce03856aa67329715de48b254b529 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 11 Feb 2026 19:03:03 +0100
Subject: KVM: remove CONFIG_KVM_GENERIC_MMU_NOTIFIER

All architectures now use MMU notifier for KVM page table management.
Remove the Kconfig symbol and the code that is used when it is
disabled.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index dde605cb894e..34759a262b28 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -253,7 +253,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
 
-#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
 union kvm_mmu_notifier_arg {
 	unsigned long attributes;
 };
@@ -275,7 +274,6 @@ struct kvm_gfn_range {
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
 bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
-#endif
 
 enum {
 	OUTSIDE_GUEST_MODE,
@@ -849,13 +847,12 @@ struct kvm {
 	struct hlist_head irq_ack_notifier_list;
 #endif
 
-#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
 	struct mmu_notifier mmu_notifier;
 	unsigned long mmu_invalidate_seq;
 	long mmu_invalidate_in_progress;
 	gfn_t mmu_invalidate_range_start;
 	gfn_t mmu_invalidate_range_end;
-#endif
+
 	struct list_head devices;
 	u64 manual_dirty_log_protect;
 	struct dentry *debugfs_dentry;
@@ -2118,7 +2115,6 @@ extern const struct _kvm_stats_desc kvm_vm_stats_desc[];
 extern const struct kvm_stats_header kvm_vcpu_stats_header;
 extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[];
 
-#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
 static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
 {
 	if (unlikely(kvm->mmu_invalidate_in_progress))
@@ -2196,7 +2192,6 @@ static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm,
 
 	return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq;
 }
-#endif
 
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 60abb0ac11dccd6b98fd9182bc5f85b621688861 Mon Sep 17 00:00:00 2001
From: "Nikhil P. Rao" <nikhil.rao@amd.com>
Date: Wed, 25 Feb 2026 00:00:26 +0000
Subject: xsk: Fix fragment node deletion to prevent buffer leak

After commit b692bf9a7543 ("xsk: Get rid of xdp_buff_xsk::xskb_list_node"),
the list_node field is reused for both the xskb pool list and the buffer
free list, this causes a buffer leak as described below.

xp_free() checks if a buffer is already on the free list using
list_empty(&xskb->list_node). When list_del() is used to remove a node
from the xskb pool list, it doesn't reinitialize the node pointers.
This means list_empty() will return false even after the node has been
removed, causing xp_free() to incorrectly skip adding the buffer to the
free list.

Fix this by using list_del_init() instead of list_del() in all fragment
handling paths, this ensures the list node is reinitialized after removal,
allowing the list_empty() to work correctly.

Fixes: b692bf9a7543 ("xsk: Get rid of xdp_buff_xsk::xskb_list_node")
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Nikhil P. Rao <nikhil.rao@amd.com>
Link: https://patch.msgid.link/20260225000456.107806-2-nikhil.rao@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/xdp_sock_drv.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 242e34f771cc..aefc368449d5 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -122,7 +122,7 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
 		goto out;
 
 	list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
-		list_del(&pos->list_node);
+		list_del_init(&pos->list_node);
 		xp_free(pos);
 	}
 
@@ -157,7 +157,7 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
 	frag = list_first_entry_or_null(&xskb->pool->xskb_list,
 					struct xdp_buff_xsk, list_node);
 	if (frag) {
-		list_del(&frag->list_node);
+		list_del_init(&frag->list_node);
 		ret = &frag->xdp;
 	}
 
@@ -168,7 +168,7 @@ static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
 {
 	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
 
-	list_del(&xskb->list_node);
+	list_del_init(&xskb->list_node);
 }
 
 static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
-- 
cgit v1.2.3


From 9351cf3fd92dc1349bb75f2f7f7324607dcf596f Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Thu, 26 Feb 2026 11:01:37 +0000
Subject: ASoC: cs35l56: Only patch ASP registers if the DAI is part of a
 DAIlink

Move the ASP register patches to a separate struct and apply this from the
ASP DAI probe() function so that the registers are only patched if the DAI
is part of a DAI link.

Some systems use the ASP as a special-purpose interconnect and on these
systems the ASP registers are configured by a third party (the firmware,
the BIOS, or another device using the amp's secondary host control
interface).

If the machine driver does not hook up the ASP DAI then the ASP registers
must be omitted from the patch to prevent overwriting the third party
configuration.

If the machine driver includes the ASP DAI in a DAI link, this implies that
the machine driver and higher components (such as alsa-ucm) are taking
ownership of the ASP. In this case the ASP registers are patched to known
defaults and the machine driver should configure the ASP.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Link: https://patch.msgid.link/20260226110137.1664562-1-rf@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/cs35l56.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h
index ae1e1489b671..28f9f5940ab6 100644
--- a/include/sound/cs35l56.h
+++ b/include/sound/cs35l56.h
@@ -406,6 +406,7 @@ extern const char * const cs35l56_cal_set_status_text[3];
 extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC];
 extern const unsigned int cs35l56_tx_input_values[CS35L56_NUM_INPUT_SRC];
 
+int cs35l56_set_asp_patch(struct cs35l56_base *cs35l56_base);
 int cs35l56_set_patch(struct cs35l56_base *cs35l56_base);
 int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command);
 int cs35l56_firmware_shutdown(struct cs35l56_base *cs35l56_base);
-- 
cgit v1.2.3


From b570f37a2ce480be26c665345c5514686a8a0274 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Tue, 10 Feb 2026 12:56:53 +0100
Subject: mm: Fix a hmm_range_fault() livelock / starvation problem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If hmm_range_fault() fails a folio_trylock() in do_swap_page,
trying to acquire the lock of a device-private folio for migration,
to ram, the function will spin until it succeeds grabbing the lock.

However, if the process holding the lock is depending on a work
item to be completed, which is scheduled on the same CPU as the
spinning hmm_range_fault(), that work item might be starved and
we end up in a livelock / starvation situation which is never
resolved.

This can happen, for example if the process holding the
device-private folio lock is stuck in
   migrate_device_unmap()->lru_add_drain_all()
sinc lru_add_drain_all() requires a short work-item
to be run on all online cpus to complete.

A prerequisite for this to happen is:
a) Both zone device and system memory folios are considered in
   migrate_device_unmap(), so that there is a reason to call
   lru_add_drain_all() for a system memory folio while a
   folio lock is held on a zone device folio.
b) The zone device folio has an initial mapcount > 1 which causes
   at least one migration PTE entry insertion to be deferred to
   try_to_migrate(), which can happen after the call to
   lru_add_drain_all().
c) No or voluntary only preemption.

This all seems pretty unlikely to happen, but indeed is hit by
the "xe_exec_system_allocator" igt test.

Resolve this by waiting for the folio to be unlocked if the
folio_trylock() fails in do_swap_page().

Rename migration_entry_wait_on_locked() to
softleaf_entry_wait_unlock() and update its documentation to
indicate the new use-case.

Future code improvements might consider moving
the lru_add_drain_all() call in migrate_device_unmap() to be
called *after* all pages have migration entries inserted.
That would eliminate also b) above.

v2:
- Instead of a cond_resched() in hmm_range_fault(),
  eliminate the problem by waiting for the folio to be unlocked
  in do_swap_page() (Alistair Popple, Andrew Morton)
v3:
- Add a stub migration_entry_wait_on_locked() for the
  !CONFIG_MIGRATION case. (Kernel Test Robot)
v4:
- Rename migrate_entry_wait_on_locked() to
  softleaf_entry_wait_on_locked() and update docs (Alistair Popple)
v5:
- Add a WARN_ON_ONCE() for the !CONFIG_MIGRATION
  version of softleaf_entry_wait_on_locked().
- Modify wording around function names in the commit message
  (Andrew Morton)

Suggested-by: Alistair Popple <apopple@nvidia.com>
Fixes: 1afaeb8293c9 ("mm/migrate: Trylock device page in do_swap_page")
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: linux-mm@kvack.org
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: <stable@vger.kernel.org> # v6.15+
Reviewed-by: John Hubbard <jhubbard@nvidia.com> #v3
Reviewed-by: Alistair Popple <apopple@nvidia.com>
Link: https://patch.msgid.link/20260210115653.92413-1-thomas.hellstrom@linux.intel.com
(cherry picked from commit a69d1ab971a624c6f112cea61536569d579c3215)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/linux/migrate.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 26ca00c325d9..d5af2b7f577b 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -65,7 +65,7 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list);
 
 int migrate_huge_page_move_mapping(struct address_space *mapping,
 		struct folio *dst, struct folio *src);
-void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
+void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
 		__releases(ptl);
 void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
 int folio_migrate_mapping(struct address_space *mapping,
@@ -97,6 +97,14 @@ static inline int set_movable_ops(const struct movable_operations *ops, enum pag
 	return -ENOSYS;
 }
 
+static inline void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
+	__releases(ptl)
+{
+	WARN_ON_ONCE(1);
+
+	spin_unlock(ptl);
+}
+
 #endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_NUMA_BALANCING
-- 
cgit v1.2.3


From af4e9ef3d78420feb8fe58cd9a1ab80c501b3c08 Mon Sep 17 00:00:00 2001
From: David Laight <david.laight.linux@gmail.com>
Date: Mon, 2 Mar 2026 13:27:51 +0000
Subject: uaccess: Fix scoped_user_read_access() for 'pointer to const'

If a 'const struct foo __user *ptr' is used for the address passed to
scoped_user_read_access() then you get a warning/error

  uaccess.h:691:1: error: initialization discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers]

for the

  void __user *_tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl)

assignment.

Fix by using 'auto' for both _tmpptr and the redeclaration of uptr.
Replace the CLASS() with explicit __cleanup() functions on uptr.

Fixes: e497310b4ffb ("uaccess: Provide scoped user access regions")
Signed-off-by: David Laight <david.laight.linux@gmail.com>
Reviewed-and-tested-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/uaccess.h | 54 ++++++++++++++++++-------------------------------
 1 file changed, 20 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 1f3804245c06..809e4f7dfdbd 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -647,36 +647,22 @@ static inline void user_access_restore(unsigned long flags) { }
 /* Define RW variant so the below _mode macro expansion works */
 #define masked_user_rw_access_begin(u)	masked_user_access_begin(u)
 #define user_rw_access_begin(u, s)	user_access_begin(u, s)
-#define user_rw_access_end()		user_access_end()
 
 /* Scoped user access */
-#define USER_ACCESS_GUARD(_mode)				\
-static __always_inline void __user *				\
-class_user_##_mode##_begin(void __user *ptr)			\
-{								\
-	return ptr;						\
-}								\
-								\
-static __always_inline void					\
-class_user_##_mode##_end(void __user *ptr)			\
-{								\
-	user_##_mode##_access_end();				\
-}								\
-								\
-DEFINE_CLASS(user_ ##_mode## _access, void __user *,		\
-	     class_user_##_mode##_end(_T),			\
-	     class_user_##_mode##_begin(ptr), void __user *ptr)	\
-								\
-static __always_inline class_user_##_mode##_access_t		\
-class_user_##_mode##_access_ptr(void __user *scope)		\
-{								\
-	return scope;						\
-}
 
-USER_ACCESS_GUARD(read)
-USER_ACCESS_GUARD(write)
-USER_ACCESS_GUARD(rw)
-#undef USER_ACCESS_GUARD
+/* Cleanup wrapper functions */
+static __always_inline void __scoped_user_read_access_end(const void *p)
+{
+	user_read_access_end();
+};
+static __always_inline void __scoped_user_write_access_end(const void *p)
+{
+	user_write_access_end();
+};
+static __always_inline void __scoped_user_rw_access_end(const void *p)
+{
+	user_access_end();
+};
 
 /**
  * __scoped_user_access_begin - Start a scoped user access
@@ -750,13 +736,13 @@ USER_ACCESS_GUARD(rw)
  *
  * Don't use directly. Use scoped_masked_user_$MODE_access() instead.
  */
-#define __scoped_user_access(mode, uptr, size, elbl)					\
-for (bool done = false; !done; done = true)						\
-	for (void __user *_tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl); \
-	     !done; done = true)							\
-		for (CLASS(user_##mode##_access, scope)(_tmpptr); !done; done = true)	\
-			/* Force modified pointer usage within the scope */		\
-			for (const typeof(uptr) uptr = _tmpptr; !done; done = true)
+#define __scoped_user_access(mode, uptr, size, elbl)				\
+for (bool done = false; !done; done = true)					\
+	for (auto _tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl);	\
+	     !done; done = true)						\
+		/* Force modified pointer usage within the scope */		\
+		for (const auto uptr  __cleanup(__scoped_user_##mode##_access_end) = \
+		     _tmpptr; !done; done = true)
 
 /**
  * scoped_user_read_access_size - Start a scoped user read access with given size
-- 
cgit v1.2.3


From a116bac87118903925108e57781bbfc7a7eea27b Mon Sep 17 00:00:00 2001
From: "Isaac J. Manjarres" <isaacmanjarres@google.com>
Date: Mon, 2 Mar 2026 16:23:09 -0800
Subject: dma-buf: Include ioctl.h in UAPI header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

include/uapi/linux/dma-buf.h uses several macros from ioctl.h to define
its ioctl commands. However, it does not include ioctl.h itself. So,
if userspace source code tries to include the dma-buf.h file without
including ioctl.h, it can result in build failures.

Therefore, include ioctl.h in the dma-buf UAPI header.

Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com>
Reviewed-by: T.J. Mercier <tjmercier@google.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Link: https://lore.kernel.org/r/20260303002309.1401849-1-isaacmanjarres@google.com
---
 include/uapi/linux/dma-buf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
index 5a6fda66d9ad..e827c9d20c5d 100644
--- a/include/uapi/linux/dma-buf.h
+++ b/include/uapi/linux/dma-buf.h
@@ -20,6 +20,7 @@
 #ifndef _DMA_BUF_UAPI_H_
 #define _DMA_BUF_UAPI_H_
 
+#include <linux/ioctl.h>
 #include <linux/types.h>
 
 /**
-- 
cgit v1.2.3


From 479d589b40b836442bbdadc3fdb37f001bb67f26 Mon Sep 17 00:00:00 2001
From: Jiayuan Chen <jiayuan.chen@shopee.com>
Date: Thu, 26 Feb 2026 16:03:01 +0800
Subject: bpf/bonding: reject vlan+srcmac xmit_hash_policy change when XDP is
 loaded

bond_option_mode_set() already rejects mode changes that would make a
loaded XDP program incompatible via bond_xdp_check().  However,
bond_option_xmit_hash_policy_set() has no such guard.

For 802.3ad and balance-xor modes, bond_xdp_check() returns false when
xmit_hash_policy is vlan+srcmac, because the 802.1q payload is usually
absent due to hardware offload.  This means a user can:

1. Attach a native XDP program to a bond in 802.3ad/balance-xor mode
   with a compatible xmit_hash_policy (e.g. layer2+3).
2. Change xmit_hash_policy to vlan+srcmac while XDP remains loaded.

This leaves bond->xdp_prog set but bond_xdp_check() now returning false
for the same device.  When the bond is later destroyed, dev_xdp_uninstall()
calls bond_xdp_set(dev, NULL, NULL) to remove the program, which hits
the bond_xdp_check() guard and returns -EOPNOTSUPP, triggering:

WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL))

Fix this by rejecting xmit_hash_policy changes to vlan+srcmac when an
XDP program is loaded on a bond in 802.3ad or balance-xor mode.

commit 39a0876d595b ("net, bonding: Disallow vlan+srcmac with XDP")
introduced bond_xdp_check() which returns false for 802.3ad/balance-xor
modes when xmit_hash_policy is vlan+srcmac.  The check was wired into
bond_xdp_set() to reject XDP attachment with an incompatible policy, but
the symmetric path -- preventing xmit_hash_policy from being changed to an
incompatible value after XDP is already loaded -- was left unguarded in
bond_option_xmit_hash_policy_set().

Note:
commit 094ee6017ea0 ("bonding: check xdp prog when set bond mode")
later added a similar guard to bond_option_mode_set(), but
bond_option_xmit_hash_policy_set() remained unprotected.

Reported-by: syzbot+5a287bcdc08104bc3132@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/6995aff6.050a0220.2eeac1.014e.GAE@google.com/T/
Fixes: 39a0876d595b ("net, bonding: Disallow vlan+srcmac with XDP")
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Link: https://patch.msgid.link/20260226080306.98766-2-jiayuan.chen@linux.dev
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/bonding.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bonding.h b/include/net/bonding.h
index 4ad5521e7731..395c6e281c5f 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -699,6 +699,7 @@ void bond_debug_register(struct bonding *bond);
 void bond_debug_unregister(struct bonding *bond);
 void bond_debug_reregister(struct bonding *bond);
 const char *bond_mode_name(int mode);
+bool __bond_xdp_check(int mode, int xmit_policy);
 bool bond_xdp_check(struct bonding *bond, int mode);
 void bond_setup(struct net_device *bond_dev);
 unsigned int bond_get_num_tx_queues(void);
-- 
cgit v1.2.3


From 710f5c76580306cdb9ec51fac8fcf6a8faff7821 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 27 Feb 2026 17:26:03 +0000
Subject: indirect_call_wrapper: do not reevaluate function pointer

We have an increasing number of READ_ONCE(xxx->function)
combined with INDIRECT_CALL_[1234]() helpers.

Unfortunately this forces INDIRECT_CALL_[1234]() to read
xxx->function many times, which is not what we wanted.

Fix these macros so that xxx->function value is not reloaded.

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 0/0 grow/shrink: 1/65 up/down: 122/-1084 (-962)
Function                                     old     new   delta
ip_push_pending_frames                        59     181    +122
ip6_finish_output                            687     681      -6
__udp_enqueue_schedule_skb                  1078    1072      -6
ioam6_output                                2319    2312      -7
xfrm4_rcv_encap_finish2                       64      56      -8
xfrm4_output                                 297     289      -8
vrf_ip_local_out                             278     270      -8
vrf_ip6_local_out                            278     270      -8
seg6_input_finish                             64      56      -8
rpl_output                                   700     692      -8
ipmr_forward_finish                          124     116      -8
ip_forward_finish                            143     135      -8
ip6mr_forward2_finish                        100      92      -8
ip6_forward_finish                            73      65      -8
input_action_end_bpf                        1091    1083      -8
dst_input                                     52      44      -8
__xfrm6_output                               801     793      -8
__xfrm4_output                                83      75      -8
bpf_input                                    500     491      -9
__tcp_check_space                            530     521      -9
input_action_end_dt6                         291     280     -11
vti6_tnl_xmit                               1634    1622     -12
bpf_xmit                                    1203    1191     -12
rpl_input                                    497     483     -14
rawv6_send_hdrinc                           1355    1341     -14
ndisc_send_skb                              1030    1016     -14
ipv6_srh_rcv                                1377    1363     -14
ip_send_unicast_reply                       1253    1239     -14
ip_rcv_finish                                226     212     -14
ip6_rcv_finish                               300     286     -14
input_action_end_x_core                      205     191     -14
input_action_end_x                           355     341     -14
input_action_end_t                           205     191     -14
input_action_end_dx6_finish                  127     113     -14
input_action_end_dx4_finish                  373     359     -14
input_action_end_dt4                         426     412     -14
input_action_end_core                        186     172     -14
input_action_end_b6_encap                    292     278     -14
input_action_end_b6                          198     184     -14
igmp6_send                                  1332    1318     -14
ip_sublist_rcv                               864     848     -16
ip6_sublist_rcv                             1091    1075     -16
ipv6_rpl_srh_rcv                            1937    1920     -17
xfrm_policy_queue_process                   1246    1228     -18
seg6_output_core                             903     885     -18
mld_sendpack                                 856     836     -20
NF_HOOK                                      756     736     -20
vti_tunnel_xmit                             1447    1426     -21
input_action_end_dx6                         664     642     -22
input_action_end                            1502    1480     -22
sock_sendmsg_nosec                           134     111     -23
ip6mr_forward2                               388     364     -24
sock_recvmsg_nosec                           134     109     -25
seg6_input_core                              836     810     -26
ip_send_skb                                  172     146     -26
ip_local_out                                 140     114     -26
ip6_local_out                                140     114     -26
__sock_sendmsg                               162     136     -26
__ip_queue_xmit                             1196    1170     -26
__ip_finish_output                           405     379     -26
ipmr_queue_fwd_xmit                          373     346     -27
sock_recvmsg                                 173     145     -28
ip6_xmit                                    1635    1607     -28
xfrm_output_resume                          1418    1389     -29
ip_build_and_send_pkt                        625     591     -34
dst_output                                   504     432     -72
Total: Before=25217686, After=25216724, chg -0.00%

Fixes: 283c16a2dfd3 ("indirect call wrappers: helpers to speed-up indirect calls of builtin")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260227172603.1700433-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/indirect_call_wrapper.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h
index 35227d47cfc9..dc272b514a01 100644
--- a/include/linux/indirect_call_wrapper.h
+++ b/include/linux/indirect_call_wrapper.h
@@ -16,22 +16,26 @@
  */
 #define INDIRECT_CALL_1(f, f1, ...)					\
 	({								\
-		likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__);	\
+		typeof(f) __f1 = (f);					\
+		likely(__f1 == f1) ? f1(__VA_ARGS__) : __f1(__VA_ARGS__);	\
 	})
 #define INDIRECT_CALL_2(f, f2, f1, ...)					\
 	({								\
-		likely(f == f2) ? f2(__VA_ARGS__) :			\
-				  INDIRECT_CALL_1(f, f1, __VA_ARGS__);	\
+		typeof(f) __f2 = (f);					\
+		likely(__f2 == f2) ? f2(__VA_ARGS__) :			\
+				  INDIRECT_CALL_1(__f2, f1, __VA_ARGS__);	\
 	})
 #define INDIRECT_CALL_3(f, f3, f2, f1, ...)					\
 	({									\
-		likely(f == f3) ? f3(__VA_ARGS__) :				\
-				  INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__);	\
+		typeof(f) __f3 = (f);						\
+		likely(__f3 == f3) ? f3(__VA_ARGS__) :				\
+				  INDIRECT_CALL_2(__f3, f2, f1, __VA_ARGS__);	\
 	})
 #define INDIRECT_CALL_4(f, f4, f3, f2, f1, ...)					\
 	({									\
-		likely(f == f4) ? f4(__VA_ARGS__) :				\
-				  INDIRECT_CALL_3(f, f3, f2, f1, __VA_ARGS__);	\
+		typeof(f) __f4 = (f);						\
+		likely(__f4 == f4) ? f4(__VA_ARGS__) :				\
+				  INDIRECT_CALL_3(__f4, f3, f2, f1, __VA_ARGS__);	\
 	})
 
 #define INDIRECT_CALLABLE_DECLARE(f)	f
-- 
cgit v1.2.3


From 9de68394a61528d40f575c3e6719cc75c56f62c3 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@kernel.org>
Date: Mon, 2 Mar 2026 01:25:44 +0100
Subject: Revert "driver core: enforce device_lock for driver_match_device()"

This reverts commit dc23806a7c47 ("driver core: enforce device_lock for
driver_match_device()") and commit 289b14592cef ("driver core: fix
inverted "locked" suffix of driver_match_device()").

While technically correct, there is a major downside to this approach:

When a device is already present in the system and a driver is
registered on the same bus, we iterate over all devices registered on
this bus to see if one of them matches. If we come across an already
bound one where the corresponding driver crashed while holding the
device lock (e.g. in probe()) we can't make any progress anymore.

However, drivers are typically the least tested code in the kernel and
hence it is a case that is likely to happen regularly. Besides hurting
developer ergonomics, it potentially decreases chances of shutting
things down cleanly and obtaining logs in production environments as
well [1].

This came up in the context of a firewire bug, which only in combination
with the reverted commit, caused the machine to hang [2]. Additionally,
it was observed in [3].

Thus, revert commit dc23806a7c47 ("driver core: enforce device_lock for
driver_match_device()") and add a brief note clarifying that an
implementer of struct bus_type must not expect match() to be called with
the device lock held.

Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1]
Link: https://lore.kernel.org/all/67f655bb-4d81-4609-b008-68d200255dd2@davidgow.net/ [2]
Link: https://lore.kernel.org/lkml/CALbr=LZ4v7N=tO1vgOsyj9AS+XuNbn6kG-QcF+PacdMjSo0iyw@mail.gmail.com/ [3]
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Closes: https://lore.kernel.org/driver-core/CAHk-=wgJ_L1C=HjcYJotg_zrZEmiLFJaoic+PWthjuQrutrfJw@mail.gmail.com/
Reviewed-by: Gui-Dong Han <hanguidong02@gmail.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/20260302002545.19389-1-dakr@kernel.org
[ Add additional Link: reference. - Danilo ]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 include/linux/device/bus.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
index 99c3c83ea520..63de5f053c33 100644
--- a/include/linux/device/bus.h
+++ b/include/linux/device/bus.h
@@ -35,6 +35,8 @@ struct fwnode_handle;
  *		otherwise. It may also return error code if determining that
  *		the driver supports the device is not possible. In case of
  *		-EPROBE_DEFER it will queue the device for deferred probing.
+ *		Note: This callback may be invoked with or without the device
+ *		lock held.
  * @uevent:	Called when a device is added, removed, or a few other things
  *		that generate uevents to add the environment variables.
  * @probe:	Called when a new device or driver add to this bus, and callback
-- 
cgit v1.2.3


From 4ee7fa6cf78ff26d783d39e2949d14c4c1cd5e7f Mon Sep 17 00:00:00 2001
From: Yung Chih Su <yuuchihsu@gmail.com>
Date: Mon, 2 Mar 2026 14:02:47 +0800
Subject: net: ipv4: fix ARM64 alignment fault in multipath hash seed

`struct sysctl_fib_multipath_hash_seed` contains two u32 fields
(user_seed and mp_seed), making it an 8-byte structure with a 4-byte
alignment requirement.

In `fib_multipath_hash_from_keys()`, the code evaluates the entire
struct atomically via `READ_ONCE()`:

    mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed;

While this silently works on GCC by falling back to unaligned regular
loads which the ARM64 kernel tolerates, it causes a fatal kernel panic
when compiled with Clang and LTO enabled.

Commit e35123d83ee3 ("arm64: lto: Strengthen READ_ONCE() to acquire
when CONFIG_LTO=y") strengthens `READ_ONCE()` to use Load-Acquire
instructions (`ldar` / `ldapr`) to prevent compiler reordering bugs
under Clang LTO. Since the macro evaluates the full 8-byte struct,
Clang emits a 64-bit `ldar` instruction. ARM64 architecture strictly
requires `ldar` to be naturally aligned, thus executing it on a 4-byte
aligned address triggers a strict Alignment Fault (FSC = 0x21).

Fix the read side by moving the `READ_ONCE()` directly to the `u32`
member, which emits a safe 32-bit `ldar Wn`.

Furthermore, Eric Dumazet pointed out that `WRITE_ONCE()` on the entire
struct in `proc_fib_multipath_hash_set_seed()` is also flawed. Analysis
shows that Clang splits this 8-byte write into two separate 32-bit
`str` instructions. While this avoids an alignment fault, it destroys
atomicity and exposes a tear-write vulnerability. Fix this by
explicitly splitting the write into two 32-bit `WRITE_ONCE()`
operations.

Finally, add the missing `READ_ONCE()` when reading `user_seed` in
`proc_fib_multipath_hash_seed()` to ensure proper pairing and
concurrency safety.

Fixes: 4ee2a8cace3f ("net: ipv4: Add a sysctl to set multipath hash seed")
Signed-off-by: Yung Chih Su <yuuchihsu@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260302060247.7066-1-yuuchihsu@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip_fib.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index b4495c38e0a0..318593743b6e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -559,7 +559,7 @@ static inline u32 fib_multipath_hash_from_keys(const struct net *net,
 	siphash_aligned_key_t hash_key;
 	u32 mp_seed;
 
-	mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed;
+	mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed);
 	fib_multipath_hash_construct_key(&hash_key, mp_seed);
 
 	return flow_hash_from_keys_seed(keys, &hash_key);
-- 
cgit v1.2.3


From e39bb9e02b68942f8e9359d2a3efe7d37ae6be0e Mon Sep 17 00:00:00 2001
From: Qing Wang <wangqing7171@gmail.com>
Date: Fri, 27 Feb 2026 10:58:42 +0800
Subject: tracing: Fix WARN_ON in tracing_buffers_mmap_close

When a process forks, the child process copies the parent's VMAs but the
user_mapped reference count is not incremented. As a result, when both the
parent and child processes exit, tracing_buffers_mmap_close() is called
twice. On the second call, user_mapped is already 0, causing the function to
return -ENODEV and triggering a WARN_ON.

Normally, this isn't an issue as the memory is mapped with VM_DONTCOPY set.
But this is only a hint, and the application can call
madvise(MADVISE_DOFORK) which resets the VM_DONTCOPY flag. When the
application does that, it can trigger this issue on fork.

Fix it by incrementing the user_mapped reference count without re-mapping
the pages in the VMA's open callback.

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Vincent Donnefort <vdonnefort@google.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Link: https://patch.msgid.link/20260227025842.1085206-1-wangqing7171@gmail.com
Fixes: cf9f0f7c4c5bb ("tracing: Allow user-space mapping of the ring-buffer")
Reported-by: syzbot+3b5dd2030fe08afdf65d@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=3b5dd2030fe08afdf65d
Tested-by: syzbot+3b5dd2030fe08afdf65d@syzkaller.appspotmail.com
Signed-off-by: Qing Wang <wangqing7171@gmail.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 876358cfe1b1..d862fa610270 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -248,6 +248,7 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node);
 
 int ring_buffer_map(struct trace_buffer *buffer, int cpu,
 		    struct vm_area_struct *vma);
+void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu);
 int ring_buffer_unmap(struct trace_buffer *buffer, int cpu);
 int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu);
 #endif /* _LINUX_RING_BUFFER_H */
-- 
cgit v1.2.3


From ace7dcc8181373a0338efa1686c5e36eb121dff2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Wed, 25 Feb 2026 09:42:20 +0200
Subject: drm/dp: Add definition for Panel Replay full-line granularity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DP specification is saying value 0xff 0xff in PANEL REPLAY SELECTIVE UPDATE
X GRANULARITY CAPABILITY registers (0xb2 and 0xb3) means full-line
granularity. Add definition for this.

Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Uma Shankar <uma.shankar@intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patch.msgid.link/20260225074221.1744330-1-jouni.hogander@intel.com
(cherry picked from commit b93311673263bb98a200ab1cb6304f969bdada5c)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 include/drm/display/drm_dp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h
index e4eebabab975..8b15d3eeb716 100644
--- a/include/drm/display/drm_dp.h
+++ b/include/drm/display/drm_dp.h
@@ -571,6 +571,8 @@
 # define DP_PANEL_REPLAY_LINK_OFF_SUPPORTED_IN_PR_AFTER_ADAPTIVE_SYNC_SDP	(1 << 7)
 
 #define DP_PANEL_REPLAY_CAP_X_GRANULARITY		0xb2
+# define DP_PANEL_REPLAY_FULL_LINE_GRANULARITY		0xffff
+
 #define DP_PANEL_REPLAY_CAP_Y_GRANULARITY		0xb4
 
 /* Link Configuration */
-- 
cgit v1.2.3


From 82169dace41cbaa951341b0f80f4570be3b2dec0 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 18 Feb 2026 10:52:04 +0100
Subject: xenbus: add xenbus_device parameter to xenbus_read_driver_state()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to prepare checking the xenbus device status in
xenbus_read_driver_state(), add the pointer to struct xenbus_device
as a parameter.

Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Acked-by: "Martin K. Petersen" <martin.petersen@oracle.com> # SCSI
Acked-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>	# drivers/pci/xen-pcifront.c
Signed-off-by: Juergen Gross <jgross@suse.com>
Message-ID: <20260218095205.453657-2-jgross@suse.com>
---
 include/xen/xenbus.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index c94caf852aea..15319da65b7f 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -228,7 +228,8 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
 int xenbus_alloc_evtchn(struct xenbus_device *dev, evtchn_port_t *port);
 int xenbus_free_evtchn(struct xenbus_device *dev, evtchn_port_t port);
 
-enum xenbus_state xenbus_read_driver_state(const char *path);
+enum xenbus_state xenbus_read_driver_state(const struct xenbus_device *dev,
+					   const char *path);
 
 __printf(3, 4)
 void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...);
-- 
cgit v1.2.3


From e2dcf9065536ab4a1b00828ff0d19f7d282dfecc Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 18 Feb 2026 10:52:05 +0100
Subject: xen/xenbus: better handle backend crash
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the backend domain crashes, coordinated device cleanup is not
possible (as it involves waiting for the backend state change). In that
case, toolstack forcefully removes frontend xenstore entries.
xenbus_dev_changed() handles this case, and triggers device cleanup.
It's possible that toolstack manages to connect new device in that
place, before xenbus_dev_changed() notices the old one is missing. If
that happens, new one won't be probed and will forever remain in
XenbusStateInitialising.

Fix this by checking the frontend's state in Xenstore. In case it has
been reset to XenbusStateInitialising by Xen tools, consider this
being the result of an unplug+plug operation.

It's important that cleanup on such unplug doesn't modify Xenstore
entries (especially the "state" key) as it belong to the new device
to be probed - changing it would derail establishing connection to the
new backend (most likely, closing the device before it was even
connected). Handle this case by setting new xenbus_device->vanished
flag to true, and check it before changing state entry.

And even if xenbus_dev_changed() correctly detects the device was
forcefully removed, the cleanup handling is still racy. Since this whole
handling doesn't happened in a single Xenstore transaction, it's possible
that toolstack might put a new device there already. Avoid re-creating
the state key (which in the case of loosing the race would actually
close newly attached device).

The problem does not apply to frontend domain crash, as this case
involves coordinated cleanup.

Problem originally reported at
https://lore.kernel.org/xen-devel/aOZvivyZ9YhVWDLN@mail-itl/T/#t,
including reproduction steps.

Based-on-patch-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Message-ID: <20260218095205.453657-3-jgross@suse.com>
---
 include/xen/xenbus.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 15319da65b7f..8ca15743af7f 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -80,6 +80,7 @@ struct xenbus_device {
 	const char *devicetype;
 	const char *nodename;
 	const char *otherend;
+	bool vanished;
 	int otherend_id;
 	struct xenbus_watch otherend_watch;
 	struct device dev;
-- 
cgit v1.2.3


From 2d28ed588f8d7d0d41b0a4fad7f0d05e4bbf1797 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Tue, 24 Feb 2026 16:24:34 -0800
Subject: Revert "ptdesc: remove references to folios from __pagetable_ctor()
 and pagetable_dtor()"

This change swapped out mod_node_page_state for lruvec_stat_add_folio.
But, these two APIs are not interchangeable: the lruvec version also
increments memcg stats, in addition to "global" pgdat stats.

So after this change, the "pagetables" memcg stat in memory.stat always
yields "0", which is a userspace visible regression.

I tried to look for a refactor where we add a variant of
lruvec_stat_mod_folio which takes a pgdat and a memcg instead of a folio,
to try to adhere to the spirit of the original patch.  But at the end of
the day this just means we have to call folio_memcg(ptdesc_folio(ptdesc))
anyway, which doesn't really accomplish much.

This regression is visible in master as well as 6.18 stable, so CC stable
too.

Link: https://lkml.kernel.org/r/20260225002434.2953895-1-axelrasmussen@google.com
Fixes: f0c92726e89f ("ptdesc: remove references to folios from __pagetable_ctor() and pagetable_dtor()")
Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5be3d8a8f806..abb4963c1f06 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3514,26 +3514,21 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; }
 static inline void ptlock_free(struct ptdesc *ptdesc) {}
 #endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */
 
-static inline unsigned long ptdesc_nr_pages(const struct ptdesc *ptdesc)
-{
-	return compound_nr(ptdesc_page(ptdesc));
-}
-
 static inline void __pagetable_ctor(struct ptdesc *ptdesc)
 {
-	pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags));
+	struct folio *folio = ptdesc_folio(ptdesc);
 
-	__SetPageTable(ptdesc_page(ptdesc));
-	mod_node_page_state(pgdat, NR_PAGETABLE, ptdesc_nr_pages(ptdesc));
+	__folio_set_pgtable(folio);
+	lruvec_stat_add_folio(folio, NR_PAGETABLE);
 }
 
 static inline void pagetable_dtor(struct ptdesc *ptdesc)
 {
-	pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags));
+	struct folio *folio = ptdesc_folio(ptdesc);
 
 	ptlock_free(ptdesc);
-	__ClearPageTable(ptdesc_page(ptdesc));
-	mod_node_page_state(pgdat, NR_PAGETABLE, -ptdesc_nr_pages(ptdesc));
+	__folio_clear_pgtable(folio);
+	lruvec_stat_sub_folio(folio, NR_PAGETABLE);
 }
 
 static inline void pagetable_dtor_free(struct ptdesc *ptdesc)
-- 
cgit v1.2.3


From 7392f8e4ea632622b2cd2086675ba022db238b3a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 1 Mar 2026 16:52:29 -0800
Subject: uaccess: correct kernel-doc parameter format

Use the correct kernel-doc function parameter format to avoid kernel-doc
warnings:

Warning: include/linux/uaccess.h:814 function parameter 'uptr' not
 described in 'scoped_user_rw_access_size'
Warning: include/linux/uaccess.h:826 function parameter 'uptr' not
 described in 'scoped_user_rw_access'

Link: https://lkml.kernel.org/r/20260302005229.3471955-1-rdunlap@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/uaccess.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 1f3804245c06..001cfef21b61 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -806,7 +806,7 @@ for (bool done = false; !done; done = true)						\
 
 /**
  * scoped_user_rw_access_size - Start a scoped user read/write access with given size
- * @uptr	Pointer to the user space address to read from and write to
+ * @uptr:	Pointer to the user space address to read from and write to
  * @size:	Size of the access starting from @uptr
  * @elbl:	Error label to goto when the access region is rejected
  *
@@ -817,7 +817,7 @@ for (bool done = false; !done; done = true)						\
 
 /**
  * scoped_user_rw_access - Start a scoped user read/write access
- * @uptr	Pointer to the user space address to read from and write to
+ * @uptr:	Pointer to the user space address to read from and write to
  * @elbl:	Error label to goto when the access region is rejected
  *
  * The size of the access starting from @uptr is determined via sizeof(*@uptr)).
-- 
cgit v1.2.3


From 599b4e290c8766b19378d85d4310c6ec8f90ade4 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 1 Mar 2026 16:52:22 -0800
Subject: mm/mmu_notifier: clean up mmu_notifier.h kernel-doc

Eliminate kernel-doc warnings in mmu_notifier.h:
- add a missing struct short description
- use the correct format for function parameters
- add missing function return comment sections

Warning: include/linux/mmu_notifier.h:236 missing initial short
 description on line: * struct mmu_interval_notifier_ops
Warning: include/linux/mmu_notifier.h:325 function parameter 'interval_sub'
 not described in 'mmu_interval_set_seq'
Warning: include/linux/mmu_notifier.h:325 function parameter 'cur_seq'
 not described in 'mmu_interval_set_seq'
Warning: include/linux/mmu_notifier.h:346 function parameter 'interval_sub'
 not described in 'mmu_interval_read_retry'
Warning: include/linux/mmu_notifier.h:346 function parameter 'seq' not
 described in 'mmu_interval_read_retry'
Warning: include/linux/mmu_notifier.h:346 No description found for return
 value of 'mmu_interval_read_retry'
Warning: include/linux/mmu_notifier.h:370 function parameter 'interval_sub'
 not described in 'mmu_interval_check_retry'
Warning: include/linux/mmu_notifier.h:370 function parameter 'seq' not
 described in 'mmu_interval_check_retry'
Warning: include/linux/mmu_notifier.h:370 No description found for return
 value of 'mmu_interval_check_retry'

Link: https://lkml.kernel.org/r/20260302005222.3470783-1-rdunlap@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmu_notifier.h | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 07a2bbaf86e9..8450e18a87c2 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -234,7 +234,7 @@ struct mmu_notifier {
 };
 
 /**
- * struct mmu_interval_notifier_ops
+ * struct mmu_interval_notifier_ops - callback for range notification
  * @invalidate: Upon return the caller must stop using any SPTEs within this
  *              range. This function can sleep. Return false only if sleeping
  *              was required but mmu_notifier_range_blockable(range) is false.
@@ -309,8 +309,8 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub);
 
 /**
  * mmu_interval_set_seq - Save the invalidation sequence
- * @interval_sub - The subscription passed to invalidate
- * @cur_seq - The cur_seq passed to the invalidate() callback
+ * @interval_sub: The subscription passed to invalidate
+ * @cur_seq: The cur_seq passed to the invalidate() callback
  *
  * This must be called unconditionally from the invalidate callback of a
  * struct mmu_interval_notifier_ops under the same lock that is used to call
@@ -329,8 +329,8 @@ mmu_interval_set_seq(struct mmu_interval_notifier *interval_sub,
 
 /**
  * mmu_interval_read_retry - End a read side critical section against a VA range
- * interval_sub: The subscription
- * seq: The return of the paired mmu_interval_read_begin()
+ * @interval_sub: The subscription
+ * @seq: The return of the paired mmu_interval_read_begin()
  *
  * This MUST be called under a user provided lock that is also held
  * unconditionally by op->invalidate() when it calls mmu_interval_set_seq().
@@ -338,7 +338,7 @@ mmu_interval_set_seq(struct mmu_interval_notifier *interval_sub,
  * Each call should be paired with a single mmu_interval_read_begin() and
  * should be used to conclude the read side.
  *
- * Returns true if an invalidation collided with this critical section, and
+ * Returns: true if an invalidation collided with this critical section, and
  * the caller should retry.
  */
 static inline bool
@@ -350,20 +350,21 @@ mmu_interval_read_retry(struct mmu_interval_notifier *interval_sub,
 
 /**
  * mmu_interval_check_retry - Test if a collision has occurred
- * interval_sub: The subscription
- * seq: The return of the matching mmu_interval_read_begin()
+ * @interval_sub: The subscription
+ * @seq: The return of the matching mmu_interval_read_begin()
  *
  * This can be used in the critical section between mmu_interval_read_begin()
- * and mmu_interval_read_retry().  A return of true indicates an invalidation
- * has collided with this critical region and a future
- * mmu_interval_read_retry() will return true.
- *
- * False is not reliable and only suggests a collision may not have
- * occurred. It can be called many times and does not have to hold the user
- * provided lock.
+ * and mmu_interval_read_retry().
  *
  * This call can be used as part of loops and other expensive operations to
  * expedite a retry.
+ * It can be called many times and does not have to hold the user
+ * provided lock.
+ *
+ * Returns: true indicates an invalidation has collided with this critical
+ * region and a future mmu_interval_read_retry() will return true.
+ * False is not reliable and only suggests a collision may not have
+ * occurred.
  */
 static inline bool
 mmu_interval_check_retry(struct mmu_interval_notifier *interval_sub,
-- 
cgit v1.2.3


From 7f083faf59d14c04e01ec05a7507f036c965acf8 Mon Sep 17 00:00:00 2001
From: Koichiro Den <den@valinux.co.jp>
Date: Sat, 28 Feb 2026 23:53:07 +0900
Subject: net: sched: avoid qdisc_reset_all_tx_gt() vs dequeue race for
 lockless qdiscs

When shrinking the number of real tx queues,
netif_set_real_num_tx_queues() calls qdisc_reset_all_tx_gt() to flush
qdiscs for queues which will no longer be used.

qdisc_reset_all_tx_gt() currently serializes qdisc_reset() with
qdisc_lock(). However, for lockless qdiscs, the dequeue path is
serialized by qdisc_run_begin/end() using qdisc->seqlock instead, so
qdisc_reset() can run concurrently with __qdisc_run() and free skbs
while they are still being dequeued, leading to UAF.

This can easily be reproduced on e.g. virtio-net by imposing heavy
traffic while frequently changing the number of queue pairs:

  iperf3 -ub0 -c $peer -t 0 &
  while :; do
    ethtool -L eth0 combined 1
    ethtool -L eth0 combined 2
  done

With KASAN enabled, this leads to reports like:

  BUG: KASAN: slab-use-after-free in __qdisc_run+0x133f/0x1760
  ...
  Call Trace:
   <TASK>
   ...
   __qdisc_run+0x133f/0x1760
   __dev_queue_xmit+0x248f/0x3550
   ip_finish_output2+0xa42/0x2110
   ip_output+0x1a7/0x410
   ip_send_skb+0x2e6/0x480
   udp_send_skb+0xb0a/0x1590
   udp_sendmsg+0x13c9/0x1fc0
   ...
   </TASK>

  Allocated by task 1270 on cpu 5 at 44.558414s:
   ...
   alloc_skb_with_frags+0x84/0x7c0
   sock_alloc_send_pskb+0x69a/0x830
   __ip_append_data+0x1b86/0x48c0
   ip_make_skb+0x1e8/0x2b0
   udp_sendmsg+0x13a6/0x1fc0
   ...

  Freed by task 1306 on cpu 3 at 44.558445s:
   ...
   kmem_cache_free+0x117/0x5e0
   pfifo_fast_reset+0x14d/0x580
   qdisc_reset+0x9e/0x5f0
   netif_set_real_num_tx_queues+0x303/0x840
   virtnet_set_channels+0x1bf/0x260 [virtio_net]
   ethnl_set_channels+0x684/0xae0
   ethnl_default_set_doit+0x31a/0x890
   ...

Serialize qdisc_reset_all_tx_gt() against the lockless dequeue path by
taking qdisc->seqlock for TCQ_F_NOLOCK qdiscs, matching the
serialization model already used by dev_reset_queue().

Additionally clear QDISC_STATE_NON_EMPTY after reset so the qdisc state
reflects an empty queue, avoiding needless re-scheduling.

Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking")
Signed-off-by: Koichiro Den <den@valinux.co.jp>
Link: https://patch.msgid.link/20260228145307.3955532-1-den@valinux.co.jp
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sch_generic.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c3a7268b567e..d5d55cb21686 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -778,13 +778,23 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb)
 static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
 {
 	struct Qdisc *qdisc;
+	bool nolock;
 
 	for (; i < dev->num_tx_queues; i++) {
 		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
 		if (qdisc) {
+			nolock = qdisc->flags & TCQ_F_NOLOCK;
+
+			if (nolock)
+				spin_lock_bh(&qdisc->seqlock);
 			spin_lock_bh(qdisc_lock(qdisc));
 			qdisc_reset(qdisc);
 			spin_unlock_bh(qdisc_lock(qdisc));
+			if (nolock) {
+				clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+				clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
+				spin_unlock_bh(&qdisc->seqlock);
+			}
 		}
 	}
 }
-- 
cgit v1.2.3


From 165573e41f2f66ef98940cf65f838b2cb575d9d1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 2 Mar 2026 20:55:27 +0000
Subject: tcp: secure_seq: add back ports to TS offset

This reverts 28ee1b746f49 ("secure_seq: downgrade to per-host timestamp offsets")

tcp_tw_recycle went away in 2017.

Zhouyan Deng reported off-path TCP source port leakage via
SYN cookie side-channel that can be fixed in multiple ways.

One of them is to bring back TCP ports in TS offset randomization.

As a bonus, we perform a single siphash() computation
to provide both an ISN and a TS offset.

Fixes: 28ee1b746f49 ("secure_seq: downgrade to per-host timestamp offsets")
Reported-by: Zhouyan Deng <dengzhouyan_nwpu@163.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Acked-by: Florian Westphal <fw@strlen.de>
Link: https://patch.msgid.link/20260302205527.1982836-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/secure_seq.h | 45 ++++++++++++++++++++++++++++++++++++++-------
 include/net/tcp.h        |  6 ++++--
 2 files changed, 42 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index cddebafb9f77..6f996229167b 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -5,16 +5,47 @@
 #include <linux/types.h>
 
 struct net;
+extern struct net init_net;
+
+union tcp_seq_and_ts_off {
+	struct {
+		u32 seq;
+		u32 ts_off;
+	};
+	u64 hash64;
+};
 
 u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
 u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 			       __be16 dport);
-u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
-		   __be16 sport, __be16 dport);
-u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr);
-u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
-		     __be16 sport, __be16 dport);
-u32 secure_tcpv6_ts_off(const struct net *net,
-			const __be32 *saddr, const __be32 *daddr);
+union tcp_seq_and_ts_off
+secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr,
+			  __be16 sport, __be16 dport);
+
+static inline u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
+				 __be16 sport, __be16 dport)
+{
+	union tcp_seq_and_ts_off ts;
+
+	ts = secure_tcp_seq_and_ts_off(&init_net, saddr, daddr,
+				       sport, dport);
+
+	return ts.seq;
+}
+
+union tcp_seq_and_ts_off
+secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr,
+			    const __be32 *daddr,
+			    __be16 sport, __be16 dport);
+
+static inline u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
+				   __be16 sport, __be16 dport)
+{
+	union tcp_seq_and_ts_off ts;
+
+	ts = secure_tcpv6_seq_and_ts_off(&init_net, saddr, daddr,
+					 sport, dport);
 
+	return ts.seq;
+}
 #endif /* _NET_SECURE_SEQ */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index eb8bf63fdafc..978eea2d5df0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -43,6 +43,7 @@
 #include <net/dst.h>
 #include <net/mptcp.h>
 #include <net/xfrm.h>
+#include <net/secure_seq.h>
 
 #include <linux/seq_file.h>
 #include <linux/memcontrol.h>
@@ -2464,8 +2465,9 @@ struct tcp_request_sock_ops {
 				       struct flowi *fl,
 				       struct request_sock *req,
 				       u32 tw_isn);
-	u32 (*init_seq)(const struct sk_buff *skb);
-	u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
+	union tcp_seq_and_ts_off (*init_seq_and_ts_off)(
+					const struct net *net,
+					const struct sk_buff *skb);
 	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
 			   struct flowi *fl, struct request_sock *req,
 			   struct tcp_fastopen_cookie *foc,
-- 
cgit v1.2.3


From b824c3e16c1904bf80df489e293d1e3cbf98896d Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 2 Mar 2026 17:26:31 +0100
Subject: net: Provide a PREEMPT_RT specific check for netdev_queue::_xmit_lock

After acquiring netdev_queue::_xmit_lock the number of the CPU owning
the lock is recorded in netdev_queue::xmit_lock_owner. This works as
long as the BH context is not preemptible.

On PREEMPT_RT the softirq context is preemptible and without the
softirq-lock it is possible to have multiple user in __dev_queue_xmit()
submitting a skb on the same CPU. This is fine in general but this means
also that the current CPU is recorded as netdev_queue::xmit_lock_owner.
This in turn leads to the recursion alert and the skb is dropped.

Instead checking the for CPU number, that owns the lock, PREEMPT_RT can
check if the lockowner matches the current task.

Add netif_tx_owned() which returns true if the current context owns the
lock by comparing the provided CPU number with the recorded number. This
resembles the current check by negating the condition (the current check
returns true if the lock is not owned).
On PREEMPT_RT use rt_mutex_owner() to return the lock owner and compare
the current task against it.
Use the new helper in __dev_queue_xmit() and netif_local_xmit_active()
which provides a similar check.
Update comments regarding pairing READ_ONCE().

Reported-by: Bert Karwatzki <spasswolf@web.de>
Closes: https://lore.kernel.org/all/20260216134333.412332-1-spasswolf@web.de
Fixes: 3253cb49cbad4 ("softirq: Allow to drop the softirq-BKL lock on PREEMPT_RT")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reported-by: Bert Karwatzki <spasswolf@web.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20260302162631.uGUyIqDT@linutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d4e6e00bb90a..67e25f6d15a4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4711,7 +4711,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
 {
 	spin_lock(&txq->_xmit_lock);
-	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	/* Pairs with READ_ONCE() in netif_tx_owned() */
 	WRITE_ONCE(txq->xmit_lock_owner, cpu);
 }
 
@@ -4729,7 +4729,7 @@ static inline void __netif_tx_release(struct netdev_queue *txq)
 static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
 {
 	spin_lock_bh(&txq->_xmit_lock);
-	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	/* Pairs with READ_ONCE() in netif_tx_owned() */
 	WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id());
 }
 
@@ -4738,7 +4738,7 @@ static inline bool __netif_tx_trylock(struct netdev_queue *txq)
 	bool ok = spin_trylock(&txq->_xmit_lock);
 
 	if (likely(ok)) {
-		/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+		/* Pairs with READ_ONCE() in netif_tx_owned() */
 		WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id());
 	}
 	return ok;
@@ -4746,14 +4746,14 @@ static inline bool __netif_tx_trylock(struct netdev_queue *txq)
 
 static inline void __netif_tx_unlock(struct netdev_queue *txq)
 {
-	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	/* Pairs with READ_ONCE() in netif_tx_owned() */
 	WRITE_ONCE(txq->xmit_lock_owner, -1);
 	spin_unlock(&txq->_xmit_lock);
 }
 
 static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
 {
-	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	/* Pairs with READ_ONCE() in netif_tx_owned() */
 	WRITE_ONCE(txq->xmit_lock_owner, -1);
 	spin_unlock_bh(&txq->_xmit_lock);
 }
@@ -4846,6 +4846,23 @@ static inline void netif_tx_disable(struct net_device *dev)
 	local_bh_enable();
 }
 
+#ifndef CONFIG_PREEMPT_RT
+static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu)
+{
+	/* Other cpus might concurrently change txq->xmit_lock_owner
+	 * to -1 or to their cpu id, but not to our id.
+	 */
+	return READ_ONCE(txq->xmit_lock_owner) == cpu;
+}
+
+#else
+static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu)
+{
+	return rt_mutex_owner(&txq->_xmit_lock.lock) == current;
+}
+
+#endif
+
 static inline void netif_addr_lock(struct net_device *dev)
 {
 	unsigned char nest_level = 0;
-- 
cgit v1.2.3


From fb7fb4016300ac622c964069e286dc83166a5d52 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 2 Mar 2026 23:28:15 +0100
Subject: netfilter: nf_tables: clone set on flush only

Syzbot with fault injection triggered a failing memory allocation with
GFP_KERNEL which results in a WARN splat:

iter.err
WARNING: net/netfilter/nf_tables_api.c:845 at nft_map_deactivate+0x34e/0x3c0 net/netfilter/nf_tables_api.c:845, CPU#0: syz.0.17/5992
Modules linked in:
CPU: 0 UID: 0 PID: 5992 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full)
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2026
RIP: 0010:nft_map_deactivate+0x34e/0x3c0 net/netfilter/nf_tables_api.c:845
Code: 8b 05 86 5a 4e 09 48 3b 84 24 a0 00 00 00 75 62 48 8d 65 d8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc cc e8 63 6d fa f7 90 <0f> 0b 90 43
+80 7c 35 00 00 0f 85 23 fe ff ff e9 26 fe ff ff 89 d9
RSP: 0018:ffffc900045af780 EFLAGS: 00010293
RAX: ffffffff89ca45bd RBX: 00000000fffffff4 RCX: ffff888028111e40
RDX: 0000000000000000 RSI: 00000000fffffff4 RDI: 0000000000000000
RBP: ffffc900045af870 R08: 0000000000400dc0 R09: 00000000ffffffff
R10: dffffc0000000000 R11: fffffbfff1d141db R12: ffffc900045af7e0
R13: 1ffff920008b5f24 R14: dffffc0000000000 R15: ffffc900045af920
FS:  000055557a6a5500(0000) GS:ffff888125496000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fb5ea271fc0 CR3: 000000003269e000 CR4: 00000000003526f0
Call Trace:
 <TASK>
 __nft_release_table+0xceb/0x11f0 net/netfilter/nf_tables_api.c:12115
 nft_rcv_nl_event+0xc25/0xdb0 net/netfilter/nf_tables_api.c:12187
 notifier_call_chain+0x19d/0x3a0 kernel/notifier.c:85
 blocking_notifier_call_chain+0x6a/0x90 kernel/notifier.c:380
 netlink_release+0x123b/0x1ad0 net/netlink/af_netlink.c:761
 __sock_release net/socket.c:662 [inline]
 sock_close+0xc3/0x240 net/socket.c:1455

Restrict set clone to the flush set command in the preparation phase.
Add NFT_ITER_UPDATE_CLONE and use it for this purpose, update the rbtree
and pipapo backends to only clone the set when this iteration type is
used.

As for the existing NFT_ITER_UPDATE type, update the pipapo backend to
use the existing set clone if available, otherwise use the existing set
representation. After this update, there is no need to clone a set that
is being deleted, this includes bound anonymous set.

An alternative approach to NFT_ITER_UPDATE_CLONE is to add a .clone
interface and call it from the flush set path.

Reported-by: syzbot+4924a0edc148e8b4b342@syzkaller.appspotmail.com
Fixes: 3f1d886cc7c3 ("netfilter: nft_set_pipapo: move cloning of match info to insert/removal path")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_tables.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 426534a711b0..ea6f29ad7888 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -320,11 +320,13 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv)
  * @NFT_ITER_UNSPEC: unspecified, to catch errors
  * @NFT_ITER_READ: read-only iteration over set elements
  * @NFT_ITER_UPDATE: iteration under mutex to update set element state
+ * @NFT_ITER_UPDATE_CLONE: clone set before iteration under mutex to update element
  */
 enum nft_iter_type {
 	NFT_ITER_UNSPEC,
 	NFT_ITER_READ,
 	NFT_ITER_UPDATE,
+	NFT_ITER_UPDATE_CLONE,
 };
 
 struct nft_set;
-- 
cgit v1.2.3


From 9df95785d3d8302f7c066050117b04cd3c2048c2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 3 Mar 2026 16:31:32 +0100
Subject: netfilter: nft_set_pipapo: split gc into unlink and reclaim phase

Yiming Qian reports Use-after-free in the pipapo set type:
  Under a large number of expired elements, commit-time GC can run for a very
  long time in a non-preemptible context, triggering soft lockup warnings and
  RCU stall reports (local denial of service).

We must split GC in an unlink and a reclaim phase.

We cannot queue elements for freeing until pointers have been swapped.
Expired elements are still exposed to both the packet path and userspace
dumpers via the live copy of the data structure.

call_rcu() does not protect us: dump operations or element lookups starting
after call_rcu has fired can still observe the free'd element, unless the
commit phase has made enough progress to swap the clone and live pointers
before any new reader has picked up the old version.

This a similar approach as done recently for the rbtree backend in commit
35f83a75529a ("netfilter: nft_set_rbtree: don't gc elements on insert").

Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_tables.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index ea6f29ad7888..e2d2bfc1f989 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1863,6 +1863,11 @@ struct nft_trans_gc {
 	struct rcu_head		rcu;
 };
 
+static inline int nft_trans_gc_space(const struct nft_trans_gc *trans)
+{
+	return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
 static inline void nft_ctx_update(struct nft_ctx *ctx,
 				  const struct nft_trans *trans)
 {
-- 
cgit v1.2.3


From e2cedd400c3ec0302ffca2490e8751772906ac23 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Wed, 4 Mar 2026 09:06:02 -0500
Subject: net/sched: act_ife: Fix metalist update behavior

Whenever an ife action replace changes the metalist, instead of
replacing the old data on the metalist, the current ife code is appending
the new metadata. Aside from being innapropriate behavior, this may lead
to an unbounded addition of metadata to the metalist which might cause an
out of bounds error when running the encode op:

[  138.423369][    C1] ==================================================================
[  138.424317][    C1] BUG: KASAN: slab-out-of-bounds in ife_tlv_meta_encode (net/ife/ife.c:168)
[  138.424906][    C1] Write of size 4 at addr ffff8880077f4ffe by task ife_out_out_bou/255
[  138.425778][    C1] CPU: 1 UID: 0 PID: 255 Comm: ife_out_out_bou Not tainted 7.0.0-rc1-00169-gfbdfa8da05b6 #624 PREEMPT(full)
[  138.425795][    C1] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[  138.425800][    C1] Call Trace:
[  138.425804][    C1]  <IRQ>
[  138.425808][    C1]  dump_stack_lvl (lib/dump_stack.c:122)
[  138.425828][    C1]  print_report (mm/kasan/report.c:379 mm/kasan/report.c:482)
[  138.425839][    C1]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[  138.425844][    C1]  ? __virt_addr_valid (./arch/x86/include/asm/preempt.h:95 (discriminator 1) ./include/linux/rcupdate.h:975 (discriminator 1) ./include/linux/mmzone.h:2207 (discriminator 1) arch/x86/mm/physaddr.c:54 (discriminator 1))
[  138.425853][    C1]  ? ife_tlv_meta_encode (net/ife/ife.c:168)
[  138.425859][    C1]  kasan_report (mm/kasan/report.c:221 mm/kasan/report.c:597)
[  138.425868][    C1]  ? ife_tlv_meta_encode (net/ife/ife.c:168)
[  138.425878][    C1]  kasan_check_range (mm/kasan/generic.c:186 (discriminator 1) mm/kasan/generic.c:200 (discriminator 1))
[  138.425884][    C1]  __asan_memset (mm/kasan/shadow.c:84 (discriminator 2))
[  138.425889][    C1]  ife_tlv_meta_encode (net/ife/ife.c:168)
[  138.425893][    C1]  ? ife_tlv_meta_encode (net/ife/ife.c:171)
[  138.425898][    C1]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[  138.425903][    C1]  ife_encode_meta_u16 (net/sched/act_ife.c:57)
[  138.425910][    C1]  ? __pfx_do_raw_spin_lock (kernel/locking/spinlock_debug.c:114)
[  138.425916][    C1]  ? __asan_memcpy (mm/kasan/shadow.c:105 (discriminator 3))
[  138.425921][    C1]  ? __pfx_ife_encode_meta_u16 (net/sched/act_ife.c:45)
[  138.425927][    C1]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[  138.425931][    C1]  tcf_ife_act (net/sched/act_ife.c:847 net/sched/act_ife.c:879)

To solve this issue, fix the replace behavior by adding the metalist to
the ife rcu data structure.

Fixes: aa9fd9a325d51 ("sched: act: ife: update parameters via rcu handling")
Reported-by: Ruitong Liu <cnitlrt@gmail.com>
Tested-by: Ruitong Liu <cnitlrt@gmail.com>
Co-developed-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260304140603.76500-1-jhs@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tc_act/tc_ife.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h
index c7f24a2da1ca..24d4d5a62b3c 100644
--- a/include/net/tc_act/tc_ife.h
+++ b/include/net/tc_act/tc_ife.h
@@ -13,15 +13,13 @@ struct tcf_ife_params {
 	u8 eth_src[ETH_ALEN];
 	u16 eth_type;
 	u16 flags;
-
+	struct list_head metalist;
 	struct rcu_head rcu;
 };
 
 struct tcf_ife_info {
 	struct tc_action common;
 	struct tcf_ife_params __rcu *params;
-	/* list of metaids allowed */
-	struct list_head metalist;
 };
 #define to_ife(a) ((struct tcf_ife_info *)a)
 
-- 
cgit v1.2.3


From 16394d80539937d348dd3b9ea32415c54e67a81b Mon Sep 17 00:00:00 2001
From: Larysa Zaremba <larysa.zaremba@intel.com>
Date: Thu, 5 Mar 2026 12:12:43 +0100
Subject: xsk: introduce helper to determine rxq->frag_size

rxq->frag_size is basically a step between consecutive strictly aligned
frames. In ZC mode, chunk size fits exactly, but if chunks are unaligned,
there is no safe way to determine accessible space to grow tailroom.

Report frag_size to be zero, if chunks are unaligned, chunk_size otherwise.

Fixes: 24ea50127ecf ("xsk: support mbuf on ZC RX")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Link: https://patch.msgid.link/20260305111253.2317394-3-larysa.zaremba@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/xdp_sock_drv.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index aefc368449d5..6b9ebae2dc95 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -51,6 +51,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
 	return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool);
 }
 
+static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool)
+{
+	return pool->unaligned ? 0 : xsk_pool_get_chunk_size(pool);
+}
+
 static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
 					 struct xdp_rxq_info *rxq)
 {
@@ -337,6 +342,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
 	return 0;
 }
 
+static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool)
+{
+	return 0;
+}
+
 static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
 					 struct xdp_rxq_info *rxq)
 {
-- 
cgit v1.2.3


From 75d9228982f23d68066ca0b7d87014c3eb8ddc85 Mon Sep 17 00:00:00 2001
From: Larysa Zaremba <larysa.zaremba@intel.com>
Date: Thu, 5 Mar 2026 12:12:48 +0100
Subject: libeth, idpf: use truesize as XDP RxQ info frag_size

The only user of frag_size field in XDP RxQ info is
bpf_xdp_frags_increase_tail(). It clearly expects whole buffer size instead
of DMA write size. Different assumptions in idpf driver configuration lead
to negative tailroom.

To make it worse, buffer sizes are not actually uniform in idpf when
splitq is enabled, as there are several buffer queues, so rxq->rx_buf_size
is meaningless in this case.

Use truesize of the first bufq in AF_XDP ZC, as there is only one. Disable
growing tail for regular splitq.

Fixes: ac8a861f632e ("idpf: prepare structures to support XDP")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Link: https://patch.msgid.link/20260305111253.2317394-8-larysa.zaremba@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/libeth/xsk.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h
index 481a7b28e6f2..82b5d21aae87 100644
--- a/include/net/libeth/xsk.h
+++ b/include/net/libeth/xsk.h
@@ -597,6 +597,7 @@ __libeth_xsk_run_pass(struct libeth_xdp_buff *xdp,
  * @pending: current number of XSkFQEs to refill
  * @thresh: threshold below which the queue is refilled
  * @buf_len: HW-writeable length per each buffer
+ * @truesize: step between consecutive buffers, 0 if none exists
  * @nid: ID of the closest NUMA node with memory
  */
 struct libeth_xskfq {
@@ -614,6 +615,8 @@ struct libeth_xskfq {
 	u32			thresh;
 
 	u32			buf_len;
+	u32			truesize;
+
 	int			nid;
 };
 
-- 
cgit v1.2.3


From 55f854dd5bdd8e19b936a00ef1f8d776ac32c7b0 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <lvivier@redhat.com>
Date: Wed, 4 Mar 2026 14:43:38 +0100
Subject: qmi_wwan: allow max_mtu above hard_mtu to control rx_urb_size

Commit c7159e960f14 ("usbnet: limit max_mtu based on device's hard_mtu")
capped net->max_mtu to the device's hard_mtu in usbnet_probe(). While
this correctly prevents oversized packets on standard USB network
devices, it breaks the qmi_wwan driver.

qmi_wwan relies on userspace (e.g. ModemManager) setting a large MTU on
the wwan0 interface to configure rx_urb_size via usbnet_change_mtu().
QMI modems negotiate USB transfer sizes of 16,383 or 32,767 bytes, and
the USB receive buffers must be sized accordingly. With max_mtu capped
to hard_mtu (~1500 bytes), userspace can no longer raise the MTU, the
receive buffers remain small, and download speeds drop from >300 Mbps
to ~0.8 Mbps.

Introduce a FLAG_NOMAXMTU driver flag that allows individual usbnet
drivers to opt out of the max_mtu cap. Set this flag in qmi_wwan's
driver_info structures to restore the previous behavior for QMI devices,
while keeping the safety fix in place for all other usbnet drivers.

Fixes: c7159e960f14 ("usbnet: limit max_mtu based on device's hard_mtu")
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/lkml/CAPh3n803k8JcBPV5qEzUB-oKzWkAs-D5CU7z=Vd_nLRCr5ZqQg@mail.gmail.com/
Reported-by: Koen Vandeputte <koen.vandeputte@citymesh.com>
Tested-by: Daniele Palmas <dnlplm@gmail.com>
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Link: https://patch.msgid.link/20260304134338.1785002-1-lvivier@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/usb/usbnet.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index b0e84896e6ac..bbf799ccf3b3 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -132,6 +132,7 @@ struct driver_info {
 #define FLAG_MULTI_PACKET	0x2000
 #define FLAG_RX_ASSEMBLE	0x4000	/* rx packets may span >1 frames */
 #define FLAG_NOARP		0x8000	/* device can't do ARP */
+#define FLAG_NOMAXMTU		0x10000	/* allow max_mtu above hard_mtu */
 
 	/* init device ... can sleep, or cause probe() failure */
 	int	(*bind)(struct usbnet *, struct usb_interface *);
-- 
cgit v1.2.3


From 1954c4f012206147c34acda8da04f827aa7d3ee3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 7 Mar 2026 20:07:15 +0000
Subject: eventpoll: Convert epoll_put_uevent() to scoped user access

Saves two function calls, and one stac/clac pair.

stac/clac is rather expensive on older cpus like Zen 2.

A synthetic network stress test gives a ~1.5% increase of pps
on AMD Zen 2.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Kuniyuki Iwashima <kuniyu@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/eventpoll.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index ccb478eb174b..ea9ca0e4172a 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -82,11 +82,14 @@ static inline struct epoll_event __user *
 epoll_put_uevent(__poll_t revents, __u64 data,
 		 struct epoll_event __user *uevent)
 {
-	if (__put_user(revents, &uevent->events) ||
-	    __put_user(data, &uevent->data))
-		return NULL;
-
+	scoped_user_write_access_size(uevent, sizeof(*uevent), efault) {
+		unsafe_put_user(revents, &uevent->events, efault);
+		unsafe_put_user(data, &uevent->data, efault);
+	}
 	return uevent+1;
+
+efault:
+	return NULL;
 }
 #endif
 
-- 
cgit v1.2.3


From 6f1a9140ecda3baba3d945b9a6155af4268aafc4 Mon Sep 17 00:00:00 2001
From: Weiming Shi <bestswngs@gmail.com>
Date: Sat, 7 Mar 2026 00:01:34 +0800
Subject: net: add xmit recursion limit to tunnel xmit functions

Tunnel xmit functions (iptunnel_xmit, ip6tunnel_xmit) lack their own
recursion limit. When a bond device in broadcast mode has GRE tap
interfaces as slaves, and those GRE tunnels route back through the
bond, multicast/broadcast traffic triggers infinite recursion between
bond_xmit_broadcast() and ip_tunnel_xmit()/ip6_tnl_xmit(), causing
kernel stack overflow.

The existing XMIT_RECURSION_LIMIT (8) in the no-qdisc path is not
sufficient because tunnel recursion involves route lookups and full IP
output, consuming much more stack per level. Use a lower limit of 4
(IP_TUNNEL_RECURSION_LIMIT) to prevent overflow.

Add recursion detection using dev_xmit_recursion helpers directly in
iptunnel_xmit() and ip6tunnel_xmit() to cover all IPv4/IPv6 tunnel
paths including UDP encapsulated tunnels (VXLAN, Geneve, etc.).

Move dev_xmit_recursion helpers from net/core/dev.h to public header
include/linux/netdevice.h so they can be used by tunnel code.

 BUG: KASAN: stack-out-of-bounds in blake2s.constprop.0+0xe7/0x160
 Write of size 32 at addr ffff88810033fed0 by task kworker/0:1/11
 Workqueue: mld mld_ifc_work
 Call Trace:
  <TASK>
  __build_flow_key.constprop.0 (net/ipv4/route.c:515)
  ip_rt_update_pmtu (net/ipv4/route.c:1073)
  iptunnel_xmit (net/ipv4/ip_tunnel_core.c:84)
  ip_tunnel_xmit (net/ipv4/ip_tunnel.c:847)
  gre_tap_xmit (net/ipv4/ip_gre.c:779)
  dev_hard_start_xmit (net/core/dev.c:3887)
  sch_direct_xmit (net/sched/sch_generic.c:347)
  __dev_queue_xmit (net/core/dev.c:4802)
  bond_dev_queue_xmit (drivers/net/bonding/bond_main.c:312)
  bond_xmit_broadcast (drivers/net/bonding/bond_main.c:5279)
  bond_start_xmit (drivers/net/bonding/bond_main.c:5530)
  dev_hard_start_xmit (net/core/dev.c:3887)
  __dev_queue_xmit (net/core/dev.c:4841)
  ip_finish_output2 (net/ipv4/ip_output.c:237)
  ip_output (net/ipv4/ip_output.c:438)
  iptunnel_xmit (net/ipv4/ip_tunnel_core.c:86)
  gre_tap_xmit (net/ipv4/ip_gre.c:779)
  dev_hard_start_xmit (net/core/dev.c:3887)
  sch_direct_xmit (net/sched/sch_generic.c:347)
  __dev_queue_xmit (net/core/dev.c:4802)
  bond_dev_queue_xmit (drivers/net/bonding/bond_main.c:312)
  bond_xmit_broadcast (drivers/net/bonding/bond_main.c:5279)
  bond_start_xmit (drivers/net/bonding/bond_main.c:5530)
  dev_hard_start_xmit (net/core/dev.c:3887)
  __dev_queue_xmit (net/core/dev.c:4841)
  ip_finish_output2 (net/ipv4/ip_output.c:237)
  ip_output (net/ipv4/ip_output.c:438)
  iptunnel_xmit (net/ipv4/ip_tunnel_core.c:86)
  ip_tunnel_xmit (net/ipv4/ip_tunnel.c:847)
  gre_tap_xmit (net/ipv4/ip_gre.c:779)
  dev_hard_start_xmit (net/core/dev.c:3887)
  sch_direct_xmit (net/sched/sch_generic.c:347)
  __dev_queue_xmit (net/core/dev.c:4802)
  bond_dev_queue_xmit (drivers/net/bonding/bond_main.c:312)
  bond_xmit_broadcast (drivers/net/bonding/bond_main.c:5279)
  bond_start_xmit (drivers/net/bonding/bond_main.c:5530)
  dev_hard_start_xmit (net/core/dev.c:3887)
  __dev_queue_xmit (net/core/dev.c:4841)
  mld_sendpack
  mld_ifc_work
  process_one_work
  worker_thread
  </TASK>

Fixes: 745e20f1b626 ("net: add a recursion limit in xmit path")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
Link: https://patch.msgid.link/20260306160133.3852900-2-bestswngs@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 32 ++++++++++++++++++++++++++++++++
 include/net/ip6_tunnel.h  | 12 ++++++++++++
 include/net/ip_tunnels.h  |  7 +++++++
 3 files changed, 51 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 67e25f6d15a4..ae269a2e7f4d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3576,17 +3576,49 @@ struct page_pool_bh {
 };
 DECLARE_PER_CPU(struct page_pool_bh, system_page_pool);
 
+#define XMIT_RECURSION_LIMIT	8
+
 #ifndef CONFIG_PREEMPT_RT
 static inline int dev_recursion_level(void)
 {
 	return this_cpu_read(softnet_data.xmit.recursion);
 }
+
+static inline bool dev_xmit_recursion(void)
+{
+	return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
+			XMIT_RECURSION_LIMIT);
+}
+
+static inline void dev_xmit_recursion_inc(void)
+{
+	__this_cpu_inc(softnet_data.xmit.recursion);
+}
+
+static inline void dev_xmit_recursion_dec(void)
+{
+	__this_cpu_dec(softnet_data.xmit.recursion);
+}
 #else
 static inline int dev_recursion_level(void)
 {
 	return current->net_xmit.recursion;
 }
 
+static inline bool dev_xmit_recursion(void)
+{
+	return unlikely(current->net_xmit.recursion > XMIT_RECURSION_LIMIT);
+}
+
+static inline void dev_xmit_recursion_inc(void)
+{
+	current->net_xmit.recursion++;
+}
+
+static inline void dev_xmit_recursion_dec(void)
+{
+	current->net_xmit.recursion--;
+}
 #endif
 
 void __netif_schedule(struct Qdisc *q);
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 120db2865811..1253cbb4b0a4 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -156,6 +156,16 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
 {
 	int pkt_len, err;
 
+	if (dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT) {
+		net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
+				     dev->name);
+		DEV_STATS_INC(dev, tx_errors);
+		kfree_skb(skb);
+		return;
+	}
+
+	dev_xmit_recursion_inc();
+
 	memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 	IP6CB(skb)->flags = ip6cb_flags;
 	pkt_len = skb->len - skb_inner_network_offset(skb);
@@ -166,6 +176,8 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
 			pkt_len = -1;
 		iptunnel_xmit_stats(dev, pkt_len);
 	}
+
+	dev_xmit_recursion_dec();
 }
 #endif
 #endif
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 4021e6a73e32..80662f812080 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -27,6 +27,13 @@
 #include <net/ip6_route.h>
 #endif
 
+/* Recursion limit for tunnel xmit to detect routing loops.
+ * Unlike XMIT_RECURSION_LIMIT (8) used in the no-qdisc path, tunnel
+ * recursion involves route lookups and full IP output, consuming much
+ * more stack per level, so a lower limit is needed.
+ */
+#define IP_TUNNEL_RECURSION_LIMIT	4
+
 /* Keep error state on tunnel for 30 sec */
 #define IPTUNNEL_ERR_TIMEO	(30*HZ)
 
-- 
cgit v1.2.3


From fa655a9ca73f7df32b8ca4d14ce11742f9578288 Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@linux.dev>
Date: Tue, 3 Mar 2026 22:31:01 +0100
Subject: nvme: Annotate struct nvme_dhchap_key with __counted_by

Add the __counted_by() compiler attribute to the flexible array member
'key' to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and
CONFIG_FORTIFY_SOURCE.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme-auth.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h
index 60e069a6757f..e75c29c51464 100644
--- a/include/linux/nvme-auth.h
+++ b/include/linux/nvme-auth.h
@@ -11,7 +11,7 @@
 struct nvme_dhchap_key {
 	size_t len;
 	u8 hash;
-	u8 key[];
+	u8 key[] __counted_by(len);
 };
 
 u32 nvme_auth_get_seqnum(void);
-- 
cgit v1.2.3


From 22fd7f7fed2ae3702f90d1985c326354e86b9c75 Mon Sep 17 00:00:00 2001
From: Muhammad Amirul Asyraf Mohamad Jamian
 <muhammad.amirul.asyraf.mohamad.jamian@altera.com>
Date: Thu, 5 Mar 2026 01:31:51 -0800
Subject: firmware: stratix10-svc: Add Multi SVC clients support

In the current implementation, SVC client drivers such as socfpga-hwmon,
intel_fcs, stratix10-soc, stratix10-rsu each send an SMC command that
triggers a single thread in the stratix10-svc driver. Upon receiving a
callback, the initiating client driver sends a stratix10-svc-done signal,
terminating the thread without waiting for other pending SMC commands to
complete. This leads to a timeout issue in the firmware SVC mailbox service
when multiple client drivers send SMC commands concurrently.

To resolve this issue, a dedicated thread is now created per channel. The
stratix10-svc driver will support up to the number of channels defined by
SVC_NUM_CHANNEL. Thread synchronization is handled using a mutex to prevent
simultaneous issuance of SMC commands by multiple threads.

SVC_NUM_DATA_IN_FIFO is reduced from 32 to 8, since each channel now has
its own dedicated FIFO and the SDM processes commands one at a time.
8 entries per channel is sufficient while keeping the total aggregate
capacity the same (4 channels x 8 = 32 entries).

Additionally, a thread task is now validated before invoking kthread_stop
when the user aborts, ensuring safe termination.

Timeout values have also been adjusted to accommodate the increased load
from concurrent client driver activity.

Fixes: 7ca5ce896524 ("firmware: add Intel Stratix10 service layer driver")
Cc: stable@vger.kernel.org
Signed-off-by: Ang Tien Sung <tien.sung.ang@altera.com>
Signed-off-by: Fong, Yan Kei <yankei.fong@altera.com>
Signed-off-by: Muhammad Amirul Asyraf Mohamad Jamian <muhammad.amirul.asyraf.mohamad.jamian@altera.com>
Link: https://lore.kernel.org/all/20260305093151.2678-1-muhammad.amirul.asyraf.mohamad.jamian@altera.com
Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
---
 include/linux/firmware/intel/stratix10-svc-client.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h
index d290060f4c73..91013161e9db 100644
--- a/include/linux/firmware/intel/stratix10-svc-client.h
+++ b/include/linux/firmware/intel/stratix10-svc-client.h
@@ -68,12 +68,12 @@
  * timeout value used in Stratix10 FPGA manager driver.
  * timeout value used in RSU driver
  */
-#define SVC_RECONFIG_REQUEST_TIMEOUT_MS         300
-#define SVC_RECONFIG_BUFFER_TIMEOUT_MS          720
-#define SVC_RSU_REQUEST_TIMEOUT_MS              300
+#define SVC_RECONFIG_REQUEST_TIMEOUT_MS         5000
+#define SVC_RECONFIG_BUFFER_TIMEOUT_MS          5000
+#define SVC_RSU_REQUEST_TIMEOUT_MS              2000
 #define SVC_FCS_REQUEST_TIMEOUT_MS		2000
 #define SVC_COMPLETED_TIMEOUT_MS		30000
-#define SVC_HWMON_REQUEST_TIMEOUT_MS		300
+#define SVC_HWMON_REQUEST_TIMEOUT_MS		2000
 
 struct stratix10_svc_chan;
 
-- 
cgit v1.2.3


From 28b225282d44e2ef40e7f46cfdbd5d1b20b8874f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 9 Mar 2026 17:39:07 -0700
Subject: page_pool: store detach_time as ktime_t to avoid false-negatives

While testing other changes in vng I noticed that
nl_netdev.page_pool_check flakes. This never happens in real CI.

Turns out vng may boot and get to that test in less than a second.
page_pool_detached() records the detach time in seconds, so if
vng is fast enough detach time is set to 0. Other code treats
0 as "not detached". detach_time is only used to report the state
to the user, so it's not a huge deal in practice but let's fix it.
Store the raw ktime_t (nanoseconds) instead. A nanosecond value
of 0 is practically impossible.

Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Fixes: 69cb4952b6f6 ("net: page_pool: report when page pool was destroyed")
Link: https://patch.msgid.link/20260310003907.3540019-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/page_pool/types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 0d453484a585..cdd95477af7a 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -247,7 +247,7 @@ struct page_pool {
 	/* User-facing fields, protected by page_pools_lock */
 	struct {
 		struct hlist_node list;
-		u64 detach_time;
+		ktime_t detach_time;
 		u32 id;
 	} user;
 };
-- 
cgit v1.2.3


From b2e48c429ec54715d16fefa719dd2fbded2e65be Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 10 Mar 2026 21:28:53 +0100
Subject: sched/mmcid: Prevent CID stalls due to concurrent forks

A newly forked task is accounted as MMCID user before the task is visible
in the process' thread list and the global task list. This creates the
following problem:

 CPU1			CPU2
 fork()
   sched_mm_cid_fork(tnew1)
     tnew1->mm.mm_cid_users++;
     tnew1->mm_cid.cid = getcid()
-> preemption
			fork()
			  sched_mm_cid_fork(tnew2)
			    tnew2->mm.mm_cid_users++;
                            // Reaches the per CPU threshold
			    mm_cid_fixup_tasks_to_cpus()
			    for_each_other(current, p)
			         ....

As tnew1 is not visible yet, this fails to fix up the already allocated CID
of tnew1. As a consequence a subsequent schedule in might fail to acquire a
(transitional) CID and the machine stalls.

Move the invocation of sched_mm_cid_fork() after the new task becomes
visible in the thread and the task list to prevent this.

This also makes it symmetrical vs. exit() where the task is removed as CID
user before the task is removed from the thread and task lists.

Fixes: fbd0e71dc370 ("sched/mmcid: Provide CID ownership mode fixup functions")
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260310202525.969061974@kernel.org
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a7b4a980eb2f..5a5d3dbc9cdf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2354,7 +2354,6 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo
 #ifdef CONFIG_SCHED_MM_CID
 void sched_mm_cid_before_execve(struct task_struct *t);
 void sched_mm_cid_after_execve(struct task_struct *t);
-void sched_mm_cid_fork(struct task_struct *t);
 void sched_mm_cid_exit(struct task_struct *t);
 static __always_inline int task_mm_cid(struct task_struct *t)
 {
@@ -2363,7 +2362,6 @@ static __always_inline int task_mm_cid(struct task_struct *t)
 #else
 static inline void sched_mm_cid_before_execve(struct task_struct *t) { }
 static inline void sched_mm_cid_after_execve(struct task_struct *t) { }
-static inline void sched_mm_cid_fork(struct task_struct *t) { }
 static inline void sched_mm_cid_exit(struct task_struct *t) { }
 static __always_inline int task_mm_cid(struct task_struct *t)
 {
-- 
cgit v1.2.3


From 192d852129b1b7c4f0ddbab95d0de1efd5ee1405 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 10 Mar 2026 21:29:09 +0100
Subject: sched/mmcid: Avoid full tasklist walks

Chasing vfork()'ed tasks on a CID ownership mode switch requires a full
task list walk, which is obviously expensive on large systems.

Avoid that by keeping a list of tasks using a mm MMCID entity in mm::mm_cid
and walk this list instead. This removes the proven to be flaky counting
logic and avoids a full task list walk in the case of vfork()'ed tasks.

Fixes: fbd0e71dc370 ("sched/mmcid: Provide CID ownership mode fixup functions")
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260310202526.183824481@kernel.org
---
 include/linux/rseq_types.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h
index da5fa6f40294..0b42045988db 100644
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -133,10 +133,12 @@ struct rseq_data { };
  * @active:	MM CID is active for the task
  * @cid:	The CID associated to the task either permanently or
  *		borrowed from the CPU
+ * @node:	Queued in the per MM MMCID list
  */
 struct sched_mm_cid {
 	unsigned int		active;
 	unsigned int		cid;
+	struct hlist_node	node;
 };
 
 /**
@@ -157,6 +159,7 @@ struct mm_cid_pcpu {
  * @work:		Regular work to handle the affinity mode change case
  * @lock:		Spinlock to protect against affinity setting which can't take @mutex
  * @mutex:		Mutex to serialize forks and exits related to this mm
+ * @user_list:		List of the MM CID users of a MM
  * @nr_cpus_allowed:	The number of CPUs in the per MM allowed CPUs map. The map
  *			is growth only.
  * @users:		The number of tasks sharing this MM. Separate from mm::mm_users
@@ -177,13 +180,14 @@ struct mm_mm_cid {
 
 	raw_spinlock_t		lock;
 	struct mutex		mutex;
+	struct hlist_head	user_list;
 
 	/* Low frequency modified */
 	unsigned int		nr_cpus_allowed;
 	unsigned int		users;
 	unsigned int		pcpu_thrs;
 	unsigned int		update_deferred;
-}____cacheline_aligned_in_smp;
+} ____cacheline_aligned;
 #else /* CONFIG_SCHED_MM_CID */
 struct mm_mm_cid { };
 struct sched_mm_cid { };
-- 
cgit v1.2.3


From 416909962e7cdf29fd01ac523c953f37708df93d Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 17 Feb 2026 22:07:47 -0500
Subject: USB: usbcore: Introduce usb_bulk_msg_killable()

The synchronous message API in usbcore (usb_control_msg(),
usb_bulk_msg(), and so on) uses uninterruptible waits.  However,
drivers may call these routines in the context of a user thread, which
means it ought to be possible to at least kill them.

For this reason, introduce a new usb_bulk_msg_killable() function
which behaves the same as usb_bulk_msg() except for using
wait_for_completion_killable_timeout() instead of
wait_for_completion_timeout().  The same can be done later for
usb_control_msg() later on, if it turns out to be needed.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Suggested-by: Oliver Neukum <oneukum@suse.com>
Link: https://lore.kernel.org/linux-usb/3acfe838-6334-4f6d-be7c-4bb01704b33d@rowland.harvard.edu/
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
CC: stable@vger.kernel.org
Link: https://patch.msgid.link/248628b4-cc83-4e81-a620-3ce4e0376d41@rowland.harvard.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index fbfcc70b07fb..57ceeb02a7cb 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1868,8 +1868,9 @@ extern int usb_control_msg(struct usb_device *dev, unsigned int pipe,
 extern int usb_interrupt_msg(struct usb_device *usb_dev, unsigned int pipe,
 	void *data, int len, int *actual_length, int timeout);
 extern int usb_bulk_msg(struct usb_device *usb_dev, unsigned int pipe,
-	void *data, int len, int *actual_length,
-	int timeout);
+	void *data, int len, int *actual_length, int timeout);
+extern int usb_bulk_msg_killable(struct usb_device *usb_dev, unsigned int pipe,
+	void *data, int len, int *actual_length, int timeout);
 
 /* wrappers around usb_control_msg() for the most common standard requests */
 int usb_control_msg_send(struct usb_device *dev, __u8 endpoint, __u8 request,
-- 
cgit v1.2.3


From 1015c27a5e1a63efae2b18a9901494474b4d1dc3 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 17 Feb 2026 22:10:32 -0500
Subject: USB: core: Limit the length of unkillable synchronous timeouts

The usb_control_msg(), usb_bulk_msg(), and usb_interrupt_msg() APIs in
usbcore allow unlimited timeout durations.  And since they use
uninterruptible waits, this leaves open the possibility of hanging a
task for an indefinitely long time, with no way to kill it short of
unplugging the target device.

To prevent this sort of problem, enforce a maximum limit on the length
of these unkillable timeouts.  The limit chosen here, somewhat
arbitrarily, is 60 seconds.  On many systems (although not all) this
is short enough to avoid triggering the kernel's hung-task detector.

In addition, clear up the ambiguity of negative timeout values by
treating them the same as 0, i.e., using the maximum allowed timeout.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Link: https://lore.kernel.org/linux-usb/3acfe838-6334-4f6d-be7c-4bb01704b33d@rowland.harvard.edu/
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
CC: stable@vger.kernel.org
Link: https://patch.msgid.link/15fc9773-a007-47b0-a703-df89a8cf83dd@rowland.harvard.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 57ceeb02a7cb..04277af4bb9d 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1862,6 +1862,9 @@ void usb_free_noncoherent(struct usb_device *dev, size_t size,
  *                         SYNCHRONOUS CALL SUPPORT                  *
  *-------------------------------------------------------------------*/
 
+/* Maximum value allowed for timeout in synchronous routines below */
+#define USB_MAX_SYNCHRONOUS_TIMEOUT		60000	/* ms */
+
 extern int usb_control_msg(struct usb_device *dev, unsigned int pipe,
 	__u8 request, __u8 requesttype, __u16 value, __u16 index,
 	void *data, __u16 size, int timeout);
-- 
cgit v1.2.3


From 9f6a983cfa22ac662c86e60816d3a357d4b551e9 Mon Sep 17 00:00:00 2001
From: Jie Deng <dengjie03@kylinos.cn>
Date: Fri, 27 Feb 2026 16:49:31 +0800
Subject: usb: core: new quirk to handle devices with zero configurations

Some USB devices incorrectly report bNumConfigurations as 0 in their
device descriptor, which causes the USB core to reject them during
enumeration.
logs:
usb 1-2: device descriptor read/64, error -71
usb 1-2: no configurations
usb 1-2: can't read configurations, error -22

However, these devices actually work correctly when
treated as having a single configuration.

Add a new quirk USB_QUIRK_FORCE_ONE_CONFIG to handle such devices.
When this quirk is set, assume the device has 1 configuration instead
of failing with -EINVAL.

This quirk is applied to the device with VID:PID 5131:2007 which
exhibits this behavior.

Signed-off-by: Jie Deng <dengjie03@kylinos.cn>
Link: https://patch.msgid.link/20260227084931.1527461-1-dengjie03@kylinos.cn
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/quirks.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index 2f7bd2fdc616..b3cc7beab4a3 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -78,4 +78,7 @@
 /* skip BOS descriptor request */
 #define USB_QUIRK_NO_BOS			BIT(17)
 
+/* Device claims zero configurations, forcing to 1 */
+#define USB_QUIRK_FORCE_ONE_CONFIG		BIT(18)
+
 #endif /* __LINUX_USB_QUIRKS_H */
-- 
cgit v1.2.3


From 96189080265e6bb5dde3a4afbaf947af493e3f82 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 9 Mar 2026 14:21:37 -0600
Subject: io_uring: ensure ctx->rings is stable for task work flags
 manipulation

If DEFER_TASKRUN | SETUP_TASKRUN is used and task work is added while
the ring is being resized, it's possible for the OR'ing of
IORING_SQ_TASKRUN to happen in the small window of swapping into the
new rings and the old rings being freed.

Prevent this by adding a 2nd ->rings pointer, ->rings_rcu, which is
protected by RCU. The task work flags manipulation is inside RCU
already, and if the resize ring freeing is done post an RCU synchronize,
then there's no need to add locking to the fast path of task work
additions.

Note: this is only done for DEFER_TASKRUN, as that's the only setup mode
that supports ring resizing. If this ever changes, then they too need to
use the io_ctx_mark_taskrun() helper.

Link: https://lore.kernel.org/io-uring/20260309062759.482210-1-naup96721@gmail.com/
Cc: stable@vger.kernel.org
Fixes: 79cfe9e59c2a ("io_uring/register: add IORING_REGISTER_RESIZE_RINGS")
Reported-by: Hao-Yu Yang <naup96721@gmail.com>
Suggested-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 3e4a82a6f817..dd1420bfcb73 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -388,6 +388,7 @@ struct io_ring_ctx {
 	 * regularly bounce b/w CPUs.
 	 */
 	struct {
+		struct io_rings	__rcu	*rings_rcu;
 		struct llist_head	work_llist;
 		struct llist_head	retry_llist;
 		unsigned long		check_cq;
-- 
cgit v1.2.3


From 94a4b1f959989de9c54d43c3a102fb1ee92e1414 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 7 Mar 2026 17:50:53 -0300
Subject: ipv6: move the disable_ipv6_mod knob to core code

From: Jakub Kicinski <kuba@kernel.org>

Make sure disable_ipv6_mod itself is not part of the IPv6 module,
in case core code wants to refer to it. We will remove support
for IPv6=m soon, this change helps make fixes we commit before
that less messy.

Link: https://patch.msgid.link/20260307-net-nd_tbl_fixes-v4-1-e2677e85628c@suse.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ipv6.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 443053a76dcf..a7421382a916 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -333,7 +333,12 @@ struct tcp6_timewait_sock {
 };
 
 #if IS_ENABLED(CONFIG_IPV6)
-bool ipv6_mod_enabled(void);
+extern int disable_ipv6_mod;
+
+static inline bool ipv6_mod_enabled(void)
+{
+	return disable_ipv6_mod == 0;
+}
 
 static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk)
 {
-- 
cgit v1.2.3


From c38b8f5f791ecce13ab77e2257f8fd2444ba80f6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 12 Mar 2026 04:39:08 +0000
Subject: net: prevent NULL deref in ip[6]tunnel_xmit()

Blamed commit missed that both functions can be called with dev == NULL.

Also add unlikely() hints for these conditions that only fuzzers can hit.

Fixes: 6f1a9140ecda ("net: add xmit recursion limit to tunnel xmit functions")
Signed-off-by: Eric Dumazet <edumazet@google.com>
CC: Weiming Shi <bestswngs@gmail.com>
Link: https://patch.msgid.link/20260312043908.2790803-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/ip6_tunnel.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 1253cbb4b0a4..359b595f1df9 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -156,10 +156,12 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
 {
 	int pkt_len, err;
 
-	if (dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT) {
-		net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
-				     dev->name);
-		DEV_STATS_INC(dev, tx_errors);
+	if (unlikely(dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT)) {
+		if (dev) {
+			net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
+					     dev->name);
+			DEV_STATS_INC(dev, tx_errors);
+		}
 		kfree_skb(skb);
 		return;
 	}
-- 
cgit v1.2.3