diff options
| author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2026-06-01 17:51:08 +0200 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2026-06-01 17:51:08 +0200 |
| commit | 1bab7eea6ecd7325ed62daecc8cd3a8f5b20f26f (patch) | |
| tree | f9a23c140fe90e3959231d1f5e4b6e4e218895a3 | |
| parent | 799289456aa4a482feecff17333b1113a963511b (diff) | |
| parent | 18ad16ce4a6b2714583fd1e1044c6ea8e53b3519 (diff) | |
Merge v6.18.34linux-rolling-lts
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
397 files changed, 4706 insertions, 2334 deletions
diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index 26e702c7016e..66287f8d645f 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -348,11 +348,12 @@ HyperThreading (HT) in the context of Intel processors, is enabled on at least one core, ``intel_pstate`` assigns performance-based priorities to CPUs. Namely, the priority of a given CPU reflects its highest HWP performance level which causes the CPU scheduler to generally prefer more performant CPUs, so the less -performant CPUs are used when the other ones are fully loaded. However, SMT -siblings (that is, logical CPUs sharing one physical core) are treated in a -special way such that if one of them is in use, the effective priority of the -other ones is lowered below the priorities of the CPUs located in the other -physical cores. +performant CPUs are used when the other ones are fully loaded. SMT siblings +(that is, logical CPUs sharing one physical core) are given the same priority. +The scheduler can pull tasks from lower-priority cores and place them on any +sibling. Since the scheduler spreads tasks among physical cores, tasks will be +placed on the SMT siblings of physical cores only after all physical cores are +busy. This approach maximizes performance in the majority of cases, but unfortunately it also leads to excessive energy usage in some important scenarios, like video diff --git a/Documentation/crypto/krb5.rst b/Documentation/crypto/krb5.rst index beffa0133446..f62e07ac6811 100644 --- a/Documentation/crypto/krb5.rst +++ b/Documentation/crypto/krb5.rst @@ -158,13 +158,22 @@ returned. When a message has been received, the location and size of the data with the message can be determined by calling:: - void crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5, - enum krb5_crypto_mode mode, - size_t *_offset, size_t *_len); + int crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5, + enum krb5_crypto_mode mode, + size_t *_offset, size_t *_len); The caller provides the offset and length of the message to the function, which then alters those values to indicate the region containing the data (plus any -padding). It is up to the caller to determine how much padding there is. +padding). It is up to the caller to determine how much padding there is. The +function returns an error if the length is too small or if the mode is +unsupported. An additional function:: + + int crypto_krb5_check_data_len(const struct krb5_enctype *krb5, + enum krb5_crypto_mode mode, + size_t len, size_t min_content); + +is provided to just do a basic check that the decrypted/verified message would +have a sufficient minimum payload. Preparation Functions --------------------- diff --git a/Documentation/devicetree/bindings/soc/bcm/brcm,bcm2835-pm.yaml b/Documentation/devicetree/bindings/soc/bcm/brcm,bcm2835-pm.yaml index e28ef198a801..039c8e4a4c51 100644 --- a/Documentation/devicetree/bindings/soc/bcm/brcm,bcm2835-pm.yaml +++ b/Documentation/devicetree/bindings/soc/bcm/brcm,bcm2835-pm.yaml @@ -13,23 +13,21 @@ description: | maintainers: - Nicolas Saenz Julienne <nsaenz@kernel.org> -allOf: - - $ref: /schemas/watchdog/watchdog.yaml# - properties: compatible: items: - enum: - brcm,bcm2835-pm - brcm,bcm2711-pm + - brcm,bcm2712-pm - const: brcm,bcm2835-pm-wdt reg: - minItems: 2 + minItems: 1 maxItems: 3 reg-names: - minItems: 2 + minItems: 1 items: - const: pm - const: asb @@ -62,7 +60,35 @@ required: - reg - "#power-domain-cells" - "#reset-cells" - - clocks + +allOf: + - $ref: /schemas/watchdog/watchdog.yaml# + + - if: + properties: + compatible: + contains: + enum: + - brcm,bcm2835-pm + - brcm,bcm2711-pm + then: + required: + - clocks + + properties: + reg: + minItems: 2 + + reg-names: + minItems: 2 + + else: + properties: + reg: + maxItems: 1 + + reg-names: + maxItems: 1 additionalProperties: false diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index 5139705089f2..781129f78b06 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -92,6 +92,18 @@ GEM Atomic Helper Reference .. kernel-doc:: drivers/gpu/drm/drm_gem_atomic_helper.c :export: +VBLANK Helper Reference +----------------------- + +.. kernel-doc:: drivers/gpu/drm/drm_vblank_helper.c + :doc: overview + +.. kernel-doc:: include/drm/drm_vblank_helper.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/drm_vblank_helper.c + :export: + Simple KMS Helper Reference =========================== @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 18 -SUBLEVEL = 33 +SUBLEVEL = 34 EXTRAVERSION = NAME = Baby Opossum Posse diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 483965c5a4de..b154b4e3dfa8 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -5,4 +5,5 @@ generic-y += agp.h generic-y += asm-offsets.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += ring_buffer.h generic-y += text-patching.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 4c69522e0328..483caacc6988 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -5,5 +5,6 @@ generic-y += extable.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += parport.h +generic-y += ring_buffer.h generic-y += user.h generic-y += text-patching.h diff --git a/arch/arm/boot/dts/renesas/r7s72100-genmai.dts b/arch/arm/boot/dts/renesas/r7s72100-genmai.dts index 3c3756509714..da552a66615e 100644 --- a/arch/arm/boot/dts/renesas/r7s72100-genmai.dts +++ b/arch/arm/boot/dts/renesas/r7s72100-genmai.dts @@ -34,9 +34,6 @@ clocks = <&mstp9_clks R7S72100_CLK_SPIBSC0>; power-domains = <&cpg_clocks>; - #address-cells = <1>; - #size-cells = <1>; - partitions { compatible = "fixed-partitions"; #address-cells = <1>; diff --git a/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts b/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts index 91178fb9e721..3306bc9b7bc3 100644 --- a/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts +++ b/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts @@ -36,8 +36,6 @@ power-domains = <&cpg_clocks>; bank-width = <4>; device-width = <1>; - #address-cells = <1>; - #size-cells = <1>; partitions { compatible = "fixed-partitions"; diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 03657ff8fbe3..decad5f2c826 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += early_ioremap.h generic-y += extable.h generic-y += flat.h generic-y += parport.h +generic-y += ring_buffer.h generated-y += mach-types.h generated-y += unistd-nr.h diff --git a/arch/arm/mach-versatile/integrator_cp.c b/arch/arm/mach-versatile/integrator_cp.c index 2ed4ded56b3f..03dfb5f720b7 100644 --- a/arch/arm/mach-versatile/integrator_cp.c +++ b/arch/arm/mach-versatile/integrator_cp.c @@ -86,14 +86,6 @@ static u64 notrace intcp_read_sched_clock(void) return val; } -static void __init intcp_init_early(void) -{ - cm_map = syscon_regmap_lookup_by_compatible("arm,core-module-integrator"); - if (IS_ERR(cm_map)) - return; - sched_clock_register(intcp_read_sched_clock, 32, 24000000); -} - static void __init intcp_init_irq_of(void) { cm_init(); @@ -119,6 +111,10 @@ static void __init intcp_init_of(void) { struct device_node *cpcon; + cm_map = syscon_regmap_lookup_by_compatible("arm,core-module-integrator"); + if (!IS_ERR(cm_map)) + sched_clock_register(intcp_read_sched_clock, 32, 24000000); + cpcon = of_find_matching_node(NULL, intcp_syscon_match); if (!cpcon) return; @@ -138,7 +134,6 @@ static const char * intcp_dt_board_compat[] = { DT_MACHINE_START(INTEGRATOR_CP_DT, "ARM Integrator/CP (Device Tree)") .reserve = integrator_reserve, .map_io = intcp_map_io, - .init_early = intcp_init_early, .init_irq = intcp_init_irq_of, .init_machine = intcp_init_of, .dt_compat = intcp_dt_board_compat, diff --git a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi index 205b87f557d6..d771694787b2 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi @@ -250,6 +250,15 @@ status = "disabled"; }; + pm: watchdog@7d200000 { + compatible = "brcm,bcm2712-pm", "brcm,bcm2835-pm-wdt"; + reg = <0x7d200000 0x604>; + reg-names = "pm"; + #power-domain-cells = <1>; + #reset-cells = <1>; + system-power-controller; + }; + pinctrl: pinctrl@7d504100 { compatible = "brcm,bcm2712c0-pinctrl"; reg = <0x7d504100 0x30>; diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 18c7811774d3..032cd7b76b48 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -409,7 +409,7 @@ __AARCH64_INSN_FUNCS(cbz, 0x7F000000, 0x34000000) __AARCH64_INSN_FUNCS(cbnz, 0x7F000000, 0x35000000) __AARCH64_INSN_FUNCS(tbz, 0x7F000000, 0x36000000) __AARCH64_INSN_FUNCS(tbnz, 0x7F000000, 0x37000000) -__AARCH64_INSN_FUNCS(bcond, 0xFF000010, 0x54000000) +__AARCH64_INSN_FUNCS(bcond, 0xFF000000, 0x54000000) __AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001) __AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002) __AARCH64_INSN_FUNCS(smc, 0xFFE0001F, 0xD4000003) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 258cca4b4873..5062d8725fda 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -33,7 +33,7 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, unsigned long vaddr); #define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio -bool tag_clear_highpages(struct page *to, int numpages); +bool tag_clear_highpages(struct page *to, int numpages, bool clear_pages); #define __HAVE_ARCH_TAG_CLEAR_HIGHPAGES #define clear_user_page(page, vaddr, pg) clear_page(page) diff --git a/arch/arm64/include/asm/ring_buffer.h b/arch/arm64/include/asm/ring_buffer.h new file mode 100644 index 000000000000..62316c406888 --- /dev/null +++ b/arch/arm64/include/asm/ring_buffer.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_ARM64_RING_BUFFER_H +#define _ASM_ARM64_RING_BUFFER_H + +#include <asm/cacheflush.h> + +/* Flush D-cache on persistent ring buffer */ +#define arch_ring_buffer_flush_range(start, end) dcache_clean_pop(start, end) + +#endif /* _ASM_ARM64_RING_BUFFER_H */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index afc77977d4b9..e59cb36b5f36 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -490,8 +490,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) kvm_destroy_mpidr_data(vcpu->kvm); err = kvm_vgic_vcpu_init(vcpu); - if (err) + if (err) { + kvm_vgic_vcpu_destroy(vcpu); return err; + } err = kvm_share_hyp(vcpu, vcpu + 1); if (err) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 3f1c4b10fed9..a4c0fb7f02ea 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2307,6 +2307,10 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id, /* dte entry is valid */ offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT; + /* Mimic the MAPD behaviour and reject invalid EID bits. */ + if (num_eventid_bits > VITS_TYPER_IDBITS) + return -EINVAL; + if (!vgic_its_check_id(its, baser, id, NULL)) return -EINVAL; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index a193b6a5d1e6..4c62082b9a3b 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -967,7 +967,7 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, return vma_alloc_folio(flags, 0, vma, vaddr); } -bool tag_clear_highpages(struct page *page, int numpages) +bool tag_clear_highpages(struct page *page, int numpages, bool clear_pages) { /* * Check if MTE is supported and fall back to clear_highpage(). @@ -975,13 +975,16 @@ bool tag_clear_highpages(struct page *page, int numpages) * post_alloc_hook() will invoke tag_clear_highpages(). */ if (!system_supports_mte()) - return false; + return clear_pages; /* Newly allocated pages, shouldn't have been tagged yet */ for (int i = 0; i < numpages; i++, page++) { WARN_ON_ONCE(!try_page_mte_tagging(page)); - mte_zero_clear_page_tags(page_address(page)); + if (clear_pages) + mte_zero_clear_page_tags(page_address(page)); + else + mte_clear_page_tags(page_address(page)); set_page_mte_tagged(page); } - return true; + return false; } diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild index 3a5c7f6e5aac..7dca0c6cdc84 100644 --- a/arch/csky/include/asm/Kbuild +++ b/arch/csky/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += qrwlock.h generic-y += qrwlock_types.h generic-y += qspinlock.h generic-y += parport.h +generic-y += ring_buffer.h generic-y += user.h generic-y += vmlinux.lds.h generic-y += text-patching.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 1efa1e993d4b..0f887d4238ed 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -5,4 +5,5 @@ generic-y += extable.h generic-y += iomap.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += ring_buffer.h generic-y += text-patching.h diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index b04d2cef935f..bdd9f0aa8f71 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -9,5 +9,6 @@ generic-y += qrwlock.h generic-y += user.h generic-y += ioctl.h generic-y += mmzone.h +generic-y += ring_buffer.h generic-y += statfs.h generic-y += text-patching.h diff --git a/arch/loongarch/kernel/kprobes.c b/arch/loongarch/kernel/kprobes.c index 8ba391cfabb0..1985ed30dd16 100644 --- a/arch/loongarch/kernel/kprobes.c +++ b/arch/loongarch/kernel/kprobes.c @@ -60,16 +60,18 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe); /* Install breakpoint in text */ void arch_arm_kprobe(struct kprobe *p) { - *p->addr = KPROBE_BP_INSN; - flush_insn_slot(p); + u32 insn = KPROBE_BP_INSN; + + larch_insn_text_copy(p->addr, &insn, LOONGARCH_INSN_SIZE); } NOKPROBE_SYMBOL(arch_arm_kprobe); /* Remove breakpoint from text */ void arch_disarm_kprobe(struct kprobe *p) { - *p->addr = p->opcode; - flush_insn_slot(p); + u32 insn = p->opcode; + + larch_insn_text_copy(p->addr, &insn, LOONGARCH_INSN_SIZE); } NOKPROBE_SYMBOL(arch_disarm_kprobe); @@ -184,16 +186,16 @@ static bool reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { switch (kcb->kprobe_status) { - case KPROBE_HIT_SS: case KPROBE_HIT_SSDONE: case KPROBE_HIT_ACTIVE: kprobes_inc_nmissed_count(p); setup_singlestep(p, regs, kcb, 1); break; + case KPROBE_HIT_SS: case KPROBE_REENTER: pr_warn("Failed to recover from reentered kprobes.\n"); dump_kprobe(p); - WARN_ON_ONCE(1); + BUG(); break; default: WARN_ON(1); diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 6bfd4b8dad1b..900ce1da75a4 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -97,11 +97,7 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct page *page = pfn_to_page(start_pfn); - /* With altmap the first mapped page is offset from @start */ - if (altmap) - page += vmem_altmap_offset(altmap); __remove_pages(start_pfn, nr_pages, altmap); } #endif diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index b282e0dd8dc1..62543bf305ff 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -3,5 +3,6 @@ generated-y += syscall_table.h generic-y += extable.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += ring_buffer.h generic-y += spinlock.h generic-y += text-patching.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 7178f990e8b3..0030309b47ad 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += extable.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += parport.h +generic-y += ring_buffer.h generic-y += syscalls.h generic-y += tlb.h generic-y += user.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 684569b2ecd6..9771c3d85074 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -12,5 +12,6 @@ generic-y += mcs_spinlock.h generic-y += parport.h generic-y += qrwlock.h generic-y += qspinlock.h +generic-y += ring_buffer.h generic-y += user.h generic-y += text-patching.h diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index 28004301c236..0a2530964413 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += cmpxchg.h generic-y += extable.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += ring_buffer.h generic-y += spinlock.h generic-y += user.h generic-y += text-patching.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index cef49d60d74c..8aa34621702d 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -8,4 +8,5 @@ generic-y += spinlock_types.h generic-y += spinlock.h generic-y += qrwlock_types.h generic-y += qrwlock.h +generic-y += ring_buffer.h generic-y += user.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 4fb596d94c89..d48d158f7241 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -4,4 +4,5 @@ generated-y += syscall_table_64.h generic-y += agp.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += ring_buffer.h generic-y += user.h diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index f15e5920080b..e8718bc13eeb 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -83,11 +83,10 @@ config MSI_BITMAP_SELFTEST depends on DEBUG_KERNEL config GUEST_STATE_BUFFER_TEST - def_tristate n + def_tristate KUNIT_ALL_TESTS prompt "Enable Guest State Buffer unit tests" depends on KUNIT depends on KVM_BOOK3S_HV_POSSIBLE - default KUNIT_ALL_TESTS help The Guest State Buffer is a data format specified in the PAPR. It is by hcalls to communicate the state of L2 guests between diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 2e23533b67e3..805b5aeebb6f 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -5,4 +5,5 @@ generated-y += syscall_table_spu.h generic-y += agp.h generic-y += mcs_spinlock.h generic-y += qrwlock.h +generic-y += ring_buffer.h generic-y += early_ioremap.h diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 4bbeb8644d3d..b4472288e0d4 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -458,6 +458,10 @@ DEFINE_PER_CPU(u8, irq_work_pending); #endif /* 32 vs 64 bit */ +/* + * Must be called with preemption disabled since it updates + * per-CPU irq_work state and programs the local CPU decrementer. + */ void arch_irq_work_raise(void) { /* @@ -471,10 +475,8 @@ void arch_irq_work_raise(void) * which could get tangled up if we're messing with the same state * here. */ - preempt_disable(); set_irq_work_pending_flag(); set_dec(1); - preempt_enable(); } static void set_dec_or_work(u64 val) diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c index 99f0f0f41876..4ad223525e89 100644 --- a/arch/powerpc/platforms/82xx/km82xx.c +++ b/arch/powerpc/platforms/82xx/km82xx.c @@ -27,8 +27,8 @@ static void __init km82xx_pic_init(void) { - struct device_node *np __free(device_node); - np = of_find_compatible_node(NULL, NULL, "fsl,pq2-pic"); + struct device_node *np __free(device_node) = of_find_compatible_node(NULL, + NULL, "fsl,pq2-pic"); if (!np) { pr_err("PIC init: can not find cpm-pic node\n"); diff --git a/arch/riscv/errata/mips/errata.c b/arch/riscv/errata/mips/errata.c index e984a8152208..2c3dc2259e93 100644 --- a/arch/riscv/errata/mips/errata.c +++ b/arch/riscv/errata/mips/errata.c @@ -57,7 +57,7 @@ void mips_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, } tmp = (1U << alt->patch_id); - if (cpu_req_errata && tmp) { + if (cpu_req_errata & tmp) { mutex_lock(&text_mutex); patch_text_nosync(ALT_OLD_PTR(alt), ALT_ALT_PTR(alt), alt->alt_len); diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index bd5fc9403295..7721b63642f4 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -14,5 +14,6 @@ generic-y += ticket_spinlock.h generic-y += qrwlock.h generic-y += qrwlock_types.h generic-y += qspinlock.h +generic-y += ring_buffer.h generic-y += user.h generic-y += vmlinux.lds.h diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index a2fae70ee174..ef55089c0c1a 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -431,8 +431,10 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s } kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC); - if (!kvpmu->sdata) - return -ENOMEM; + if (!kvpmu->sdata) { + sbiret = SBI_ERR_FAILURE; + goto out; + } /* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */ if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) { @@ -476,8 +478,10 @@ int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low } einfo = kzalloc(shmem_size, GFP_KERNEL); - if (!einfo) - return -ENOMEM; + if (!einfo) { + ret = SBI_ERR_FAILURE; + goto out; + } ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size); if (ret) { diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index d85efe74a4b6..ee40ca01ac66 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -852,6 +852,27 @@ static void __init set_mmap_rnd_bits_max(void) mmap_rnd_bits_max = MMAP_VA_BITS - PAGE_SHIFT - 3; } +static bool __init is_vaddr_valid(unsigned long va) +{ + unsigned long up = 0; + + switch (satp_mode) { + case SATP_MODE_39: + up = 1UL << 38; + break; + case SATP_MODE_48: + up = 1UL << 47; + break; + case SATP_MODE_57: + up = 1UL << 56; + break; + default: + return false; + } + + return (va < up) || (va >= (ULONG_MAX - up + 1)); +} + /* * There is a simple way to determine if 4-level is supported by the * underlying hardware: establish 1:1 mapping in 4-level page table mode @@ -893,6 +914,9 @@ static __init void set_satp_mode(uintptr_t dtb_pa) set_satp_mode_pmd + PMD_SIZE, PMD_SIZE, PAGE_KERNEL_EXEC); retry: + if (!is_vaddr_valid(set_satp_mode_pmd)) + goto out; + create_pgd_mapping(early_pg_dir, set_satp_mode_pmd, pgtable_l5_enabled ? @@ -915,6 +939,7 @@ retry: disable_pgtable_l4(); } +out: memset(early_pg_dir, 0, PAGE_SIZE); memset(early_p4d, 0, PAGE_SIZE); memset(early_pud, 0, PAGE_SIZE); diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 297bf7157968..2b367fa4de8e 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -8,3 +8,4 @@ generic-y += asm-offsets.h generic-y += kvm_types.h generic-y += mcs_spinlock.h generic-y += mmzone.h +generic-y += ring_buffer.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 4d3f10ed8275..f0403d3ee8ab 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -3,4 +3,5 @@ generated-y += syscall_table.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += parport.h +generic-y += ring_buffer.h generic-y += text-patching.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 17ee8a273aa6..49c6bb326b75 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -4,4 +4,5 @@ generated-y += syscall_table_64.h generic-y += agp.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += ring_buffer.h generic-y += text-patching.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index b6810db24ca4..9be3ee2e3701 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -18,6 +18,7 @@ generic-y += module.lds.h generic-y += parport.h generic-y += percpu.h generic-y += preempt.h +generic-y += ring_buffer.h generic-y += runtime-const.h generic-y += softirq_stack.h generic-y += switch_to.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4566000e15c4..078fd2c0d69d 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -14,3 +14,4 @@ generic-y += early_ioremap.h generic-y += fprobe.h generic-y += mcs_spinlock.h generic-y += mmzone.h +generic-y += ring_buffer.h diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 460e90a1a0b1..c8b112c6c549 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -89,7 +89,6 @@ struct mca_config mca_cfg __read_mostly = { }; static DEFINE_PER_CPU(struct mce_hw_err, hw_errs_seen); -static unsigned long mce_need_notify; /* * MCA banks polled by the period polling timer for corrected events. @@ -151,8 +150,10 @@ EXPORT_PER_CPU_SYMBOL_GPL(injectm); void mce_log(struct mce_hw_err *err) { - if (mce_gen_pool_add(err)) + if (mce_gen_pool_add(err)) { + pr_info(HW_ERR "Machine check events logged\n"); irq_work_queue(&mce_irq_work); + } } EXPORT_SYMBOL_GPL(mce_log); @@ -584,28 +585,6 @@ bool mce_is_correctable(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_correctable); -/* - * Notify the user(s) about new machine check events. - * Can be called from interrupt context, but not from machine check/NMI - * context. - */ -static bool mce_notify_irq(void) -{ - /* Not more than two messages every minute */ - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); - - if (test_and_clear_bit(0, &mce_need_notify)) { - mce_work_trigger(); - - if (__ratelimit(&ratelimit)) - pr_info(HW_ERR "Machine check events logged\n"); - - return true; - } - - return false; -} - static int mce_early_notifier(struct notifier_block *nb, unsigned long val, void *data) { @@ -617,9 +596,7 @@ static int mce_early_notifier(struct notifier_block *nb, unsigned long val, /* Emit the trace record: */ trace_mce_record(err); - set_bit(0, &mce_need_notify); - - mce_notify_irq(); + mce_work_trigger(); return NOTIFY_DONE; } @@ -1771,7 +1748,7 @@ static void mce_timer_fn(struct timer_list *t) * Alert userspace if needed. If we logged an MCE, reduce the polling * interval, otherwise increase the polling interval. */ - if (mce_notify_irq()) + if (!mce_gen_pool_empty()) iv = max(iv / 2, (unsigned long) HZ/100); else iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index ba6e9485e824..ba844088581f 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -1252,12 +1252,14 @@ bool __init avic_hardware_setup(void) svm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization = true; /* - * Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2) - * due to erratum 1235, which results in missed VM-Exits on the sender - * and thus missed wake events for blocking vCPUs due to the CPU - * failing to see a software update to clear IsRunning. + * Disable IPI virtualization for AMD Family 17h (Zen1 and Zen2) and + * Hygon Family 18h (derived from AMD Zen1) CPUs due to erratum 1235, + * which results in missed VM-Exits on the sender and thus missed wake + * events for blocking vCPUs due to the CPU failing to see a software + * update to clear IsRunning. */ - enable_ipiv = enable_ipiv && boot_cpu_data.x86 != 0x17; + if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) + enable_ipiv = false; amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 3823e52aef52..6260f65a78c5 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -655,7 +655,7 @@ static void __init xen_e820_swap_entry_with_ram(struct e820_entry *swap_entry) /* Fill new entry (keep size and page offset). */ entry->type = swap_entry->type; entry->addr = entry_end - swap_size + - swap_addr - swap_entry->addr; + swap_entry->addr - swap_addr; entry->size = swap_entry->size; /* Convert old entry to RAM, align to pages. */ diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 13fe45dea296..e57af619263a 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -6,5 +6,6 @@ generic-y += mcs_spinlock.h generic-y += parport.h generic-y += qrwlock.h generic-y += qspinlock.h +generic-y += ring_buffer.h generic-y += user.h generic-y += text-patching.h diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 12d887349c26..46c59ed92bd1 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -205,7 +205,6 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec, } bip->bip_flags |= BIP_COPY_USER; - bip->bip_vcnt = nr_vecs; return 0; free_bip: bio_integrity_free(bio); @@ -300,6 +299,24 @@ int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter) if (unlikely(ret < 0)) goto free_bvec; + /* + * Handle partial pinning. This can happen when pin_user_pages_fast() + * returns fewer pages than requested. + */ + if (user_backed_iter(iter) && unlikely(ret != bytes)) { + if (ret > 0) { + int npinned = DIV_ROUND_UP(offset + ret, PAGE_SIZE); + int i; + + for (i = 0; i < npinned; i++) + unpin_user_page(pages[i]); + } + if (pages != stack_pages) + kvfree(pages); + ret = -EFAULT; + goto free_bvec; + } + nr_bvecs = bvec_from_pages(bvec, pages, nr_vecs, bytes, offset, &is_p2p); if (pages != stack_pages) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 9a8d047be580..f1ea69743c54 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -2241,7 +2241,7 @@ void blk_cgroup_bio_start(struct bio *bio) } u64_stats_update_end_irqrestore(&bis->sync, flags); - css_rstat_updated(&blkcg->css, cpu); + __css_rstat_updated(&blkcg->css, cpu); put_cpu(); } diff --git a/block/blk-mq.c b/block/blk-mq.c index 4ebb92014eae..ab05c5c9e6ae 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3285,6 +3285,25 @@ blk_status_t blk_insert_cloned_request(struct request *rq) return BLK_STS_IOERR; } + /* + * Integrity segment counting depends on the same queue limits + * (virt_boundary_mask, seg_boundary_mask, max_segment_size) that + * vary across stacked queues, so recompute against the bottom + * queue just like nr_phys_segments above. + */ + if (blk_integrity_rq(rq) && rq->bio) { + unsigned short max_int_segs = queue_max_integrity_segments(q); + + rq->nr_integrity_segments = + blk_rq_count_integrity_sg(rq->q, rq->bio); + if (rq->nr_integrity_segments > max_int_segs) { + printk(KERN_ERR "%s: over max integrity segments limit. (%u > %u)\n", + __func__, rq->nr_integrity_segments, + max_int_segs); + return BLK_STS_IOERR; + } + } + if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq))) return BLK_STS_IOERR; diff --git a/crypto/krb5/krb5_api.c b/crypto/krb5/krb5_api.c index 23026d4206c8..c7ea40f900a7 100644 --- a/crypto/krb5/krb5_api.c +++ b/crypto/krb5/krb5_api.c @@ -134,27 +134,69 @@ EXPORT_SYMBOL(crypto_krb5_how_much_data); * Find the offset and size of the data in a secure message so that this * information can be used in the metadata buffer which will get added to the * digest by crypto_krb5_verify_mic(). + * + * Return: 0 if successful, -EBADMSG if the message is too short or -EINVAL if + * the mode is unsupported. */ -void crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5, - enum krb5_crypto_mode mode, - size_t *_offset, size_t *_len) +int crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5, + enum krb5_crypto_mode mode, + size_t *_offset, size_t *_len) { switch (mode) { case KRB5_CHECKSUM_MODE: + if (*_len < krb5->cksum_len) + return -EBADMSG; *_offset += krb5->cksum_len; *_len -= krb5->cksum_len; - return; + return 0; case KRB5_ENCRYPT_MODE: + if (*_len < krb5->conf_len + krb5->cksum_len) + return -EBADMSG; *_offset += krb5->conf_len; *_len -= krb5->conf_len + krb5->cksum_len; - return; + return 0; default: WARN_ON_ONCE(1); - return; + return -EINVAL; } } EXPORT_SYMBOL(crypto_krb5_where_is_the_data); +/** + * crypto_krb5_check_data_len - Check a message is big enough + * @krb5: The encoding to use. + * @mode: Mode of operation. + * @len: The length of the secure blob. + * @min_content: Minimum length of the content inside the blob. + * + * Check that a message is large enough to hold whatever bits the encryption + * type wants to glue on (nonce, checksum) plus a minimum amount of content. + * + * Return: 0 if successful, -EBADMSG if the message is too short or -EINVAL if + * the mode is unsupported. + */ +int crypto_krb5_check_data_len(const struct krb5_enctype *krb5, + enum krb5_crypto_mode mode, + size_t len, size_t min_content) +{ + switch (mode) { + case KRB5_CHECKSUM_MODE: + if (len < krb5->cksum_len || + len - krb5->cksum_len < min_content) + return -EBADMSG; + return 0; + case KRB5_ENCRYPT_MODE: + if (len < krb5->conf_len + krb5->cksum_len || + len - (krb5->conf_len + krb5->cksum_len) < min_content) + return -EBADMSG; + return 0; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } +} +EXPORT_SYMBOL(crypto_krb5_check_data_len); + /* * Prepare the encryption with derived key data. */ diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index c4f117edb266..3335c92b0d7d 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -605,8 +605,11 @@ static const struct vm_operations_struct drm_vm_ops = { static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { struct qaic_bo *bo = to_qaic_bo(obj); + unsigned long remap_start; unsigned long offset = 0; + unsigned long remap_end; struct scatterlist *sg; + unsigned long length; int ret = 0; if (drm_gem_is_imported(obj)) @@ -614,11 +617,27 @@ static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struc for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) { if (sg_page(sg)) { + /* if sg is too large for the VMA, so truncate it to fit */ + if (check_add_overflow(vma->vm_start, offset, &remap_start)) + return -EINVAL; + if (check_add_overflow(remap_start, sg->length, &remap_end)) + return -EINVAL; + + if (remap_end > vma->vm_end) { + if (check_sub_overflow(vma->vm_end, remap_start, &length)) + return -EINVAL; + } else { + length = sg->length; + } + + if (length == 0) + goto out; + ret = remap_pfn_range(vma, vma->vm_start + offset, page_to_pfn(sg_page(sg)), - sg->length, vma->vm_page_prot); + length, vma->vm_page_prot); if (ret) goto out; - offset += sg->length; + offset += length; } } diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 4fb8990d22d5..5ee4adc62342 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5499,6 +5499,7 @@ void ata_link_init(struct ata_port *ap, struct ata_link *link, int pmp) link->pmp = pmp; link->active_tag = ATA_TAG_POISON; link->hw_sata_spd_limit = UINT_MAX; + INIT_WORK(&link->deferred_qc_work, ata_scsi_deferred_qc_work); /* can't use iterator, ap isn't initialized yet */ for (i = 0; i < ATA_MAX_DEVICES; i++) { @@ -5581,7 +5582,6 @@ struct ata_port *ata_port_alloc(struct ata_host *host) mutex_init(&ap->scsi_scan_mutex); INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug); INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan); - INIT_WORK(&ap->deferred_qc_work, ata_scsi_deferred_qc_work); INIT_LIST_HEAD(&ap->eh_done_q); init_waitqueue_head(&ap->eh_wait_q); init_completion(&ap->park_req_pending); @@ -6204,12 +6204,15 @@ static void ata_port_detach(struct ata_port *ap) /* It better be dead now and not have any remaining deferred qc. */ WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED)); - WARN_ON(ap->deferred_qc); - cancel_work_sync(&ap->deferred_qc_work); cancel_delayed_work_sync(&ap->hotplug_task); cancel_delayed_work_sync(&ap->scsi_rescan_task); + ata_for_each_link(link, ap, PMP_FIRST) { + WARN_ON(link->deferred_qc); + cancel_work_sync(&link->deferred_qc_work); + } + /* Delete port multiplier link transport devices */ if (ap->pmp_link) { int i; diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 23be85418b3b..5e8a63206108 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -643,11 +643,11 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, if (qc->scsicmd != scmd) continue; if ((qc->flags & ATA_QCFLAG_ACTIVE) || - qc == ap->deferred_qc) + qc == qc->dev->link->deferred_qc) break; } - if (i < ATA_MAX_QUEUE && qc == ap->deferred_qc) { + if (i < ATA_MAX_QUEUE && qc == qc->dev->link->deferred_qc) { /* * This is a deferred command that timed out while * waiting for the command queue to drain. Since the qc @@ -658,8 +658,8 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, * deferred qc work from issuing this qc. */ WARN_ON_ONCE(qc->flags & ATA_QCFLAG_ACTIVE); - ap->deferred_qc = NULL; - cancel_work(&ap->deferred_qc_work); + qc->dev->link->deferred_qc = NULL; + cancel_work(&qc->dev->link->deferred_qc_work); set_host_byte(scmd, DID_TIME_OUT); scsi_eh_finish_cmd(scmd, &ap->eh_done_q); } else if (i < ATA_MAX_QUEUE) { diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c index 57023324a56f..48ac09d9031e 100644 --- a/drivers/ata/libata-pmp.c +++ b/drivers/ata/libata-pmp.c @@ -110,13 +110,24 @@ int sata_pmp_qc_defer_cmd_switch(struct ata_queued_cmd *qc) { struct ata_link *link = qc->dev->link; struct ata_port *ap = link->ap; + int ret; if (ap->excl_link == NULL || ap->excl_link == link) { if (ap->nr_active_links == 0 || ata_link_active(link)) { qc->flags |= ATA_QCFLAG_CLEAR_EXCL; - return ata_std_qc_defer(qc); + ret = ata_std_qc_defer(qc); + if (ret == ATA_DEFER_LINK) + return ATA_DEFER_LINK_EXCL; + return ret; } + /* + * Note: ap->excl_link contains the link that is next in line, + * i.e. implicit round robin. If there is only one link + * dispatching, ap->excl_link will be left unclaimed, allowing + * other links to set ap->excl_link, ensuring that the currently + * active link cannot queue any more. + */ ap->excl_link = link; } @@ -571,8 +582,11 @@ static void sata_pmp_detach(struct ata_device *dev) if (ap->ops->pmp_detach) ap->ops->pmp_detach(ap); - ata_for_each_link(tlink, ap, EDGE) + ata_for_each_link(tlink, ap, EDGE) { + WARN_ON(tlink->deferred_qc); + cancel_work_sync(&tlink->deferred_qc_work); ata_eh_detach_dev(tlink->device); + } spin_lock_irqsave(ap->lock, flags); ap->nr_pmp_links = 0; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index e7c78b8d3c2c..c7d2addf3487 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1659,8 +1659,9 @@ static void ata_qc_done(struct ata_queued_cmd *qc) void ata_scsi_deferred_qc_work(struct work_struct *work) { - struct ata_port *ap = - container_of(work, struct ata_port, deferred_qc_work); + struct ata_link *link = + container_of(work, struct ata_link, deferred_qc_work); + struct ata_port *ap = link->ap; struct ata_queued_cmd *qc; unsigned long flags; @@ -1671,10 +1672,10 @@ void ata_scsi_deferred_qc_work(struct work_struct *work) * such case, we should not need any more deferring the qc, so warn if * qc_defer() says otherwise. */ - qc = ap->deferred_qc; + qc = link->deferred_qc; if (qc && !ata_port_eh_scheduled(ap)) { WARN_ON_ONCE(ap->ops->qc_defer(qc)); - ap->deferred_qc = NULL; + link->deferred_qc = NULL; ata_qc_issue(qc); } @@ -1683,8 +1684,7 @@ void ata_scsi_deferred_qc_work(struct work_struct *work) void ata_scsi_requeue_deferred_qc(struct ata_port *ap) { - struct ata_queued_cmd *qc = ap->deferred_qc; - struct scsi_cmnd *scmd; + struct ata_link *link; lockdep_assert_held(ap->lock); @@ -1693,20 +1693,25 @@ void ata_scsi_requeue_deferred_qc(struct ata_port *ap) * do not try to be smart about what to do with this deferred command * and simply requeue it by completing it with DID_REQUEUE. */ - if (!qc) - return; - - scmd = qc->scsicmd; - ap->deferred_qc = NULL; - cancel_work(&ap->deferred_qc_work); - ata_qc_free(qc); - scmd->result = (DID_REQUEUE << 16); - scsi_done(scmd); + ata_for_each_link(link, ap, PMP_FIRST) { + struct ata_queued_cmd *qc = link->deferred_qc; + struct scsi_cmnd *scmd; + + if (qc) { + scmd = qc->scsicmd; + link->deferred_qc = NULL; + cancel_work(&link->deferred_qc_work); + ata_qc_free(qc); + scmd->result = (DID_REQUEUE << 16); + scsi_done(scmd); + } + } } -static void ata_scsi_schedule_deferred_qc(struct ata_port *ap) +static void ata_scsi_schedule_deferred_qc(struct ata_link *link) { - struct ata_queued_cmd *qc = ap->deferred_qc; + struct ata_queued_cmd *qc = link->deferred_qc; + struct ata_port *ap = link->ap; lockdep_assert_held(ap->lock); @@ -1723,12 +1728,12 @@ static void ata_scsi_schedule_deferred_qc(struct ata_port *ap) return; } if (!ap->ops->qc_defer(qc)) - queue_work(system_highpri_wq, &ap->deferred_qc_work); + queue_work(system_highpri_wq, &link->deferred_qc_work); } static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) { - struct ata_port *ap = qc->ap; + struct ata_link *link = qc->dev->link; struct scsi_cmnd *cmd = qc->scsicmd; u8 *cdb = cmd->cmnd; bool have_sense = qc->flags & ATA_QCFLAG_SENSE_VALID; @@ -1759,22 +1764,23 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) ata_qc_done(qc); - ata_scsi_schedule_deferred_qc(ap); + ata_scsi_schedule_deferred_qc(link); } static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc) { + struct ata_link *link = qc->dev->link; int ret; if (!ap->ops->qc_defer) - goto issue; + goto issue_qc; /* * If we already have a deferred qc, then rely on the SCSI layer to * requeue and defer all incoming commands until the deferred qc is * processed, once all on-going commands complete. */ - if (ap->deferred_qc) { + if (link->deferred_qc) { ata_qc_free(qc); return SCSI_MLQUEUE_DEVICE_BUSY; } @@ -1786,38 +1792,46 @@ static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc) break; case ATA_DEFER_LINK: ret = SCSI_MLQUEUE_DEVICE_BUSY; - break; + goto defer_qc; + case ATA_DEFER_LINK_EXCL: + /* + * Drivers making use of ap->excl_link cannot store the QC in + * link->deferred_qc, because the ap->excl_link handling is + * incompatible with the link->deferred_qc workqueue handling. + */ + ret = SCSI_MLQUEUE_DEVICE_BUSY; + goto free_qc; case ATA_DEFER_PORT: ret = SCSI_MLQUEUE_HOST_BUSY; - break; + goto free_qc; default: WARN_ON_ONCE(1); ret = SCSI_MLQUEUE_HOST_BUSY; - break; + goto free_qc; } - if (ret) { - /* - * We must defer this qc: if this is not an NCQ command, keep - * this qc as a deferred one and report to the SCSI layer that - * we issued it so that it is not requeued. The deferred qc will - * be issued with the port deferred_qc_work once all on-going - * commands complete. - */ - if (!ata_is_ncq(qc->tf.protocol)) { - ap->deferred_qc = qc; - return 0; - } +issue_qc: + ata_qc_issue(qc); + return 0; - /* Force a requeue of the command to defer its execution. */ - ata_qc_free(qc); - return ret; +defer_qc: + /* + * We must defer this qc: if this is not an NCQ command, keep + * this qc as a deferred one and report to the SCSI layer that + * we issued it so that it is not requeued. The deferred qc will + * be issued with the port deferred_qc_work once all on-going + * commands complete. + */ + if (!ata_is_ncq(qc->tf.protocol)) { + link->deferred_qc = qc; + return 0; } -issue: - ata_qc_issue(qc); +free_qc: + /* Force a requeue of the command to defer its execution. */ + ata_qc_free(qc); - return 0; + return ret; } /** diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c index d642ece9f07a..57f1081b86db 100644 --- a/drivers/ata/sata_sil24.c +++ b/drivers/ata/sata_sil24.c @@ -789,6 +789,7 @@ static int sil24_qc_defer(struct ata_queued_cmd *qc) struct ata_link *link = qc->dev->link; struct ata_port *ap = link->ap; u8 prot = qc->tf.protocol; + int ret; /* * There is a bug in the chip: @@ -826,7 +827,10 @@ static int sil24_qc_defer(struct ata_queued_cmd *qc) qc->flags |= ATA_QCFLAG_CLEAR_EXCL; } - return ata_std_qc_defer(qc); + ret = ata_std_qc_defer(qc); + if (ret == ATA_DEFER_LINK) + return ATA_DEFER_LINK_EXCL; + return ret; } static enum ata_completion_errors sil24_qc_prep(struct ata_queued_cmd *qc) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 6d84a02cfa5d..0ecb43556736 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -1251,8 +1251,10 @@ void memblk_nr_poison_inc(unsigned long pfn) const unsigned long block_id = pfn_to_block_id(pfn); struct memory_block *mem = find_memory_block_by_id(block_id); - if (mem) + if (mem) { atomic_long_inc(&mem->nr_hwpoison); + put_device(&mem->dev); + } } void memblk_nr_poison_sub(unsigned long pfn, long i) @@ -1260,8 +1262,10 @@ void memblk_nr_poison_sub(unsigned long pfn, long i) const unsigned long block_id = pfn_to_block_id(pfn); struct memory_block *mem = find_memory_block_by_id(block_id); - if (mem) + if (mem) { atomic_long_sub(i, &mem->nr_hwpoison); + put_device(&mem->dev); + } } static unsigned long memblk_nr_poison(struct memory_block *mem) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 9236b5184bce..97cd8cf7bad3 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4565,24 +4565,12 @@ out: return ret; } -static void cancel_tasks_sync(struct rbd_device *rbd_dev) -{ - dout("%s rbd_dev %p\n", __func__, rbd_dev); - - cancel_work_sync(&rbd_dev->acquired_lock_work); - cancel_work_sync(&rbd_dev->released_lock_work); - cancel_delayed_work_sync(&rbd_dev->lock_dwork); - cancel_work_sync(&rbd_dev->unlock_work); -} - /* * header_rwsem must not be held to avoid a deadlock with * rbd_dev_refresh() when flushing notifies. */ static void rbd_unregister_watch(struct rbd_device *rbd_dev) { - cancel_tasks_sync(rbd_dev); - mutex_lock(&rbd_dev->watch_mutex); if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED) __rbd_unregister_watch(rbd_dev); @@ -6548,10 +6536,18 @@ out_err: static void rbd_dev_image_unlock(struct rbd_device *rbd_dev) { + dout("%s rbd_dev %p\n", __func__, rbd_dev); + + disable_delayed_work_sync(&rbd_dev->lock_dwork); + disable_work_sync(&rbd_dev->unlock_work); + down_write(&rbd_dev->lock_rwsem); if (__rbd_is_lock_owner(rbd_dev)) __rbd_release_lock(rbd_dev); up_write(&rbd_dev->lock_rwsem); + + flush_work(&rbd_dev->acquired_lock_work); + flush_work(&rbd_dev->released_lock_work); } /* diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 2729b1556e81..c339222513b0 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -601,6 +601,9 @@ static int ublk_validate_params(const struct ublk_device *ub) if (p->max_sectors > (ub->dev_info.max_io_buf_bytes >> 9)) return -EINVAL; + if (p->max_sectors < PAGE_SECTORS) + return -EINVAL; + if (ublk_dev_is_zoned(ub) && !p->chunk_sectors) return -EINVAL; } else diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index c68a8de3025b..d0aa1666d6ac 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -567,12 +567,10 @@ static int btintel_pcie_get_mac_access(struct btintel_pcie_data *data) reg = btintel_pcie_rd_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG); - reg |= BTINTEL_PCIE_CSR_FUNC_CTRL_STOP_MAC_ACCESS_DIS; - reg |= BTINTEL_PCIE_CSR_FUNC_CTRL_XTAL_CLK_REQ; - if ((reg & BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_STS) == 0) + if (!(reg & BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ)) { reg |= BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ; - - btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG, reg); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG, reg); + } do { reg = btintel_pcie_rd_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG); @@ -592,16 +590,10 @@ static void btintel_pcie_release_mac_access(struct btintel_pcie_data *data) reg = btintel_pcie_rd_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG); - if (reg & BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ) + if (reg & BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ) { reg &= ~BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ; - - if (reg & BTINTEL_PCIE_CSR_FUNC_CTRL_STOP_MAC_ACCESS_DIS) - reg &= ~BTINTEL_PCIE_CSR_FUNC_CTRL_STOP_MAC_ACCESS_DIS; - - if (reg & BTINTEL_PCIE_CSR_FUNC_CTRL_XTAL_CLK_REQ) - reg &= ~BTINTEL_PCIE_CSR_FUNC_CTRL_XTAL_CLK_REQ; - - btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG, reg); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG, reg); + } } static void *btintel_pcie_copy_tlv(void *dest, enum btintel_pcie_tlv_type type, diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h index 48e1ae1793e5..481de85456e2 100644 --- a/drivers/bluetooth/btintel_pcie.h +++ b/drivers/bluetooth/btintel_pcie.h @@ -34,9 +34,6 @@ #define BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_STS (BIT(20)) #define BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ (BIT(21)) -/* Stop MAC Access disconnection request */ -#define BTINTEL_PCIE_CSR_FUNC_CTRL_STOP_MAC_ACCESS_DIS (BIT(22)) -#define BTINTEL_PCIE_CSR_FUNC_CTRL_XTAL_CLK_REQ (BIT(23)) #define BTINTEL_PCIE_CSR_FUNC_CTRL_BUS_MASTER_STS (BIT(28)) #define BTINTEL_PCIE_CSR_FUNC_CTRL_BUS_MASTER_DISCON (BIT(29)) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index cf05746e9db5..38ba8f2bd2f9 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -496,6 +496,7 @@ static void btmtk_usb_wmt_recv(struct urb *urb) return; } else if (urb->status == -ENOENT) { /* Avoid suspend failed when usb_kill_urb */ + kfree(urb->setup_packet); return; } @@ -569,6 +570,7 @@ static int btmtk_usb_submit_wmt_recv_urb(struct hci_dev *hdev) if (err != -EPERM && err != -ENODEV) bt_dev_err(hdev, "urb %p submission failed (%d)", urb, -err); + kfree(dr); usb_unanchor_urb(urb); } diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 5455990ab211..10e936b87cc8 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -194,7 +194,15 @@ void hci_uart_init_work(struct work_struct *work) err = hci_register_dev(hu->hdev); if (err < 0) { BT_ERR("Can't register HCI device"); + + percpu_down_write(&hu->proto_lock); clear_bit(HCI_UART_PROTO_READY, &hu->flags); + percpu_up_write(&hu->proto_lock); + + /* Safely cancel work after clearing flags */ + cancel_work_sync(&hu->write_work); + + /* Close protocol before freeing hdev */ hu->proto->close(hu); hdev = hu->hdev; hu->hdev = NULL; @@ -263,8 +271,12 @@ static int hci_uart_open(struct hci_dev *hdev) /* Close device */ static int hci_uart_close(struct hci_dev *hdev) { + struct hci_uart *hu = hci_get_drvdata(hdev); + BT_DBG("hdev %p", hdev); + cancel_work_sync(&hu->write_work); + hci_uart_flush(hdev); hdev->flush = NULL; return 0; @@ -531,6 +543,7 @@ static void hci_uart_tty_close(struct tty_struct *tty) { struct hci_uart *hu = tty->disc_data; struct hci_dev *hdev; + bool proto_ready; BT_DBG("tty %p", tty); @@ -540,24 +553,38 @@ static void hci_uart_tty_close(struct tty_struct *tty) if (!hu) return; - hdev = hu->hdev; - if (hdev) - hci_uart_close(hdev); + /* Wait for init_ready to finish to prevent registration races */ + cancel_work_sync(&hu->init_ready); - if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) { + proto_ready = test_bit(HCI_UART_PROTO_READY, &hu->flags); + if (proto_ready) { percpu_down_write(&hu->proto_lock); clear_bit(HCI_UART_PROTO_READY, &hu->flags); percpu_up_write(&hu->proto_lock); + } - cancel_work_sync(&hu->init_ready); - cancel_work_sync(&hu->write_work); + /* + * Unconditionally cancel write_work AFTER clearing PROTO_READY. + * This ensures that concurrent protocol timers cannot requeue + * write_work via hci_uart_tx_wakeup(), permanently preventing + * double-free races and UAFs. + */ + cancel_work_sync(&hu->write_work); + + hdev = hu->hdev; + if (hdev) + hci_uart_close(hdev); /* proto->flush is safely skipped */ + if (proto_ready) { if (hdev) { if (test_bit(HCI_UART_REGISTERED, &hu->flags)) hci_unregister_dev(hdev); - hci_free_dev(hdev); } + /* Close protocol before freeing hdev (intrinsically purges queues) */ hu->proto->close(hu); + + if (hdev) + hci_free_dev(hdev); } clear_bit(HCI_UART_PROTO_SET, &hu->flags); @@ -625,11 +652,12 @@ static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, * tty caller */ hu->proto->recv(hu, data, count); - percpu_up_read(&hu->proto_lock); if (hu->hdev) hu->hdev->stat.byte_rx += count; + percpu_up_read(&hu->proto_lock); + tty_unthrottle(tty); } @@ -695,6 +723,10 @@ static int hci_uart_register_dev(struct hci_uart *hu) percpu_down_write(&hu->proto_lock); clear_bit(HCI_UART_PROTO_INIT, &hu->flags); percpu_up_write(&hu->proto_lock); + /* Cancel work after clearing flags */ + cancel_work_sync(&hu->write_work); + + /* Close protocol before freeing hdev */ hu->proto->close(hu); hu->hdev = NULL; hci_free_dev(hdev); diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index fa6dd0c94656..e7a6452bf544 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -311,6 +311,7 @@ static bool cxl_mem_raw_command_allowed(u16 opcode) * cxl_payload_from_user_allowed() - Check contents of in_payload. * @opcode: The mailbox command opcode. * @payload_in: Pointer to the input payload passed in from user space. + * @in_size: Size of @payload_in in bytes. * * Return: * * true - payload_in passes check for @opcode. @@ -325,12 +326,15 @@ static bool cxl_mem_raw_command_allowed(u16 opcode) * * The specific checks are determined by the opcode. */ -static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) +static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in, + size_t in_size) { switch (opcode) { case CXL_MBOX_OP_SET_PARTITION_INFO: { struct cxl_mbox_set_partition_info *pi = payload_in; + if (in_size < sizeof(*pi)) + return false; if (pi->flags & CXL_SET_PARTITION_IMMEDIATE_FLAG) return false; break; @@ -338,6 +342,8 @@ static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) case CXL_MBOX_OP_CLEAR_LOG: { const uuid_t *uuid = (uuid_t *)payload_in; + if (in_size < sizeof(uuid_t)) + return false; /* * Restrict the ‘Clear log’ action to only apply to * Vendor debug logs. @@ -365,7 +371,8 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox_cmd, if (IS_ERR(mbox_cmd->payload_in)) return PTR_ERR(mbox_cmd->payload_in); - if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in)) { + if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in, + in_size)) { dev_dbg(cxl_mbox->host, "%s: input payload not allowed\n", cxl_mem_opcode_to_name(opcode)); kvfree(mbox_cmd->payload_in); diff --git a/drivers/firmware/arm_ffa/bus.c b/drivers/firmware/arm_ffa/bus.c index 50bfe56c755e..6779923e35c5 100644 --- a/drivers/firmware/arm_ffa/bus.c +++ b/drivers/firmware/arm_ffa/bus.c @@ -26,6 +26,8 @@ static int ffa_device_match(struct device *dev, const struct device_driver *drv) id_table = to_ffa_driver(drv)->id_table; ffa_dev = to_ffa_dev(dev); + if (!id_table) + return 0; while (!uuid_is_null(&id_table->uuid)) { /* @@ -123,7 +125,7 @@ int ffa_driver_register(struct ffa_driver *driver, struct module *owner, { int ret; - if (!driver->probe) + if (!driver->probe || !driver->id_table) return -EINVAL; driver->driver.bus = &ffa_bus_type; diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index d71a2ef335d1..827aac08a8e9 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -100,6 +100,7 @@ struct ffa_drv_info { bool mem_ops_native; bool msg_direct_req2_supp; bool bitmap_created; + bool bus_notifier_registered; bool notif_enabled; unsigned int sched_recv_irq; unsigned int notif_pend_irq; @@ -318,6 +319,12 @@ __ffa_partition_info_get(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3, #define PART_INFO_ID_MASK GENMASK(15, 0) #define PART_INFO_EXEC_CXT_MASK GENMASK(31, 16) #define PART_INFO_PROPS_MASK GENMASK(63, 32) +#define FFA_PART_INFO_GET_REGS_FIRST_REG 3 +#define FFA_PART_INFO_GET_REGS_REGS_PER_DESC 3 +#define FFA_PART_INFO_GET_REGS_MAX_DESC \ + (((sizeof(ffa_value_t) / sizeof_field(ffa_value_t, a0)) - \ + FFA_PART_INFO_GET_REGS_FIRST_REG) / \ + FFA_PART_INFO_GET_REGS_REGS_PER_DESC) #define PART_INFO_ID(x) ((u16)(FIELD_GET(PART_INFO_ID_MASK, (x)))) #define PART_INFO_EXEC_CXT(x) ((u16)(FIELD_GET(PART_INFO_EXEC_CXT_MASK, (x)))) #define PART_INFO_PROPERTIES(x) ((u32)(FIELD_GET(PART_INFO_PROPS_MASK, (x)))) @@ -325,15 +332,13 @@ static int __ffa_partition_info_get_regs(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3, struct ffa_partition_info *buffer, int num_parts) { - u16 buf_sz, start_idx, cur_idx, count = 0, prev_idx = 0, tag = 0; + u16 buf_sz, start_idx = 0, cur_idx, count = 0, tag = 0; struct ffa_partition_info *buf = buffer; ffa_value_t partition_info; do { __le64 *regs; - int idx; - - start_idx = prev_idx ? prev_idx + 1 : 0; + int idx, nr_desc, buf_idx; invoke_ffa_fn((ffa_value_t){ .a0 = FFA_PARTITION_INFO_GET_REGS, @@ -349,15 +354,28 @@ __ffa_partition_info_get_regs(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3, count = PARTITION_COUNT(partition_info.a2); if (!buffer || !num_parts) /* count only */ return count; + if (count > num_parts) + return -EINVAL; cur_idx = CURRENT_INDEX(partition_info.a2); + if (cur_idx < start_idx || cur_idx >= count) + return -EINVAL; + + nr_desc = cur_idx - start_idx + 1; + if (nr_desc > FFA_PART_INFO_GET_REGS_MAX_DESC) + return -EINVAL; + + buf_idx = buf - buffer; + if (buf_idx + nr_desc > num_parts) + return -EINVAL; + tag = UUID_INFO_TAG(partition_info.a2); buf_sz = PARTITION_INFO_SZ(partition_info.a2); if (buf_sz > sizeof(*buffer)) buf_sz = sizeof(*buffer); regs = (void *)&partition_info.a3; - for (idx = 0; idx < cur_idx - start_idx + 1; idx++, buf++) { + for (idx = 0; idx < nr_desc; idx++, buf++) { union { uuid_t uuid; u64 regs[2]; @@ -375,7 +393,7 @@ __ffa_partition_info_get_regs(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3, uuid_copy(&buf->uuid, &uuid_regs.uuid); regs += 3; } - prev_idx = cur_idx; + start_idx = cur_idx + 1; } while (cur_idx < (count - 1)); @@ -1185,7 +1203,7 @@ static int ffa_sched_recv_cb_update(struct ffa_device *dev, ffa_sched_recv_cb callback, void *cb_data, bool is_registration) { - struct ffa_dev_part_info *partition = NULL, *tmp; + struct ffa_dev_part_info *partition = NULL; struct list_head *phead; bool cb_valid; @@ -1198,11 +1216,11 @@ ffa_sched_recv_cb_update(struct ffa_device *dev, ffa_sched_recv_cb callback, return -EINVAL; } - list_for_each_entry_safe(partition, tmp, phead, node) + list_for_each_entry(partition, phead, node) if (partition->dev == dev) break; - if (!partition) { + if (&partition->node == phead) { pr_err("%s: No such partition ID 0x%x\n", __func__, dev->vm_id); return -EINVAL; } @@ -1441,20 +1459,25 @@ static int ffa_notify_send(struct ffa_device *dev, int notify_id, static void handle_notif_callbacks(u64 bitmap, enum notify_type type) { + ffa_notifier_cb cb; + void *cb_data; int notify_id; - struct notifier_cb_info *cb_info = NULL; for (notify_id = 0; notify_id <= FFA_MAX_NOTIFICATIONS && bitmap; notify_id++, bitmap >>= 1) { if (!(bitmap & 1)) continue; - read_lock(&drv_info->notify_lock); - cb_info = notifier_hnode_get_by_type(notify_id, type); - read_unlock(&drv_info->notify_lock); + scoped_guard(read_lock, &drv_info->notify_lock) { + struct notifier_cb_info *cb_info; - if (cb_info && cb_info->cb) - cb_info->cb(notify_id, cb_info->cb_data); + cb_info = notifier_hnode_get_by_type(notify_id, type); + cb = cb_info ? cb_info->cb : NULL; + cb_data = cb_info ? cb_info->cb_data : NULL; + } + + if (cb) + cb(notify_id, cb_data); } } @@ -1462,39 +1485,56 @@ static void handle_fwk_notif_callbacks(u32 bitmap) { void *buf; uuid_t uuid; + void *fwk_cb_data; int notify_id = 0, target; + ffa_fwk_notifier_cb fwk_cb; struct ffa_indirect_msg_hdr *msg; - struct notifier_cb_info *cb_info = NULL; + size_t min_offset = offsetof(struct ffa_indirect_msg_hdr, uuid); /* Only one framework notification defined and supported for now */ if (!(bitmap & FRAMEWORK_NOTIFY_RX_BUFFER_FULL)) return; - mutex_lock(&drv_info->rx_lock); + scoped_guard(mutex, &drv_info->rx_lock) { + u32 offset, size; - msg = drv_info->rx_buffer; - buf = kmemdup((void *)msg + msg->offset, msg->size, GFP_KERNEL); - if (!buf) { - mutex_unlock(&drv_info->rx_lock); - return; - } + msg = drv_info->rx_buffer; + offset = msg->offset; + size = msg->size; - target = SENDER_ID(msg->send_recv_id); - if (msg->offset >= sizeof(*msg)) - uuid_copy(&uuid, &msg->uuid); - else - uuid_copy(&uuid, &uuid_null); + if (!size || (offset != min_offset && offset < sizeof(*msg)) || + offset > drv_info->rxtx_bufsz || + size > drv_info->rxtx_bufsz - offset) { + pr_err("invalid framework notification message\n"); + ffa_rx_release(); + return; + } - mutex_unlock(&drv_info->rx_lock); + buf = kmemdup((void *)msg + offset, size, GFP_KERNEL); + if (!buf) { + ffa_rx_release(); + return; + } - ffa_rx_release(); + target = SENDER_ID(msg->send_recv_id); + if (offset >= sizeof(*msg)) + uuid_copy(&uuid, &msg->uuid); + else + uuid_copy(&uuid, &uuid_null); + ffa_rx_release(); + } - read_lock(&drv_info->notify_lock); - cb_info = notifier_hnode_get_by_vmid_uuid(notify_id, target, &uuid); - read_unlock(&drv_info->notify_lock); + scoped_guard(read_lock, &drv_info->notify_lock) { + struct notifier_cb_info *cb_info; + + cb_info = notifier_hnode_get_by_vmid_uuid(notify_id, target, + &uuid); + fwk_cb = cb_info ? cb_info->fwk_cb : NULL; + fwk_cb_data = cb_info ? cb_info->cb_data : NULL; + } - if (cb_info && cb_info->fwk_cb) - cb_info->fwk_cb(notify_id, cb_info->cb_data, buf); + if (fwk_cb) + fwk_cb(notify_id, fwk_cb_data, buf); kfree(buf); } @@ -1538,7 +1578,7 @@ static void notif_pcpu_irq_work_fn(struct work_struct *work) struct ffa_drv_info *info = container_of(work, struct ffa_drv_info, notif_pcpu_work); - ffa_self_notif_handle(smp_processor_id(), true, info); + notif_get_and_handle(info); } static const struct ffa_info_ops ffa_drv_info_ops = { @@ -1625,6 +1665,15 @@ static struct notifier_block ffa_bus_nb = { .notifier_call = ffa_bus_notifier, }; +static void ffa_bus_notifier_unregister(void) +{ + if (!drv_info->bus_notifier_registered) + return; + + bus_unregister_notifier(&ffa_bus_type, &ffa_bus_nb); + drv_info->bus_notifier_registered = false; +} + static int ffa_xa_add_partition_info(struct ffa_device *dev) { struct ffa_dev_part_info *info; @@ -1708,6 +1757,8 @@ static void ffa_partitions_cleanup(void) struct list_head *phead; unsigned long idx; + ffa_bus_notifier_unregister(); + /* Clean up/free all registered devices */ ffa_devices_unregister(); @@ -1735,11 +1786,14 @@ static int ffa_setup_partitions(void) ret = bus_register_notifier(&ffa_bus_type, &ffa_bus_nb); if (ret) pr_err("Failed to register FF-A bus notifiers\n"); + else + drv_info->bus_notifier_registered = true; } count = ffa_partition_probe(&uuid_null, &pbuf); if (count <= 0) { pr_info("%s: No partitions found, error %d\n", __func__, count); + ffa_bus_notifier_unregister(); return -EINVAL; } @@ -2059,11 +2113,12 @@ static int __init ffa_init(void) rxtx_bufsz = SZ_4K; } + rxtx_bufsz = PAGE_ALIGN(rxtx_bufsz); drv_info->rxtx_bufsz = rxtx_bufsz; drv_info->rx_buffer = alloc_pages_exact(rxtx_bufsz, GFP_KERNEL); if (!drv_info->rx_buffer) { ret = -ENOMEM; - goto free_pages; + goto free_drv_info; } drv_info->tx_buffer = alloc_pages_exact(rxtx_bufsz, GFP_KERNEL); @@ -2074,7 +2129,7 @@ static int __init ffa_init(void) ret = ffa_rxtx_map(virt_to_phys(drv_info->tx_buffer), virt_to_phys(drv_info->rx_buffer), - PAGE_ALIGN(rxtx_bufsz) / FFA_PAGE_SIZE); + rxtx_bufsz / FFA_PAGE_SIZE); if (ret) { pr_err("failed to register FFA RxTx buffers\n"); goto free_pages; diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index c3cf5541ed68..c27fe836c8ae 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -401,21 +401,11 @@ static void __init efi_debugfs_init(void) static inline void efi_debugfs_init(void) {} #endif -/* - * We register the efi subsystem with the firmware subsystem and the - * efivars subsystem with the efi subsystem, if the system was booted with - * EFI. - */ -static int __init efisubsys_init(void) +static int __init efipostcore_init(void) { - int error; - if (!efi_enabled(EFI_RUNTIME_SERVICES)) efi.runtime_supported_mask = 0; - if (!efi_enabled(EFI_BOOT)) - return 0; - if (efi.runtime_supported_mask) { /* * Since we process only one efi_runtime_service() at a time, an @@ -427,9 +417,23 @@ static int __init efisubsys_init(void) pr_err("Creating efi_rts_wq failed, EFI runtime services disabled.\n"); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); efi.runtime_supported_mask = 0; - return 0; } } + return 0; +} +postcore_initcall(efipostcore_init); + +/* + * We register the efi subsystem with the firmware subsystem and the + * efivars subsystem with the efi subsystem, if the system was booted with + * EFI. + */ +static int __init efisubsys_init(void) +{ + int error; + + if (!efi_enabled(EFI_BOOT)) + return 0; if (efi_rt_services_supported(EFI_RT_SUPPORTED_TIME_SERVICES)) platform_device_register_simple("rtc-efi", 0, NULL, 0); diff --git a/drivers/fwctl/pds/main.c b/drivers/fwctl/pds/main.c index 1809853f6353..ccc5654d3ae7 100644 --- a/drivers/fwctl/pds/main.c +++ b/drivers/fwctl/pds/main.c @@ -362,6 +362,9 @@ static void *pdsfc_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope, void *out = NULL; int err; + if (in_len < sizeof(*rpc)) + return ERR_PTR(-EINVAL); + err = pdsfc_validate_rpc(pdsfc, rpc, scope); if (err) return ERR_PTR(err); diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 4d644dcecad9..3b7284938bab 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -1968,7 +1968,6 @@ menu "Virtual GPIO drivers" config GPIO_AGGREGATOR tristate "GPIO Aggregator" select CONFIGFS_FS - select DEV_SYNC_PROBE help Say yes here to enable the GPIO Aggregator, which provides a way to aggregate existing GPIO lines into a new virtual GPIO chip. diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index 416f265d09d0..17db07cf92d0 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -32,8 +32,6 @@ #include <linux/gpio/forwarder.h> #include <linux/gpio/machine.h> -#include "dev-sync-probe.h" - #define AGGREGATOR_MAX_GPIOS 512 #define AGGREGATOR_LEGACY_PREFIX "_sysfs" @@ -42,7 +40,7 @@ */ struct gpio_aggregator { - struct dev_sync_probe_data probe_data; + struct platform_device *pdev; struct config_group group; struct gpiod_lookup_table *lookups; struct mutex lock; @@ -135,7 +133,7 @@ static bool gpio_aggregator_is_active(struct gpio_aggregator *aggr) { lockdep_assert_held(&aggr->lock); - return aggr->probe_data.pdev && platform_get_drvdata(aggr->probe_data.pdev); + return aggr->pdev && platform_get_drvdata(aggr->pdev); } /* Only aggregators created via legacy sysfs can be "activating". */ @@ -143,7 +141,7 @@ static bool gpio_aggregator_is_activating(struct gpio_aggregator *aggr) { lockdep_assert_held(&aggr->lock); - return aggr->probe_data.pdev && !platform_get_drvdata(aggr->probe_data.pdev); + return aggr->pdev && !platform_get_drvdata(aggr->pdev); } static size_t gpio_aggregator_count_lines(struct gpio_aggregator *aggr) @@ -909,6 +907,7 @@ static int gpio_aggregator_activate(struct gpio_aggregator *aggr) { struct platform_device_info pdevinfo; struct gpio_aggregator_line *line; + struct platform_device *pdev; struct fwnode_handle *swnode; unsigned int n = 0; int ret = 0; @@ -963,15 +962,29 @@ static int gpio_aggregator_activate(struct gpio_aggregator *aggr) gpiod_add_lookup_table(aggr->lookups); - ret = dev_sync_probe_register(&aggr->probe_data, &pdevinfo); - if (ret) + pdev = platform_device_register_full(&pdevinfo); + if (IS_ERR(pdev)) { + ret = PTR_ERR(pdev); goto err_remove_lookup_table; + } + + wait_for_device_probe(); + + scoped_guard(device, &pdev->dev) { + if (!device_is_bound(&pdev->dev)) { + ret = -ENXIO; + goto err_unregister_pdev; + } + } + aggr->pdev = pdev; return 0; +err_unregister_pdev: + platform_device_unregister(pdev); err_remove_lookup_table: - kfree(aggr->lookups->dev_id); gpiod_remove_lookup_table(aggr->lookups); + kfree(aggr->lookups->dev_id); err_remove_swnode: fwnode_remove_software_node(swnode); err_remove_lookups: @@ -982,10 +995,15 @@ err_remove_lookups: static void gpio_aggregator_deactivate(struct gpio_aggregator *aggr) { - dev_sync_probe_unregister(&aggr->probe_data); + struct fwnode_handle *swnode; + + swnode = dev_fwnode(&aggr->pdev->dev); + platform_device_unregister(aggr->pdev); + aggr->pdev = NULL; gpiod_remove_lookup_table(aggr->lookups); kfree(aggr->lookups->dev_id); kfree(aggr->lookups); + fwnode_remove_software_node(swnode); } static void gpio_aggregator_lockup_configfs(struct gpio_aggregator *aggr, @@ -1146,7 +1164,7 @@ gpio_aggregator_device_dev_name_show(struct config_item *item, char *page) guard(mutex)(&aggr->lock); - pdev = aggr->probe_data.pdev; + pdev = aggr->pdev; if (pdev) return sysfs_emit(page, "%s\n", dev_name(&pdev->dev)); @@ -1323,7 +1341,6 @@ gpio_aggregator_make_group(struct config_group *group, const char *name) return ERR_PTR(ret); config_group_init_type_name(&aggr->group, name, &gpio_aggregator_device_type); - dev_sync_probe_init(&aggr->probe_data); return &aggr->group; } @@ -1473,12 +1490,6 @@ static ssize_t gpio_aggregator_new_device_store(struct device_driver *driver, scnprintf(name, sizeof(name), "%s.%d", AGGREGATOR_LEGACY_PREFIX, aggr->id); config_group_init_type_name(&aggr->group, name, &gpio_aggregator_device_type); - /* - * Since the device created by sysfs might be toggled via configfs - * 'live' attribute later, this initialization is needed. - */ - dev_sync_probe_init(&aggr->probe_data); - /* Expose to configfs */ res = configfs_register_group(&gpio_aggregator_subsys.su_group, &aggr->group); @@ -1497,7 +1508,7 @@ static ssize_t gpio_aggregator_new_device_store(struct device_driver *driver, goto remove_table; } - aggr->probe_data.pdev = pdev; + aggr->pdev = pdev; module_put(THIS_MODULE); return count; diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 986312c71678..89abf7a238d8 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -1208,6 +1208,7 @@ static int gpio_v2_line_flags_validate(u64 flags) static int gpio_v2_line_config_validate(struct gpio_v2_line_config *lc, unsigned int num_lines) { + size_t unused_attrs; unsigned int i; u64 flags; int ret; @@ -1215,9 +1216,21 @@ static int gpio_v2_line_config_validate(struct gpio_v2_line_config *lc, if (lc->num_attrs > GPIO_V2_LINE_NUM_ATTRS_MAX) return -EINVAL; + unused_attrs = GPIO_V2_LINE_NUM_ATTRS_MAX - lc->num_attrs; + if (!mem_is_zero(lc->padding, sizeof(lc->padding))) return -EINVAL; + for (i = 0; i < lc->num_attrs; i++) { + if (lc->attrs[i].attr.padding != 0) + return -EINVAL; + } + + if (unused_attrs) { + if (!mem_is_zero(&lc->attrs[lc->num_attrs], unused_attrs * sizeof(*lc->attrs))) + return -EINVAL; + } + for (i = 0; i < num_lines; i++) { flags = gpio_v2_line_config_flags(lc, i); ret = gpio_v2_line_flags_validate(flags); diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 742f0d590c5a..b248e64587ed 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -152,7 +152,8 @@ drm_kms_helper-y := \ drm_plane_helper.o \ drm_probe_helper.o \ drm_self_refresh_helper.o \ - drm_simple_kms_helper.o + drm_simple_kms_helper.o \ + drm_vblank_helper.o drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index aa78c2ee9e21..bd5eff4bafe7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -562,6 +562,11 @@ static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr, amdgpu_ring_write(ring, 0); } + /* WA: Force sync after TRAP to avoid VPE1 fail to power off */ + if (ring->adev->vpe.collaborate_mode) { + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COLLAB_SYNC, 0)); + amdgpu_ring_write(ring, 0xabcd); + } } static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -968,7 +973,7 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = { .emit_frame_size = 5 + /* vpe_ring_init_cond_exec */ 6 + /* vpe_ring_emit_pipeline_sync */ - 10 + 10 + 10 + /* vpe_ring_emit_fence */ + 12 + 12 + 12 + /* vpe_ring_emit_fence */ /* vpe_ring_emit_vm_flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6, diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 550a9f1d03f8..e004458f0e43 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -492,6 +492,10 @@ static enum bp_result get_gpio_i2c_info( - sizeof(struct atom_common_table_header)) / sizeof(struct atom_gpio_pin_assignment); + if (!bios_get_image(&bp->base, DATA_TABLES(gpio_pin_lut), + le16_to_cpu(header->table_header.structuresize))) + return BP_RESULT_BADBIOSTABLE; + pin = (struct atom_gpio_pin_assignment *) header->gpio_pin; for (table_index = 0; table_index < count; table_index++) { @@ -680,6 +684,11 @@ static enum bp_result bios_parser_get_gpio_pin_info( count = (le16_to_cpu(header->table_header.structuresize) - sizeof(struct atom_common_table_header)) / sizeof(struct atom_gpio_pin_assignment); + + if (!bios_get_image(&bp->base, DATA_TABLES(gpio_pin_lut), + le16_to_cpu(header->table_header.structuresize))) + return BP_RESULT_BADBIOSTABLE; + for (i = 0; i < count; ++i) { if (header->gpio_pin[i].gpio_id != gpio_id) continue; diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c index 8d2cf95ae739..e00dc05c2d9d 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c @@ -37,10 +37,13 @@ uint8_t *bios_get_image(struct dc_bios *bp, uint32_t offset, uint32_t size) { - if (bp->bios && offset + size < bp->bios_size) - return bp->bios + offset; - else + if (!bp->bios) return NULL; + + if (offset > bp->bios_size || size > bp->bios_size - offset) + return NULL; + + return bp->bios + offset; } #include "reg_helper.h" diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 5f2d5638c819..0347174173f3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5884,7 +5884,11 @@ bool dc_process_dmub_aux_transfer_async(struct dc *dc, uint8_t action; union dmub_rb_cmd cmd = {0}; - ASSERT(payload->length <= 16); + if (link_index >= dc->link_count || !dc->links[link_index]) + return false; + + if (payload->length > sizeof(cmd.dp_aux_access.aux_control.dpaux.data)) + return false; cmd.dp_aux_access.header.type = DMUB_CMD__DP_AUX_ACCESS; cmd.dp_aux_access.header.payload_bytes = 0; diff --git a/drivers/gpu/drm/bridge/chipone-icn6211.c b/drivers/gpu/drm/bridge/chipone-icn6211.c index 814713c5bea9..553a1df4688d 100644 --- a/drivers/gpu/drm/bridge/chipone-icn6211.c +++ b/drivers/gpu/drm/bridge/chipone-icn6211.c @@ -758,7 +758,9 @@ static int chipone_i2c_probe(struct i2c_client *client) dev_set_drvdata(dev, icn); i2c_set_clientdata(client, icn); - drm_bridge_add(&icn->bridge); + ret = devm_drm_bridge_add(dev, &icn->bridge); + if (ret) + return ret; return chipone_dsi_host_attach(icn); } diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c index aa7b1dcc5d70..f9aff56dd503 100644 --- a/drivers/gpu/drm/bridge/ite-it66121.c +++ b/drivers/gpu/drm/bridge/ite-it66121.c @@ -1559,6 +1559,11 @@ static int it66121_probe(struct i2c_client *client) return ret; } + ctx->gpio_reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); + if (IS_ERR(ctx->gpio_reset)) + return dev_err_probe(dev, PTR_ERR(ctx->gpio_reset), + "Failed to get reset GPIO\n"); + it66121_hw_reset(ctx); ctx->regmap = devm_regmap_init_i2c(client, &it66121_regmap_config); diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index c9e6505cbd88..2d02cc69f237 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -251,7 +251,6 @@ static void ge_b850v3_lvds_remove(void) goto out; drm_bridge_remove(&ge_b850v3_lvds_ptr->bridge); - ge_b850v3_lvds_ptr = NULL; out: mutex_unlock(&ge_b850v3_lvds_dev_mutex); @@ -261,6 +260,7 @@ static int ge_b850v3_register(void) { struct i2c_client *stdp4028_i2c = ge_b850v3_lvds_ptr->stdp4028_i2c; struct device *dev = &stdp4028_i2c->dev; + int ret; /* drm bridge initialization */ ge_b850v3_lvds_ptr->bridge.ops = DRM_BRIDGE_OP_DETECT | @@ -277,11 +277,15 @@ static int ge_b850v3_register(void) if (!stdp4028_i2c->irq) return 0; - return devm_request_threaded_irq(&stdp4028_i2c->dev, - stdp4028_i2c->irq, NULL, - ge_b850v3_lvds_irq_handler, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, - "ge-b850v3-lvds-dp", ge_b850v3_lvds_ptr); + ret = devm_request_threaded_irq(&stdp4028_i2c->dev, + stdp4028_i2c->irq, NULL, + ge_b850v3_lvds_irq_handler, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + "ge-b850v3-lvds-dp", ge_b850v3_lvds_ptr); + if (ret) + drm_bridge_remove(&ge_b850v3_lvds_ptr->bridge); + + return ret; } static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index bbec1c184f65..b06a5cba5295 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1913,7 +1913,7 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev, ret = wait_event_timeout(dev->vblank[i].queue, state->crtcs[i].last_vblank_count != drm_crtc_vblank_count(crtc), - msecs_to_jiffies(100)); + msecs_to_jiffies(1000)); WARN(!ret, "[CRTC:%d:%s] vblank wait timed out\n", crtc->base.id, crtc->name); diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index 46f59883183d..451ec9620226 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -136,8 +136,17 @@ * vblanks after a timer has expired, which can be configured through the * ``vblankoffdelay`` module parameter. * - * Drivers for hardware without support for vertical-blanking interrupts - * must not call drm_vblank_init(). For such drivers, atomic helpers will + * Drivers for hardware without support for vertical-blanking interrupts can + * use DRM vblank timers to send vblank events at the rate of the current + * display mode's refresh. While not synchronized to the hardware's + * vertical-blanking regions, the timer helps DRM clients and compositors to + * adapt their update cycle to the display output. Drivers should set up + * vblanking as usual, but call drm_crtc_vblank_start_timer() and + * drm_crtc_vblank_cancel_timer() as part of their atomic mode setting. + * See also DRM vblank helpers for more information. + * + * Drivers without support for vertical-blanking interrupts nor timers must + * not call drm_vblank_init(). For these drivers, atomic helpers will * automatically generate fake vblank events as part of the display update. * This functionality also can be controlled by the driver by enabling and * disabling struct drm_crtc_state.no_vblank. @@ -508,6 +517,9 @@ static void drm_vblank_init_release(struct drm_device *dev, void *ptr) drm_WARN_ON(dev, READ_ONCE(vblank->enabled) && drm_core_check_feature(dev, DRIVER_MODESET)); + if (vblank->vblank_timer.crtc) + hrtimer_cancel(&vblank->vblank_timer.timer); + drm_vblank_destroy_worker(vblank); timer_delete_sync(&vblank->disable_timer); } @@ -2162,3 +2174,159 @@ err_free: return ret; } +/* + * VBLANK timer + */ + +static enum hrtimer_restart drm_vblank_timer_function(struct hrtimer *timer) +{ + struct drm_vblank_crtc_timer *vtimer = + container_of(timer, struct drm_vblank_crtc_timer, timer); + struct drm_crtc *crtc = vtimer->crtc; + const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; + struct drm_device *dev = crtc->dev; + unsigned long flags; + ktime_t interval; + u64 ret_overrun; + bool succ; + + spin_lock_irqsave(&vtimer->interval_lock, flags); + interval = vtimer->interval; + spin_unlock_irqrestore(&vtimer->interval_lock, flags); + + if (!interval) + return HRTIMER_NORESTART; + + ret_overrun = hrtimer_forward_now(&vtimer->timer, interval); + if (ret_overrun != 1) + drm_dbg_vbl(dev, "vblank timer overrun\n"); + + if (crtc_funcs->handle_vblank_timeout) + succ = crtc_funcs->handle_vblank_timeout(crtc); + else + succ = drm_crtc_handle_vblank(crtc); + if (!succ) + return HRTIMER_NORESTART; + + return HRTIMER_RESTART; +} + +/** + * drm_crtc_vblank_start_timer - Starts the vblank timer on the given CRTC + * @crtc: the CRTC + * + * Drivers should call this function from their CRTC's enable_vblank + * function to start a vblank timer. The timer will fire after the duration + * of a full frame. drm_crtc_vblank_cancel_timer() disables a running timer. + * + * Returns: + * 0 on success, or a negative errno code otherwise. + */ +int drm_crtc_vblank_start_timer(struct drm_crtc *crtc) +{ + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); + struct drm_vblank_crtc_timer *vtimer = &vblank->vblank_timer; + unsigned long flags; + + if (!vtimer->crtc) { + /* + * Set up the data structures on the first invocation. + */ + vtimer->crtc = crtc; + spin_lock_init(&vtimer->interval_lock); + hrtimer_setup(&vtimer->timer, drm_vblank_timer_function, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + } else { + /* + * Timer should not be active. If it is, wait for the + * previous cancel operations to finish. + */ + while (hrtimer_active(&vtimer->timer)) + hrtimer_try_to_cancel(&vtimer->timer); + } + + drm_calc_timestamping_constants(crtc, &crtc->mode); + + spin_lock_irqsave(&vtimer->interval_lock, flags); + vtimer->interval = ns_to_ktime(vblank->framedur_ns); + spin_unlock_irqrestore(&vtimer->interval_lock, flags); + + hrtimer_start(&vtimer->timer, vtimer->interval, HRTIMER_MODE_REL); + + return 0; +} +EXPORT_SYMBOL(drm_crtc_vblank_start_timer); + +/** + * drm_crtc_vblank_cancel_timer - Cancels the given CRTC's vblank timer + * @crtc: the CRTC + * + * Drivers should call this function from their CRTC's disable_vblank + * function to stop a vblank timer. + */ +void drm_crtc_vblank_cancel_timer(struct drm_crtc *crtc) +{ + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); + struct drm_vblank_crtc_timer *vtimer = &vblank->vblank_timer; + unsigned long flags; + + /* + * Calling hrtimer_cancel() can result in a deadlock with DRM's + * vblank_time_lime_lock and hrtimers' softirq_expiry_lock. So + * clear interval and indicate cancellation. The timer function + * will cancel itself on the next invocation. + */ + + spin_lock_irqsave(&vtimer->interval_lock, flags); + vtimer->interval = 0; + spin_unlock_irqrestore(&vtimer->interval_lock, flags); + + hrtimer_try_to_cancel(&vtimer->timer); +} +EXPORT_SYMBOL(drm_crtc_vblank_cancel_timer); + +/** + * drm_crtc_vblank_get_vblank_timeout - Returns the vblank timeout + * @crtc: The CRTC + * @vblank_time: Returns the next vblank timestamp + * + * The helper drm_crtc_vblank_get_vblank_timeout() returns the next vblank + * timestamp of the CRTC's vblank timer according to the timer's expiry + * time. + */ +void drm_crtc_vblank_get_vblank_timeout(struct drm_crtc *crtc, ktime_t *vblank_time) +{ + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); + struct drm_vblank_crtc_timer *vtimer = &vblank->vblank_timer; + u64 cur_count; + ktime_t cur_time; + + if (!READ_ONCE(vblank->enabled)) { + *vblank_time = ktime_get(); + return; + } + + /* + * A concurrent vblank timeout could update the expires field before + * we compare it with the vblank time. Hence we'd compare the old + * expiry time to the new vblank time; deducing the timer had already + * expired. Reread until we get consistent values from both fields. + */ + do { + cur_count = drm_crtc_vblank_count_and_time(crtc, &cur_time); + *vblank_time = READ_ONCE(vtimer->timer.node.expires); + } while (cur_count != drm_crtc_vblank_count_and_time(crtc, &cur_time)); + + if (drm_WARN_ON(crtc->dev, !ktime_compare(*vblank_time, cur_time))) + return; /* Already expired */ + + /* + * To prevent races we roll the hrtimer forward before we do any + * interrupt processing - this is how real hw works (the interrupt + * is only generated after all the vblank registers are updated) + * and what the vblank core expects. Therefore we need to always + * correct the timestamp by one frame. + */ + *vblank_time = ktime_sub(*vblank_time, vtimer->interval); +} +EXPORT_SYMBOL(drm_crtc_vblank_get_vblank_timeout); diff --git a/drivers/gpu/drm/drm_vblank_helper.c b/drivers/gpu/drm/drm_vblank_helper.c new file mode 100644 index 000000000000..a04a6ba1b0ca --- /dev/null +++ b/drivers/gpu/drm/drm_vblank_helper.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: MIT + +#include <drm/drm_atomic.h> +#include <drm/drm_crtc.h> +#include <drm/drm_managed.h> +#include <drm/drm_modeset_helper_vtables.h> +#include <drm/drm_print.h> +#include <drm/drm_vblank.h> +#include <drm/drm_vblank_helper.h> + +/** + * DOC: overview + * + * The vblank helper library provides functions for supporting vertical + * blanking in DRM drivers. + * + * For vblank timers, several callback implementations are available. + * Drivers enable support for vblank timers by setting the vblank callbacks + * in struct &drm_crtc_funcs to the helpers provided by this library. The + * initializer macro DRM_CRTC_VBLANK_TIMER_FUNCS does this conveniently. + * The driver further has to send the VBLANK event from its atomic_flush + * callback and control vblank from the CRTC's atomic_enable and atomic_disable + * callbacks. The callbacks are located in struct &drm_crtc_helper_funcs. + * The vblank helper library provides implementations of these callbacks + * for drivers without further requirements. The initializer macro + * DRM_CRTC_HELPER_VBLANK_FUNCS sets them coveniently. + * + * Once the driver enables vblank support with drm_vblank_init(), each + * CRTC's vblank timer fires according to the programmed display mode. By + * default, the vblank timer invokes drm_crtc_handle_vblank(). Drivers with + * more specific requirements can set their own handler function in + * struct &drm_crtc_helper_funcs.handle_vblank_timeout. + */ + +/* + * VBLANK helpers + */ + +/** + * drm_crtc_vblank_atomic_flush - + * Implements struct &drm_crtc_helper_funcs.atomic_flush + * @crtc: The CRTC + * @state: The atomic state to apply + * + * The helper drm_crtc_vblank_atomic_flush() implements atomic_flush of + * struct drm_crtc_helper_funcs for CRTCs that only need to send out a + * VBLANK event. + * + * See also struct &drm_crtc_helper_funcs.atomic_flush. + */ +void drm_crtc_vblank_atomic_flush(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct drm_device *dev = crtc->dev; + struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc); + struct drm_pending_vblank_event *event; + + spin_lock_irq(&dev->event_lock); + + event = crtc_state->event; + crtc_state->event = NULL; + + if (event) { + if (drm_crtc_vblank_get(crtc) == 0) + drm_crtc_arm_vblank_event(crtc, event); + else + drm_crtc_send_vblank_event(crtc, event); + } + + spin_unlock_irq(&dev->event_lock); +} +EXPORT_SYMBOL(drm_crtc_vblank_atomic_flush); + +/** + * drm_crtc_vblank_atomic_enable - Implements struct &drm_crtc_helper_funcs.atomic_enable + * @crtc: The CRTC + * @state: The atomic state + * + * The helper drm_crtc_vblank_atomic_enable() implements atomic_enable + * of struct drm_crtc_helper_funcs for CRTCs the only need to enable VBLANKs. + * + * See also struct &drm_crtc_helper_funcs.atomic_enable. + */ +void drm_crtc_vblank_atomic_enable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + drm_crtc_vblank_on(crtc); +} +EXPORT_SYMBOL(drm_crtc_vblank_atomic_enable); + +/** + * drm_crtc_vblank_atomic_disable - Implements struct &drm_crtc_helper_funcs.atomic_disable + * @crtc: The CRTC + * @state: The atomic state + * + * The helper drm_crtc_vblank_atomic_disable() implements atomic_disable + * of struct drm_crtc_helper_funcs for CRTCs the only need to disable VBLANKs. + * + * See also struct &drm_crtc_funcs.atomic_disable. + */ +void drm_crtc_vblank_atomic_disable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + drm_crtc_vblank_off(crtc); +} +EXPORT_SYMBOL(drm_crtc_vblank_atomic_disable); + +/* + * VBLANK timer + */ + +/** + * drm_crtc_vblank_helper_enable_vblank_timer - Implements struct &drm_crtc_funcs.enable_vblank + * @crtc: The CRTC + * + * The helper drm_crtc_vblank_helper_enable_vblank_timer() implements + * enable_vblank of struct drm_crtc_helper_funcs for CRTCs that require + * a VBLANK timer. It sets up the timer on the first invocation. The + * started timer expires after the current frame duration. See struct + * &drm_vblank_crtc.framedur_ns. + * + * See also struct &drm_crtc_helper_funcs.enable_vblank. + * + * Returns: + * 0 on success, or a negative errno code otherwise. + */ +int drm_crtc_vblank_helper_enable_vblank_timer(struct drm_crtc *crtc) +{ + return drm_crtc_vblank_start_timer(crtc); +} +EXPORT_SYMBOL(drm_crtc_vblank_helper_enable_vblank_timer); + +/** + * drm_crtc_vblank_helper_disable_vblank_timer - Implements struct &drm_crtc_funcs.disable_vblank + * @crtc: The CRTC + * + * The helper drm_crtc_vblank_helper_disable_vblank_timer() implements + * disable_vblank of struct drm_crtc_funcs for CRTCs that require a + * VBLANK timer. + * + * See also struct &drm_crtc_helper_funcs.disable_vblank. + */ +void drm_crtc_vblank_helper_disable_vblank_timer(struct drm_crtc *crtc) +{ + drm_crtc_vblank_cancel_timer(crtc); +} +EXPORT_SYMBOL(drm_crtc_vblank_helper_disable_vblank_timer); + +/** + * drm_crtc_vblank_helper_get_vblank_timestamp_from_timer - + * Implements struct &drm_crtc_funcs.get_vblank_timestamp + * @crtc: The CRTC + * @max_error: Maximum acceptable error + * @vblank_time: Returns the next vblank timestamp + * @in_vblank_irq: True is called from drm_crtc_handle_vblank() + * + * The helper drm_crtc_helper_get_vblank_timestamp_from_timer() implements + * get_vblank_timestamp of struct drm_crtc_funcs for CRTCs that require a + * VBLANK timer. It returns the timestamp according to the timer's expiry + * time. + * + * See also struct &drm_crtc_funcs.get_vblank_timestamp. + * + * Returns: + * True on success, or false otherwise. + */ +bool drm_crtc_vblank_helper_get_vblank_timestamp_from_timer(struct drm_crtc *crtc, + int *max_error, + ktime_t *vblank_time, + bool in_vblank_irq) +{ + drm_crtc_vblank_get_vblank_timeout(crtc, vblank_time); + + return true; +} +EXPORT_SYMBOL(drm_crtc_vblank_helper_get_vblank_timestamp_from_timer); diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 61f22275403b..a44fbac1e5e2 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4899,7 +4899,7 @@ int intel_dp_as_sdp_unpack(struct drm_dp_as_sdp *as_sdp, as_sdp->length = sdp->sdp_header.HB3 & DP_ADAPTIVE_SYNC_SDP_LENGTH; as_sdp->mode = sdp->db[0] & DP_ADAPTIVE_SYNC_SDP_OPERATION_MODE; as_sdp->vtotal = (sdp->db[2] << 8) | sdp->db[1]; - as_sdp->target_rr = (u64)sdp->db[3] | ((u64)sdp->db[4] & 0x3); + as_sdp->target_rr = ((sdp->db[4] & 0x3) << 8) | sdp->db[3]; as_sdp->target_rr_divider = sdp->db[4] & 0x20 ? true : false; return 0; diff --git a/drivers/gpu/drm/mediatek/mtk_cec.c b/drivers/gpu/drm/mediatek/mtk_cec.c index c7be530ca041..b8ccd6e55bed 100644 --- a/drivers/gpu/drm/mediatek/mtk_cec.c +++ b/drivers/gpu/drm/mediatek/mtk_cec.c @@ -240,7 +240,7 @@ static const struct of_device_id mtk_cec_of_ids[] = { }; MODULE_DEVICE_TABLE(of, mtk_cec_of_ids); -struct platform_driver mtk_cec_driver = { +static struct platform_driver mtk_cec_driver = { .probe = mtk_cec_probe, .remove = mtk_cec_remove, .driver = { diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c index 6358e1af69b4..2acbdb025d89 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c @@ -328,7 +328,7 @@ static const struct of_device_id mtk_hdmi_ddc_match[] = { }; MODULE_DEVICE_TABLE(of, mtk_hdmi_ddc_match); -struct platform_driver mtk_hdmi_ddc_driver = { +static struct platform_driver mtk_hdmi_ddc_driver = { .probe = mtk_hdmi_ddc_probe, .remove = mtk_hdmi_ddc_remove, .driver = { diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 4b5a4edd0702..056a9e18cd4a 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -419,15 +419,21 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx, *value = vm->mm_range; return 0; case MSM_PARAM_HIGHEST_BANK_BIT: + if (!adreno_gpu->ubwc_config) + return UERR(ENOENT, drm, "no UBWC on this platform"); *value = adreno_gpu->ubwc_config->highest_bank_bit; return 0; case MSM_PARAM_RAYTRACING: *value = adreno_gpu->has_ray_tracing; return 0; case MSM_PARAM_UBWC_SWIZZLE: + if (!adreno_gpu->ubwc_config) + return UERR(ENOENT, drm, "no UBWC on this platform"); *value = adreno_gpu->ubwc_config->ubwc_swizzle; return 0; case MSM_PARAM_MACROTILE_MODE: + if (!adreno_gpu->ubwc_config) + return UERR(ENOENT, drm, "no UBWC on this platform"); *value = adreno_gpu->ubwc_config->macrotile_mode; return 0; case MSM_PARAM_UCHE_TRAP_BASE: diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c index 7545c0293efb..6f2370c9dd98 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c @@ -5,6 +5,7 @@ #include <drm/drm_edid.h> #include <drm/drm_framebuffer.h> +#include <drm/drm_managed.h> #include "dpu_writeback.h" @@ -125,7 +126,7 @@ int dpu_writeback_init(struct drm_device *dev, struct drm_encoder *enc, struct dpu_wb_connector *dpu_wb_conn; int rc = 0; - dpu_wb_conn = devm_kzalloc(dev->dev, sizeof(*dpu_wb_conn), GFP_KERNEL); + dpu_wb_conn = drmm_kzalloc(dev, sizeof(*dpu_wb_conn), GFP_KERNEL); if (!dpu_wb_conn) return -ENOMEM; diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c index 071bcdea80f7..591507db2646 100644 --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c @@ -9,7 +9,7 @@ #include "msm_disp_snapshot.h" -static void msm_disp_state_dump_regs(u32 **reg, u32 aligned_len, void __iomem *base_addr) +static void msm_disp_state_dump_regs(u32 **reg, u32 len, void __iomem *base_addr) { u32 len_padded; u32 num_rows; @@ -19,11 +19,11 @@ static void msm_disp_state_dump_regs(u32 **reg, u32 aligned_len, void __iomem *b void __iomem *end_addr; int i; - len_padded = aligned_len * REG_DUMP_ALIGN; - num_rows = aligned_len / REG_DUMP_ALIGN; + len_padded = round_up(len, REG_DUMP_ALIGN); + num_rows = DIV_ROUND_UP(len, REG_DUMP_ALIGN); addr = base_addr; - end_addr = base_addr + aligned_len; + end_addr = base_addr + len; *reg = kvzalloc(len_padded, GFP_KERNEL); if (!*reg) @@ -48,8 +48,8 @@ static void msm_disp_state_dump_regs(u32 **reg, u32 aligned_len, void __iomem *b static void msm_disp_state_print_regs(const u32 *dump_addr, u32 len, void __iomem *base_addr, struct drm_printer *p) { + void __iomem *addr, *end_addr; int i; - void __iomem *addr; u32 num_rows; if (!dump_addr) { @@ -58,6 +58,7 @@ static void msm_disp_state_print_regs(const u32 *dump_addr, u32 len, } addr = base_addr; + end_addr = base_addr + len; num_rows = len / REG_DUMP_ALIGN; for (i = 0; i < num_rows; i++) { @@ -67,6 +68,17 @@ static void msm_disp_state_print_regs(const u32 *dump_addr, u32 len, dump_addr[i * 4 + 2], dump_addr[i * 4 + 3]); addr += REG_DUMP_ALIGN; } + + if (addr != end_addr) { + drm_printf(p, "0x%lx : %08x", + (unsigned long)(addr - base_addr), + dump_addr[i * 4]); + if (addr + 0x4 < end_addr) + drm_printf(p, " %08x", dump_addr[i * 4 + 1]); + if (addr + 0x8 < end_addr) + drm_printf(p, " %08x", dump_addr[i * 4 + 2]); + drm_printf(p, "\n"); + } } void msm_disp_state_print(struct msm_disp_state *state, struct drm_printer *p) @@ -186,7 +198,7 @@ void msm_disp_snapshot_add_block(struct msm_disp_state *disp_state, u32 len, va_end(va); INIT_LIST_HEAD(&new_blk->node); - new_blk->size = ALIGN(len, REG_DUMP_ALIGN); + new_blk->size = len; new_blk->base_addr = base_addr; msm_disp_state_dump_regs(&new_blk->state, new_blk->size, base_addr); diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 1c0841a1c101..50474c994d47 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -2003,6 +2003,7 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi) /* fixup base address by io offset */ msm_host->ctrl_base += cfg->io_offset; + msm_host->ctrl_size -= cfg->io_offset; ret = devm_regulator_bulk_get_const(&pdev->dev, cfg->num_regulators, cfg->regulator_data, diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 31fa51a44f86..8f118b5185a1 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -43,8 +43,7 @@ msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) } static bool -with_vm_locks(struct ww_acquire_ctx *ticket, - void (*fn)(struct drm_gem_object *obj), +with_vm_locks(void (*fn)(struct drm_gem_object *obj), struct drm_gem_object *obj) { /* @@ -52,7 +51,7 @@ with_vm_locks(struct ww_acquire_ctx *ticket, * success paths */ struct drm_gpuvm_bo *vm_bo, *last_locked = NULL; - int ret = 0; + bool locked = true; drm_gem_for_each_gpuvm_bo (vm_bo, obj) { struct dma_resv *resv = drm_gpuvm_resv(vm_bo->vm); @@ -60,23 +59,14 @@ with_vm_locks(struct ww_acquire_ctx *ticket, if (resv == obj->resv) continue; - ret = dma_resv_lock(resv, ticket); - - /* - * Since we already skip the case when the VM and obj - * share a resv (ie. _NO_SHARE objs), we don't expect - * to hit a double-locking scenario... which the lock - * unwinding cannot really cope with. - */ - WARN_ON(ret == -EALREADY); - /* - * Don't bother with slow-lock / backoff / retry sequence, - * if we can't get the lock just give up and move on to - * the next object. + * dma_resv_lock can't be used due to acquiring 'ticket' before the + * fs_reclaim lock, which is held in shrinker context */ - if (ret) + if (!dma_resv_trylock(resv)) { + locked = false; goto out_unlock; + } /* * Hold a ref to prevent the vm_bo from being freed @@ -108,11 +98,11 @@ out_unlock: } } - return ret == 0; + return locked; } static bool -purge(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket) +purge(struct drm_gem_object *obj, struct ww_acquire_ctx *unused) { if (!is_purgeable(to_msm_bo(obj))) return false; @@ -120,11 +110,11 @@ purge(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket) if (msm_gem_active(obj)) return false; - return with_vm_locks(ticket, msm_gem_purge, obj); + return with_vm_locks(msm_gem_purge, obj); } static bool -evict(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket) +evict(struct drm_gem_object *obj, struct ww_acquire_ctx *unused) { if (is_unevictable(to_msm_bo(obj))) return false; @@ -132,7 +122,7 @@ evict(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket) if (msm_gem_active(obj)) return false; - return with_vm_locks(ticket, msm_gem_evict, obj); + return with_vm_locks(msm_gem_evict, obj); } static bool @@ -164,7 +154,6 @@ static unsigned long msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { struct msm_drm_private *priv = shrinker->private_data; - struct ww_acquire_ctx ticket; struct { struct drm_gem_lru *lru; bool (*shrink)(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket); @@ -185,11 +174,14 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) for (unsigned i = 0; (nr > 0) && (i < ARRAY_SIZE(stages)); i++) { if (!stages[i].cond) continue; + /* + * 'ticket' not needed on trylock paths + */ stages[i].freed = drm_gem_lru_scan(stages[i].lru, nr, &stages[i].remaining, stages[i].shrink, - &ticket); + NULL); nr -= stages[i].freed; freed += stages[i].freed; remaining += stages[i].remaining; diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index a188617653e8..82a172f21a3e 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -677,7 +677,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, int prot) { struct msm_iommu *iommu = to_msm_iommu(mmu); - size_t ret; + ssize_t ret; WARN_ON(off != 0); @@ -686,7 +686,8 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, iova |= GENMASK_ULL(63, 49); ret = iommu_map_sgtable(iommu->domain, iova, sgt, prot); - WARN_ON(!ret); + if (ret < 0) + return ret; return (ret == len) ? 0 : -EINVAL; } diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 0ec06bfbbebb..e0cbd12c51c9 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -104,20 +104,6 @@ v3d_performance_query_info_free(struct v3d_performance_query_info *query_info, } static void -v3d_cpu_job_free(struct drm_sched_job *sched_job) -{ - struct v3d_cpu_job *job = to_cpu_job(sched_job); - - v3d_timestamp_query_info_free(&job->timestamp_query, - job->timestamp_query.count); - - v3d_performance_query_info_free(&job->performance_query, - job->performance_query.count); - - v3d_job_cleanup(&job->base); -} - -static void v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job) { struct v3d_perfmon *perfmon = v3d->global_perfmon; @@ -860,7 +846,7 @@ static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = { static const struct drm_sched_backend_ops v3d_cpu_sched_ops = { .run_job = v3d_cpu_job_run, - .free_job = v3d_cpu_job_free + .free_job = v3d_sched_job_free }; static int diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index 23fa18e5e65c..95b9e68b0dfe 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -119,6 +119,24 @@ v3d_render_job_free(struct kref *ref) v3d_job_free(ref); } +static void +v3d_cpu_job_free(struct kref *ref) +{ + struct v3d_cpu_job *job = container_of(ref, struct v3d_cpu_job, + base.refcount); + + v3d_timestamp_query_info_free(&job->timestamp_query, + job->timestamp_query.count); + + v3d_performance_query_info_free(&job->performance_query, + job->performance_query.count); + + if (job->indirect_csd.indirect) + drm_gem_object_put(job->indirect_csd.indirect); + + v3d_job_free(ref); +} + void v3d_job_cleanup(struct v3d_job *job) { if (!job) @@ -1321,7 +1339,7 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data, trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type); ret = v3d_job_init(v3d, file_priv, &cpu_job->base, - v3d_job_free, 0, &se, V3D_CPU); + v3d_cpu_job_free, 0, &se, V3D_CPU); if (ret) { v3d_job_deallocate((void *)&cpu_job); goto fail; @@ -1404,8 +1422,6 @@ fail: v3d_job_cleanup((void *)csd_job); v3d_job_cleanup(clean_job); v3d_put_multisync_post_deps(&se); - kvfree(cpu_job->timestamp_query.queries); - kvfree(cpu_job->performance_query.queries); return ret; } diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index f17660a71a3e..2f3531950aa4 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -317,6 +317,7 @@ virtio_gpu_array_from_handles(struct drm_file *drm_file, u32 *handles, u32 nents void virtio_gpu_array_add_obj(struct virtio_gpu_object_array *objs, struct drm_gem_object *obj); int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs); +int virtio_gpu_lock_one_resv_uninterruptible(struct virtio_gpu_object_array *objs); void virtio_gpu_array_unlock_resv(struct virtio_gpu_object_array *objs); void virtio_gpu_array_add_fence(struct virtio_gpu_object_array *objs, struct dma_fence *fence); diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index 90c99d83c4cf..015b5debd745 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -238,6 +238,23 @@ int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs) return ret; } +int virtio_gpu_lock_one_resv_uninterruptible(struct virtio_gpu_object_array *objs) +{ + int ret; + + if (objs->nents != 1) + return -EINVAL; + + dma_resv_lock(objs->objs[0]->resv, NULL); + + ret = dma_resv_reserve_fences(objs->objs[0]->resv, 1); + if (ret) { + virtio_gpu_array_unlock_resv(objs); + return ret; + } + return 0; +} + void virtio_gpu_array_unlock_resv(struct virtio_gpu_object_array *objs) { if (objs->nents == 1) { diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index 29e4b458ae57..192327723bb9 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -214,7 +214,10 @@ static void virtio_gpu_resource_flush(struct drm_plane *plane, if (!objs) return; virtio_gpu_array_add_obj(objs, vgfb->base.obj[0]); - virtio_gpu_array_lock_resv(objs); + if (virtio_gpu_lock_one_resv_uninterruptible(objs)) { + virtio_gpu_array_put_free(objs); + return; + } virtio_gpu_cmd_resource_flush(vgdev, bo->hw_res_handle, x, y, width, height, objs, vgplane_st->fence); @@ -458,7 +461,10 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane, if (!objs) return; virtio_gpu_array_add_obj(objs, vgfb->base.obj[0]); - virtio_gpu_array_lock_resv(objs); + if (virtio_gpu_lock_one_resv_uninterruptible(objs)) { + virtio_gpu_array_put_free(objs); + return; + } virtio_gpu_cmd_transfer_to_host_2d (vgdev, 0, plane->state->crtc_w, diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c index e60573e0f3e9..bd79f24686dc 100644 --- a/drivers/gpu/drm/vkms/vkms_crtc.c +++ b/drivers/gpu/drm/vkms/vkms_crtc.c @@ -7,25 +7,18 @@ #include <drm/drm_managed.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> +#include <drm/drm_vblank_helper.h> #include "vkms_drv.h" -static enum hrtimer_restart vkms_vblank_simulate(struct hrtimer *timer) +static bool vkms_crtc_handle_vblank_timeout(struct drm_crtc *crtc) { - struct vkms_output *output = container_of(timer, struct vkms_output, - vblank_hrtimer); - struct drm_crtc *crtc = &output->crtc; + struct vkms_output *output = drm_crtc_to_vkms_output(crtc); struct vkms_crtc_state *state; - u64 ret_overrun; bool ret, fence_cookie; fence_cookie = dma_fence_begin_signalling(); - ret_overrun = hrtimer_forward_now(&output->vblank_hrtimer, - output->period_ns); - if (ret_overrun != 1) - pr_warn("%s: vblank timer overrun\n", __func__); - spin_lock(&output->lock); ret = drm_crtc_handle_vblank(crtc); if (!ret) @@ -57,55 +50,6 @@ static enum hrtimer_restart vkms_vblank_simulate(struct hrtimer *timer) dma_fence_end_signalling(fence_cookie); - return HRTIMER_RESTART; -} - -static int vkms_enable_vblank(struct drm_crtc *crtc) -{ - struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); - struct vkms_output *out = drm_crtc_to_vkms_output(crtc); - - hrtimer_setup(&out->vblank_hrtimer, &vkms_vblank_simulate, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - out->period_ns = ktime_set(0, vblank->framedur_ns); - hrtimer_start(&out->vblank_hrtimer, out->period_ns, HRTIMER_MODE_REL); - - return 0; -} - -static void vkms_disable_vblank(struct drm_crtc *crtc) -{ - struct vkms_output *out = drm_crtc_to_vkms_output(crtc); - - hrtimer_cancel(&out->vblank_hrtimer); -} - -static bool vkms_get_vblank_timestamp(struct drm_crtc *crtc, - int *max_error, ktime_t *vblank_time, - bool in_vblank_irq) -{ - struct vkms_output *output = drm_crtc_to_vkms_output(crtc); - struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); - - if (!READ_ONCE(vblank->enabled)) { - *vblank_time = ktime_get(); - return true; - } - - *vblank_time = READ_ONCE(output->vblank_hrtimer.node.expires); - - if (WARN_ON(*vblank_time == vblank->time)) - return true; - - /* - * To prevent races we roll the hrtimer forward before we do any - * interrupt processing - this is how real hw works (the interrupt is - * only generated after all the vblank registers are updated) and what - * the vblank core expects. Therefore we need to always correct the - * timestampe by one frame. - */ - *vblank_time -= output->period_ns; - return true; } @@ -159,9 +103,7 @@ static const struct drm_crtc_funcs vkms_crtc_funcs = { .reset = vkms_atomic_crtc_reset, .atomic_duplicate_state = vkms_atomic_crtc_duplicate_state, .atomic_destroy_state = vkms_atomic_crtc_destroy_state, - .enable_vblank = vkms_enable_vblank, - .disable_vblank = vkms_disable_vblank, - .get_vblank_timestamp = vkms_get_vblank_timestamp, + DRM_CRTC_VBLANK_TIMER_FUNCS, .get_crc_sources = vkms_get_crc_sources, .set_crc_source = vkms_set_crc_source, .verify_crc_source = vkms_verify_crc_source, @@ -213,18 +155,6 @@ static int vkms_crtc_atomic_check(struct drm_crtc *crtc, return 0; } -static void vkms_crtc_atomic_enable(struct drm_crtc *crtc, - struct drm_atomic_state *state) -{ - drm_crtc_vblank_on(crtc); -} - -static void vkms_crtc_atomic_disable(struct drm_crtc *crtc, - struct drm_atomic_state *state) -{ - drm_crtc_vblank_off(crtc); -} - static void vkms_crtc_atomic_begin(struct drm_crtc *crtc, struct drm_atomic_state *state) __acquires(&vkms_output->lock) @@ -265,8 +195,9 @@ static const struct drm_crtc_helper_funcs vkms_crtc_helper_funcs = { .atomic_check = vkms_crtc_atomic_check, .atomic_begin = vkms_crtc_atomic_begin, .atomic_flush = vkms_crtc_atomic_flush, - .atomic_enable = vkms_crtc_atomic_enable, - .atomic_disable = vkms_crtc_atomic_disable, + .atomic_enable = drm_crtc_vblank_atomic_enable, + .atomic_disable = drm_crtc_vblank_atomic_disable, + .handle_vblank_timeout = vkms_crtc_handle_vblank_timeout, }; struct vkms_output *vkms_crtc_init(struct drm_device *dev, struct drm_plane *primary, diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h index 8013c31efe3b..fb9711e1c6fb 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.h +++ b/drivers/gpu/drm/vkms/vkms_drv.h @@ -215,8 +215,6 @@ struct vkms_output { struct drm_crtc crtc; struct drm_writeback_connector wb_connector; struct drm_encoder wb_encoder; - struct hrtimer vblank_hrtimer; - ktime_t period_ns; struct workqueue_struct *composer_workq; spinlock_t lock; diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 4ae847b628e2..6324f526dcfa 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -35,11 +35,19 @@ bool intel_hdcp_gsc_check_status(struct drm_device *drm) struct xe_device *xe = to_xe_device(drm); struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_gt *gt = tile->media_gt; - struct xe_gsc *gsc = >->uc.gsc; + struct xe_gsc *gsc; bool ret = true; unsigned int fw_ref; - if (!gsc || !xe_uc_fw_is_enabled(&gsc->fw)) { + if (!gt) { + drm_dbg_kms(&xe->drm, + "not checking GSC status for HDCP2.x: media GT not present or disabled\n"); + return false; + } + + gsc = >->uc.gsc; + + if (!xe_uc_fw_is_enabled(&gsc->fw)) { drm_dbg_kms(&xe->drm, "GSC Components not ready for HDCP2.x\n"); return false; diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 8371ec002e4e..2a496987b829 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -487,8 +487,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) EXEC_QUEUE_FLAG_PERMANENT, 0); if (IS_ERR(q)) { xe_gt_err(gt, "Failed to create queue for GSC submission\n"); - err = PTR_ERR(q); - goto out_bo; + return PTR_ERR(q); } wq = alloc_ordered_workqueue("gsc-ordered-wq", 0); @@ -511,8 +510,6 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) out_q: xe_exec_queue_put(q); -out_bo: - xe_bo_unpin_map_no_vm(bo); return err; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c index 7d532bded02a..a85ba4435378 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c @@ -114,8 +114,10 @@ int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 * VFs with no events are not printed. * * This function can only be called on PF. + * + * Return: always 0 */ -void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p) +int xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p) { unsigned int n, total_vfs = xe_gt_sriov_pf_get_totalvfs(gt); const struct xe_gt_sriov_monitor *data; @@ -144,4 +146,6 @@ void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p #undef __format #undef __value } + + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h index 7ca9351a271b..0b8f088d3a16 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h @@ -13,7 +13,7 @@ struct drm_printer; struct xe_gt; void xe_gt_sriov_pf_monitor_flr(struct xe_gt *gt, u32 vfid); -void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p); #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 0461d5513487..ca58ef3f9fd3 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -1030,13 +1030,15 @@ void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) } /** - * xe_gt_sriov_vf_print_config - Print VF self config. + * xe_gt_sriov_vf_print_config() - Print VF self config. * @gt: the &xe_gt * @p: the &drm_printer * * This function is for VF use only. + * + * Return: always 0. */ -void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) +int xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) { struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; struct xe_device *xe = gt_to_xe(gt); @@ -1060,16 +1062,20 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "GuC contexts:\t%u\n", config->num_ctxs); drm_printf(p, "GuC doorbells:\t%u\n", config->num_dbs); + + return 0; } /** - * xe_gt_sriov_vf_print_runtime - Print VF's runtime regs received from PF. + * xe_gt_sriov_vf_print_runtime() - Print VF's runtime regs received from PF. * @gt: the &xe_gt * @p: the &drm_printer * * This function is for VF use only. + * + * Return: always 0. */ -void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) +int xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) { struct vf_runtime_reg *vf_regs = gt->sriov.vf.runtime.regs; unsigned int size = gt->sriov.vf.runtime.num_regs; @@ -1078,16 +1084,20 @@ void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) for (; size--; vf_regs++) drm_printf(p, "%#x = %#x\n", vf_regs->offset, vf_regs->value); + + return 0; } /** - * xe_gt_sriov_vf_print_version - Print VF ABI versions. + * xe_gt_sriov_vf_print_version() - Print VF ABI versions. * @gt: the &xe_gt * @p: the &drm_printer * * This function is for VF use only. + * + * Return: always 0. */ -void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) +int xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); struct xe_uc_fw_version *guc_version = >->sriov.vf.guc_version; @@ -1117,4 +1127,6 @@ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) GUC_RELAY_VERSION_LATEST_MAJOR, GUC_RELAY_VERSION_LATEST_MINOR); drm_printf(p, "\thandshake:\t%u.%u\n", pf_version->major, pf_version->minor); + + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index 0af1dc769fe0..7f6c59b1ef7b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -35,8 +35,8 @@ s64 xe_gt_sriov_vf_ggtt_shift(struct xe_gt *gt); u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg); void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val); -void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p); -void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p); -void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 98bfb127eafc..7d04591e297a 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -2043,8 +2043,10 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) return -ENOENT; - if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) - return -EOPNOTSUPP; + if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) { + ret = -EOPNOTSUPP; + goto err_exec_q; + } } /* diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index f6be3ffee023..04d3ec360c1d 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -234,7 +234,7 @@ static const struct hid_device_id hid_quirks[] = { * used as a driver. See hid_scan_report(). */ static const struct hid_device_id hid_have_special_driver[] = { -#if IS_ENABLED(CONFIG_APPLEDISPLAY) +#if IS_ENABLED(CONFIG_USB_APPLEDISPLAY) { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, 0x9218) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, 0x9219) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, 0x921c) }, diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index 34fb03ae8ee2..c6db3e7c5fd3 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -184,7 +184,9 @@ static int uclogic_input_configured(struct hid_device *hdev, suffix = "System Control"; break; } - } else { + } + + if (suffix) { hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name, suffix); if (!hi->input->name) diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c index 16f780bc879b..cb19057f1191 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c @@ -94,7 +94,7 @@ static int quickspi_get_device_descriptor(struct quickspi_device *qsdev) dev_err_once(qsdev->dev, "Read DEVICE_DESCRIPTOR failed, ret = %d\n", ret); dev_err_once(qsdev->dev, "DEVICE_DESCRIPTOR expected len = %u, actual read = %u\n", input_len, read_len); - return ret; + return ret ?: -EINVAL; } input_rep_type = ((struct input_report_body_header *)read_buf)->input_report_type; @@ -318,7 +318,7 @@ int reset_tic(struct quickspi_device *qsdev) dev_err_once(qsdev->dev, "Read RESET_RESPONSE body failed, ret = %d\n", ret); dev_err_once(qsdev->dev, "RESET_RESPONSE body expected len = %u, actual = %u\n", read_len, actual_read_len); - return ret; + return ret ?: -EINVAL; } input_rep_type = FIELD_GET(HIDSPI_IN_REP_BDY_HDR_REP_TYPE, reset_response); diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index c1f528e292f3..a465a8a7ef5a 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -738,6 +738,7 @@ struct lm90_data { struct mutex update_lock; struct delayed_work alert_work; struct work_struct report_work; + bool shutdown; /* true if shutting down */ bool valid; /* true if register values are valid */ bool alarms_valid; /* true if status register values are valid */ unsigned long last_updated; /* in jiffies */ @@ -1156,6 +1157,9 @@ static void lm90_report_alarms(struct work_struct *work) static int lm90_update_alarms_locked(struct lm90_data *data, bool force) { + if (data->shutdown) + return 0; + if (force || !data->alarms_valid || time_after(jiffies, data->alarms_updated + msecs_to_jiffies(data->update_interval))) { struct i2c_client *client = data->client; @@ -2600,15 +2604,23 @@ static void lm90_restore_conf(void *_data) struct lm90_data *data = _data; struct i2c_client *client = data->client; - cancel_delayed_work_sync(&data->alert_work); - cancel_work_sync(&data->report_work); - /* Restore initial configuration */ if (data->flags & LM90_HAVE_CONVRATE) lm90_write_convrate(data, data->convrate_orig); lm90_write_reg(client, LM90_REG_CONFIG1, data->config_orig); } +static void lm90_stop_work(void *_data) +{ + struct lm90_data *data = _data; + + hwmon_lock(data->hwmon_dev); + data->shutdown = true; + hwmon_unlock(data->hwmon_dev); + cancel_delayed_work_sync(&data->alert_work); + cancel_work_sync(&data->report_work); +} + static int lm90_init_client(struct i2c_client *client, struct lm90_data *data) { struct device_node *np = client->dev.of_node; @@ -2919,6 +2931,10 @@ static int lm90_probe(struct i2c_client *client) data->hwmon_dev = hwmon_dev; + err = devm_add_action_or_reset(&client->dev, lm90_stop_work, data); + if (err) + return err; + if (client->irq) { dev_dbg(dev, "IRQ: %d\n", client->irq); err = devm_request_threaded_irq(dev, client->irq, @@ -2947,7 +2963,8 @@ static void lm90_alert(struct i2c_client *client, enum i2c_alert_protocol type, */ struct lm90_data *data = i2c_get_clientdata(client); - if ((data->flags & LM90_HAVE_BROKEN_ALERT) && + hwmon_lock(data->hwmon_dev); + if (!data->shutdown && (data->flags & LM90_HAVE_BROKEN_ALERT) && (data->current_alarms & data->alert_alarms)) { if (!(data->config & 0x80)) { dev_dbg(&client->dev, "Disabling ALERT#\n"); @@ -2956,6 +2973,7 @@ static void lm90_alert(struct i2c_client *client, enum i2c_alert_protocol type, schedule_delayed_work(&data->alert_work, max_t(int, HZ, msecs_to_jiffies(data->update_interval))); } + hwmon_unlock(data->hwmon_dev); } else { dev_dbg(&client->dev, "Everything OK\n"); } diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index d90f8f80be8e..9631a64cb1eb 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -46,6 +46,7 @@ #define ADM1266_BLACKBOX_OFFSET 0 #define ADM1266_BLACKBOX_SIZE 64 +#define ADM1266_BLACKBOX_MAX_RECORDS 32 #define ADM1266_PMBUS_BLOCK_MAX 255 @@ -60,7 +61,7 @@ struct adm1266_data { u8 *dev_mem; struct mutex buf_mutex; u8 write_buf[ADM1266_PMBUS_BLOCK_MAX + 1] ____cacheline_aligned; - u8 read_buf[ADM1266_PMBUS_BLOCK_MAX + 1] ____cacheline_aligned; + u8 read_buf[ADM1266_PMBUS_BLOCK_MAX + 2] ____cacheline_aligned; }; static const struct nvmem_cell_info adm1266_nvmem_cells[] = { @@ -175,6 +176,8 @@ static int adm1266_gpio_get(struct gpio_chip *chip, unsigned int offset) ret = i2c_smbus_read_block_data(data->client, pmbus_cmd, read_buf); if (ret < 0) return ret; + if (ret < 2) + return -EIO; pins_status = read_buf[0] + (read_buf[1] << 8); if (offset < ADM1266_GPIO_NR) @@ -195,6 +198,8 @@ static int adm1266_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask ret = i2c_smbus_read_block_data(data->client, ADM1266_GPIO_STATUS, read_buf); if (ret < 0) return ret; + if (ret < 2) + return -EIO; status = read_buf[0] + (read_buf[1] << 8); @@ -207,11 +212,12 @@ static int adm1266_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask ret = i2c_smbus_read_block_data(data->client, ADM1266_PDIO_STATUS, read_buf); if (ret < 0) return ret; + if (ret < 2) + return -EIO; status = read_buf[0] + (read_buf[1] << 8); - *bits = 0; - for_each_set_bit_from(gpio_nr, mask, ADM1266_GPIO_NR + ADM1266_PDIO_STATUS) { + for_each_set_bit_from(gpio_nr, mask, ADM1266_GPIO_NR + ADM1266_PDIO_NR) { if (test_bit(gpio_nr - ADM1266_GPIO_NR, &status)) set_bit(gpio_nr, bits); } @@ -347,9 +353,10 @@ static void adm1266_init_debugfs(struct adm1266_data *data) static int adm1266_nvmem_read_blackbox(struct adm1266_data *data, u8 *read_buff) { + u8 record[ADM1266_PMBUS_BLOCK_MAX]; int record_count; char index; - u8 buf[5]; + u8 buf[I2C_SMBUS_BLOCK_MAX]; int ret; ret = i2c_smbus_read_block_data(data->client, ADM1266_BLACKBOX_INFO, buf); @@ -360,15 +367,18 @@ static int adm1266_nvmem_read_blackbox(struct adm1266_data *data, u8 *read_buff) return -EIO; record_count = buf[3]; + if (record_count > ADM1266_BLACKBOX_MAX_RECORDS) + return -EIO; for (index = 0; index < record_count; index++) { - ret = adm1266_pmbus_block_xfer(data, ADM1266_READ_BLACKBOX, 1, &index, read_buff); + ret = adm1266_pmbus_block_xfer(data, ADM1266_READ_BLACKBOX, 1, &index, record); if (ret < 0) return ret; if (ret != ADM1266_BLACKBOX_SIZE) return -EIO; + memcpy(read_buff, record, ADM1266_BLACKBOX_SIZE); read_buff += ADM1266_BLACKBOX_SIZE; } @@ -432,7 +442,7 @@ static int adm1266_set_rtc(struct adm1266_data *data) char write_buf[6]; int i; - kt = ktime_get_seconds(); + kt = ktime_get_real_seconds(); memset(write_buf, 0, sizeof(write_buf)); @@ -462,20 +472,20 @@ static int adm1266_probe(struct i2c_client *client) crc8_populate_msb(pmbus_crc_table, 0x7); mutex_init(&data->buf_mutex); - ret = adm1266_config_gpio(data); + ret = adm1266_set_rtc(data); if (ret < 0) return ret; - ret = adm1266_set_rtc(data); - if (ret < 0) + ret = pmbus_do_probe(client, &data->info); + if (ret) return ret; ret = adm1266_config_nvmem(data); if (ret < 0) return ret; - ret = pmbus_do_probe(client, &data->info); - if (ret) + ret = adm1266_config_gpio(data); + if (ret < 0) return ret; adm1266_init_debugfs(data); diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index fac159f7128d..4143be70eea2 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -639,6 +639,7 @@ int mana_ib_query_port(struct ib_device *ibdev, u32 port, if (mana_ib_is_rnic(dev)) { props->gid_tbl_len = 16; props->ip_gids = true; + props->max_msg_sz = SZ_16M; if (port == 1) props->port_cap_flags = IB_PORT_CM_SUP; } diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index e8a88b378d51..34d03584160c 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -1082,6 +1082,21 @@ static int siw_get_hdr(struct siw_rx_stream *srx) } /* + * Peer-controlled mpa_len must not underflow srx->fpdu_part_rem + * in siw_tcp_rx_data(); a negative value flows as a signed copy + * length into siw_check_mem() and skb_copy_bits(). + */ + if (unlikely(be16_to_cpu(c_hdr->mpa_len) + MPA_HDR_SIZE < + iwarp_pktinfo[opcode].hdr_len)) { + pr_warn_ratelimited("siw: short mpa_len %u for opcode %u (hdr_len %u)\n", + be16_to_cpu(c_hdr->mpa_len), opcode, + iwarp_pktinfo[opcode].hdr_len); + siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_LLP, + LLP_ETYPE_MPA, LLP_ECODE_FPDU_START, 0); + return -EINVAL; + } + + /* * DDP/RDMAP header receive completed. Check if the current * DDP segment starts a new RDMAP message or continues a previously * started RDMAP message. diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 3f305e694fe8..1b1c6ea4ee5a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -295,8 +295,8 @@ remove_group: put_kobj: kobject_del(&srv_path->kobj); destroy_root: - kobject_put(&srv_path->kobj); rtrs_srv_destroy_once_sysfs_root_folders(srv_path); + kobject_put(&srv_path->kobj); return err; } diff --git a/drivers/iommu/amd/debugfs.c b/drivers/iommu/amd/debugfs.c index 20b04996441d..3909a1fb218e 100644 --- a/drivers/iommu/amd/debugfs.c +++ b/drivers/iommu/amd/debugfs.c @@ -26,22 +26,20 @@ static ssize_t iommu_mmio_write(struct file *filp, const char __user *ubuf, { struct seq_file *m = filp->private_data; struct amd_iommu *iommu = m->private; - int ret; - - iommu->dbg_mmio_offset = -1; + int ret, dbg_mmio_offset = iommu->dbg_mmio_offset = -1; if (cnt > OFS_IN_SZ) return -EINVAL; - ret = kstrtou32_from_user(ubuf, cnt, 0, &iommu->dbg_mmio_offset); + ret = kstrtos32_from_user(ubuf, cnt, 0, &dbg_mmio_offset); if (ret) return ret; - if (iommu->dbg_mmio_offset > iommu->mmio_phys_end - sizeof(u64)) { - iommu->dbg_mmio_offset = -1; - return -EINVAL; - } + if (dbg_mmio_offset < 0 || dbg_mmio_offset > + iommu->mmio_phys_end - sizeof(u64)) + return -EINVAL; + iommu->dbg_mmio_offset = dbg_mmio_offset; return cnt; } @@ -49,14 +47,16 @@ static int iommu_mmio_show(struct seq_file *m, void *unused) { struct amd_iommu *iommu = m->private; u64 value; + int dbg_mmio_offset = iommu->dbg_mmio_offset; - if (iommu->dbg_mmio_offset < 0) { + if (dbg_mmio_offset < 0 || dbg_mmio_offset > + iommu->mmio_phys_end - sizeof(u64)) { seq_puts(m, "Please provide mmio register's offset\n"); return 0; } - value = readq(iommu->mmio_base + iommu->dbg_mmio_offset); - seq_printf(m, "Offset:0x%x Value:0x%016llx\n", iommu->dbg_mmio_offset, value); + value = readq(iommu->mmio_base + dbg_mmio_offset); + seq_printf(m, "Offset:0x%x Value:0x%016llx\n", dbg_mmio_offset, value); return 0; } @@ -67,23 +67,20 @@ static ssize_t iommu_capability_write(struct file *filp, const char __user *ubuf { struct seq_file *m = filp->private_data; struct amd_iommu *iommu = m->private; - int ret; - - iommu->dbg_cap_offset = -1; + int ret, dbg_cap_offset = iommu->dbg_cap_offset = -1; if (cnt > OFS_IN_SZ) return -EINVAL; - ret = kstrtou32_from_user(ubuf, cnt, 0, &iommu->dbg_cap_offset); + ret = kstrtos32_from_user(ubuf, cnt, 0, &dbg_cap_offset); if (ret) return ret; /* Capability register at offset 0x14 is the last IOMMU capability register. */ - if (iommu->dbg_cap_offset > 0x14) { - iommu->dbg_cap_offset = -1; + if (dbg_cap_offset < 0 || dbg_cap_offset > 0x14) return -EINVAL; - } + iommu->dbg_cap_offset = dbg_cap_offset; return cnt; } @@ -91,21 +88,21 @@ static int iommu_capability_show(struct seq_file *m, void *unused) { struct amd_iommu *iommu = m->private; u32 value; - int err; + int err, dbg_cap_offset = iommu->dbg_cap_offset; - if (iommu->dbg_cap_offset < 0) { + if (dbg_cap_offset < 0 || dbg_cap_offset > 0x14) { seq_puts(m, "Please provide capability register's offset in the range [0x00 - 0x14]\n"); return 0; } - err = pci_read_config_dword(iommu->dev, iommu->cap_ptr + iommu->dbg_cap_offset, &value); + err = pci_read_config_dword(iommu->dev, iommu->cap_ptr + dbg_cap_offset, &value); if (err) { seq_printf(m, "Not able to read capability register at 0x%x\n", - iommu->dbg_cap_offset); + dbg_cap_offset); return 0; } - seq_printf(m, "Offset:0x%x Value:0x%08x\n", iommu->dbg_cap_offset, value); + seq_printf(m, "Offset:0x%x Value:0x%08x\n", dbg_cap_offset, value); return 0; } diff --git a/drivers/irqchip/irq-ath79-cpu.c b/drivers/irqchip/irq-ath79-cpu.c index 923e4bba3776..9b7273a7f8ce 100644 --- a/drivers/irqchip/irq-ath79-cpu.c +++ b/drivers/irqchip/irq-ath79-cpu.c @@ -85,10 +85,3 @@ static int __init ar79_cpu_intc_of_init( } IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc", ar79_cpu_intc_of_init); - -void __init ath79_cpu_irq_init(unsigned irq_wb_chan2, unsigned irq_wb_chan3) -{ - irq_wb_chan[2] = irq_wb_chan2; - irq_wb_chan[3] = irq_wb_chan3; - mips_cpu_irq_init(); -} diff --git a/drivers/mfd/bcm2835-pm.c b/drivers/mfd/bcm2835-pm.c index 3cb2b9423121..8bed59816e82 100644 --- a/drivers/mfd/bcm2835-pm.c +++ b/drivers/mfd/bcm2835-pm.c @@ -108,6 +108,7 @@ static const struct of_device_id bcm2835_pm_of_match[] = { { .compatible = "brcm,bcm2835-pm-wdt", }, { .compatible = "brcm,bcm2835-pm", }, { .compatible = "brcm,bcm2711-pm", }, + { .compatible = "brcm,bcm2712-pm", }, {}, }; MODULE_DEVICE_TABLE(of, bcm2835_pm_of_match); diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 548b85befbf4..4571da0d7a8f 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -973,12 +973,16 @@ mt7530_set_ageing_time(struct dsa_switch *ds, unsigned int msecs) unsigned int age_count; unsigned int age_unit; - /* Applied timer is (AGE_CNT + 1) * (AGE_UNIT + 1) seconds */ - if (secs < 1 || secs > (AGE_CNT_MAX + 1) * (AGE_UNIT_MAX + 1)) - return -ERANGE; - - /* iterate through all possible age_count to find the closest pair */ - for (tmp_age_count = 0; tmp_age_count <= AGE_CNT_MAX; ++tmp_age_count) { + /* Applied timer is (AGE_CNT + 1) * (AGE_UNIT + 1) seconds. + * The DSA core has already validated the range using + * ds->ageing_time_min and ds->ageing_time_max. + * + * Iterate through all possible age_count values to find the closest + * pair. Start from 1 because the per-entry aging counter is + * initialized to AGE_CNT and a value of 0 means the entry will + * never be aged out. + */ + for (tmp_age_count = 1; tmp_age_count <= AGE_CNT_MAX; ++tmp_age_count) { unsigned int tmp_age_unit = secs / (tmp_age_count + 1) - 1; if (tmp_age_unit <= AGE_UNIT_MAX) { @@ -1246,37 +1250,40 @@ static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface) static void mt753x_trap_frames(struct mt7530_priv *priv) { - /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them - * VLAN-untagged. + /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress + * them with the EG_TAG attribute set to disabled (system default) + * so that any VLAN tags in the frame are not modified by the + * switch egress VLAN tag processing. This preserves VLAN tags + * for reception on VLAN sub-interfaces. */ mt7530_rmw(priv, MT753X_BPC, PAE_BPDU_FR | PAE_EG_TAG_MASK | PAE_PORT_FW_MASK | BPDU_EG_TAG_MASK | BPDU_PORT_FW_MASK, - PAE_BPDU_FR | PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + PAE_BPDU_FR | PAE_EG_TAG(MT7530_VLAN_EG_DISABLED) | PAE_PORT_FW(TO_CPU_FW_CPU_ONLY) | - BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + BPDU_EG_TAG(MT7530_VLAN_EG_DISABLED) | TO_CPU_FW_CPU_ONLY); - /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress - * them VLAN-untagged. + /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and + * egress them with EG_TAG disabled. */ mt7530_rmw(priv, MT753X_RGAC1, R02_BPDU_FR | R02_EG_TAG_MASK | R02_PORT_FW_MASK | R01_BPDU_FR | R01_EG_TAG_MASK | R01_PORT_FW_MASK, - R02_BPDU_FR | R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + R02_BPDU_FR | R02_EG_TAG(MT7530_VLAN_EG_DISABLED) | R02_PORT_FW(TO_CPU_FW_CPU_ONLY) | R01_BPDU_FR | - R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + R01_EG_TAG(MT7530_VLAN_EG_DISABLED) | TO_CPU_FW_CPU_ONLY); - /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress - * them VLAN-untagged. + /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and + * egress them with EG_TAG disabled. */ mt7530_rmw(priv, MT753X_RGAC2, R0E_BPDU_FR | R0E_EG_TAG_MASK | R0E_PORT_FW_MASK | R03_BPDU_FR | R03_EG_TAG_MASK | R03_PORT_FW_MASK, - R0E_BPDU_FR | R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + R0E_BPDU_FR | R0E_EG_TAG(MT7530_VLAN_EG_DISABLED) | R0E_PORT_FW(TO_CPU_FW_CPU_ONLY) | R03_BPDU_FR | - R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + R03_EG_TAG(MT7530_VLAN_EG_DISABLED) | TO_CPU_FW_CPU_ONLY); } @@ -2378,6 +2385,8 @@ mt7530_setup(struct dsa_switch *ds) ds->assisted_learning_on_cpu_port = true; ds->mtu_enforcement_ingress = true; + ds->ageing_time_min = 2 * 1000; + ds->ageing_time_max = (AGE_CNT_MAX + 1) * (AGE_UNIT_MAX + 1) * 1000; if (priv->id == ID_MT7530) { regulator_set_voltage(priv->core_pwr, 1000000, 1000000); @@ -2567,6 +2576,8 @@ mt7531_setup_common(struct dsa_switch *ds) ds->assisted_learning_on_cpu_port = true; ds->mtu_enforcement_ingress = true; + ds->ageing_time_min = 2 * 1000; + ds->ageing_time_max = (AGE_CNT_MAX + 1) * (AGE_UNIT_MAX + 1) * 1000; mt753x_trap_frames(priv); diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 27d62acfcc39..9781a6fc9bf9 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1800,11 +1800,8 @@ static int airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) u32 val, pse_port, chan; int src_port; - /* Forward the traffic to the proper GDM port */ - pse_port = port->id == AIROHA_GDM3_IDX ? FE_PSE_PORT_GDM3 - : FE_PSE_PORT_GDM4; airoha_set_gdm_port_fwd_cfg(eth, REG_GDM_FWD_CFG(AIROHA_GDM2_IDX), - pse_port); + FE_PSE_PORT_DROP); airoha_fe_clear(eth, REG_GDM_FWD_CFG(AIROHA_GDM2_IDX), GDM_STRIP_CRC_MASK); @@ -1822,6 +1819,11 @@ static int airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) GDM_SHORT_LEN_MASK | GDM_LONG_LEN_MASK, FIELD_PREP(GDM_SHORT_LEN_MASK, 60) | FIELD_PREP(GDM_LONG_LEN_MASK, AIROHA_MAX_MTU)); + /* Forward the traffic to the proper GDM port */ + pse_port = port->id == AIROHA_GDM3_IDX ? FE_PSE_PORT_GDM3 + : FE_PSE_PORT_GDM4; + airoha_set_gdm_port_fwd_cfg(eth, REG_GDM_FWD_CFG(AIROHA_GDM2_IDX), + pse_port); /* Disable VIP and IFC for GDM2 */ airoha_fe_clear(eth, REG_FE_VIP_PORT_EN, BIT(AIROHA_GDM2_IDX)); diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c index 04c5e3abd8d7..810a0cd9bcac 100644 --- a/drivers/net/ethernet/amd/pds_core/debugfs.c +++ b/drivers/net/ethernet/amd/pds_core/debugfs.c @@ -64,9 +64,14 @@ DEFINE_SHOW_ATTRIBUTE(identity); void pdsc_debugfs_add_ident(struct pdsc *pdsc) { + struct dentry *dentry; + /* This file will already exist in the reset flow */ - if (debugfs_lookup("identity", pdsc->dentry)) + dentry = debugfs_lookup("identity", pdsc->dentry); + if (!IS_ERR_OR_NULL(dentry)) { + dput(dentry); return; + } debugfs_create_file("identity", 0400, pdsc->dentry, pdsc, &identity_fops); diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index 495ef4ef8c10..1d1e559bd99d 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -162,12 +162,19 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds) dev_dbg(dev, "DEVCMD %d %s after %ld secs\n", opcode, pdsc_devcmd_str(opcode), duration / HZ); - if ((!done || timeout) && running) { + if (!running) { + dev_err(dev, "DEVCMD %d %s fw not running\n", + opcode, pdsc_devcmd_str(opcode)); + pdsc_devcmd_clean(pdsc); + return -ENXIO; + } + + if (!done || timeout) { dev_err(dev, "DEVCMD %d %s timeout, done %d timeout %d max_seconds=%d\n", opcode, pdsc_devcmd_str(opcode), done, timeout, max_seconds); - err = -ETIMEDOUT; pdsc_devcmd_clean(pdsc); + return -ETIMEDOUT; } status = pdsc_devcmd_status(pdsc); diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index d8dc39da4161..621791a3c543 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -121,12 +121,14 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, listlen = min(fw_list.num_fw_slots, ARRAY_SIZE(fw_list.fw_names)); for (i = 0; i < listlen; i++) { + char *fw_ver = fw_list.fw_names[i].fw_version; + if (i < ARRAY_SIZE(fw_slotnames)) strscpy(buf, fw_slotnames[i], sizeof(buf)); else snprintf(buf, sizeof(buf), "fw.slot_%d", i); - err = devlink_info_version_stored_put(req, buf, - fw_list.fw_names[i].fw_version); + fw_ver[sizeof(fw_list.fw_names[i].fw_version) - 1] = '\0'; + err = devlink_info_version_stored_put(req, buf, fw_ver); if (err) return err; } diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index cbc730c7cff2..8e6bd99b22d7 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1856,6 +1856,9 @@ static int ag71xx_probe(struct platform_device *pdev) ag71xx_int_disable(ag, AG71XX_INT_POLL); ndev->irq = platform_get_irq(pdev, 0); + if (ndev->irq < 0) + return ndev->irq; + err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt, 0x0, dev_name(&pdev->dev), ndev); if (err) { diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 63cdf6d9d077..abf6a6451cb1 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1369,13 +1369,12 @@ void bcmgenet_eee_enable_set(struct net_device *dev, bool enable) reg &= ~(TBUF_EEE_EN | TBUF_PM_EN); bcmgenet_writel(reg, priv->base + off); - /* Do the same for thing for RBUF */ + /* RBUF EEE/PM can break the RX path on GENET. Keep it disabled. */ reg = bcmgenet_rbuf_readl(priv, RBUF_ENERGY_CTRL); - if (enable) - reg |= RBUF_EEE_EN | RBUF_PM_EN; - else + if (reg & (RBUF_EEE_EN | RBUF_PM_EN)) { reg &= ~(RBUF_EEE_EN | RBUF_PM_EN); - bcmgenet_rbuf_writel(priv, reg, RBUF_ENERGY_CTRL); + bcmgenet_rbuf_writel(priv, reg, RBUF_ENERGY_CTRL); + } if (!enable && priv->clk_eee_enabled) { clk_disable_unprepare(priv->clk_eee); diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c index fa5857923db4..b4bfd6c174e7 100644 --- a/drivers/net/ethernet/cirrus/cs89x0.c +++ b/drivers/net/ethernet/cirrus/cs89x0.c @@ -1271,7 +1271,6 @@ static const struct net_device_ops net_ops = { static void __init reset_chip(struct net_device *dev) { -#if !defined(CONFIG_MACH_MX31ADS) struct net_local *lp = netdev_priv(dev); unsigned long reset_start_time; @@ -1298,7 +1297,6 @@ static void __init reset_chip(struct net_device *dev) while ((readreg(dev, PP_SelfST) & INIT_DONE) == 0 && time_before(jiffies, reset_start_time + 2)) ; -#endif /* !CONFIG_MACH_MX31ADS */ } /* This is the real probe routine. diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 6a2004bbe87f..40bec34f06a7 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -122,6 +122,9 @@ struct gemini_ethernet_port { struct napi_struct napi; struct hrtimer rx_coalesce_timer; unsigned int rx_coalesce_nsecs; + struct sk_buff *rx_skb; + unsigned int rx_frag_nr; + unsigned int freeq_refill; struct gmac_txq txq[TX_QUEUE_NUM]; unsigned int txq_order; @@ -1443,10 +1446,11 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) unsigned short m = (1 << port->rxq_order) - 1; struct gemini_ethernet *geth = port->geth; void __iomem *ptr_reg = port->rxq_rwptr; + unsigned int frag_nr = port->rx_frag_nr; + struct sk_buff *skb = port->rx_skb; unsigned int frame_len, frag_len; struct gmac_rxdesc *rx = NULL; struct gmac_queue_page *gpage; - static struct sk_buff *skb; union gmac_rxdesc_0 word0; union gmac_rxdesc_1 word1; union gmac_rxdesc_3 word3; @@ -1456,7 +1460,6 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) unsigned short r, w; union dma_rwptr rw; dma_addr_t mapping; - int frag_nr = 0; spin_lock_irqsave(&geth->irq_lock, flags); rw.bits32 = readl(ptr_reg); @@ -1492,6 +1495,12 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) gpage = gmac_get_queue_page(geth, port, mapping + PAGE_SIZE); if (!gpage) { dev_err(geth->dev, "could not find mapping\n"); + if (skb) { + napi_free_frags(&port->napi); + port->stats.rx_dropped++; + skb = NULL; + frag_nr = 0; + } continue; } page = gpage->page; @@ -1500,6 +1509,8 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) if (skb) { napi_free_frags(&port->napi); port->stats.rx_dropped++; + skb = NULL; + frag_nr = 0; } skb = gmac_skb_if_good_frame(port, word0, frame_len); @@ -1534,6 +1545,7 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) if (word3.bits32 & EOF_BIT) { napi_gro_frags(&port->napi); skb = NULL; + frag_nr = 0; --budget; } continue; @@ -1542,6 +1554,7 @@ err_drop: if (skb) { napi_free_frags(&port->napi); skb = NULL; + frag_nr = 0; } if (mapping) @@ -1550,6 +1563,8 @@ err_drop: port->stats.rx_dropped++; } + port->rx_skb = skb; + port->rx_frag_nr = frag_nr; writew(r, ptr_reg); return budget; } @@ -1877,6 +1892,8 @@ static int gmac_stop(struct net_device *netdev) gmac_disable_tx_rx(netdev); gmac_stop_dma(port); napi_disable(&port->napi); + port->rx_skb = NULL; + port->rx_frag_nr = 0; gmac_enable_irq(netdev, 0); gmac_cleanup_rxq(netdev); diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 9fc8681cc58e..270f5a00ece3 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -537,14 +537,14 @@ void ice_dcb_rebuild(struct ice_pf *pf) struct ice_dcbx_cfg *err_cfg; int ret; + mutex_lock(&pf->tc_mutex); + ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); if (ret) { dev_err(dev, "Query Port ETS failed\n"); goto dcb_error; } - mutex_lock(&pf->tc_mutex); - if (!pf->hw.port_info->qos_cfg.is_sw_lldp) ice_cfg_etsrec_defaults(pf->hw.port_info); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c064c3653c54..dc5d821bf334 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3764,7 +3764,7 @@ int ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) ret = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, ICE_MCAST_VLAN_PROMISC_BITS, vid); - if (ret) + if (ret && ret != -EEXIST) goto finish; } @@ -4186,6 +4186,12 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked) } ice_pf_dcb_recfg(pf, locked); ice_vsi_open(vsi); + /* Rx rings are reallocated during VSI rebuild and lose their ptp_rx + * flag. Restore timestamp mode so newly allocated rings are set up + * for hardware Rx timestamping. + */ + if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + ice_ptp_restore_timestamp_mode(pf); goto done; rebuild_err: @@ -8069,7 +8075,7 @@ int ice_set_rss_hfunc(struct ice_vsi *vsi, u8 hfunc) ctx->info.q_opt_rss |= FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hfunc); ctx->info.q_opt_tc = vsi->info.q_opt_tc; - ctx->info.q_opt_flags = vsi->info.q_opt_rss; + ctx->info.q_opt_flags = vsi->info.q_opt_flags; err = ice_update_vsi(hw, vsi->idx, ctx, NULL); if (err) { diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index d461e00d15e9..99bf38cf352a 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -2141,16 +2141,23 @@ int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port) } incval = (u64)hi << 32 | lo; + if (!ice_ptp_lock(hw)) { + dev_err(ice_hw_to_dev(hw), "Failed to acquire PTP semaphore\n"); + return -EBUSY; + } + err = ice_write_40b_ptp_reg_eth56g(hw, port, PHY_REG_TIMETUS_L, incval); if (err) - return err; + goto err_ptp_unlock; err = ice_ptp_one_port_cmd(hw, port, ICE_PTP_INIT_INCVAL); if (err) - return err; + goto err_ptp_unlock; ice_ptp_exec_tmr_cmd(hw); + ice_ptp_unlock(hw); + err = ice_sync_phy_timer_eth56g(hw, port); if (err) return err; @@ -2166,6 +2173,10 @@ int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port) ice_debug(hw, ICE_DBG_PTP, "Enabled clock on PHY port %u\n", port); return 0; + +err_ptp_unlock: + ice_ptp_unlock(hw); + return err; } /** @@ -4503,18 +4514,17 @@ static int ice_read_phy_tstamp_ll_e810(struct ice_hw *hw, u8 idx, u8 *hi, u32 *lo) { struct ice_e810_params *params = &hw->ptp.phy.e810; - unsigned long flags; u32 val; int err; - spin_lock_irqsave(¶ms->atqbal_wq.lock, flags); + spin_lock_irq(¶ms->atqbal_wq.lock); /* Wait for any pending in-progress low latency interrupt */ err = wait_event_interruptible_locked_irq(params->atqbal_wq, !(params->atqbal_flags & ATQBAL_FLAGS_INTR_IN_PROGRESS)); if (err) { - spin_unlock_irqrestore(¶ms->atqbal_wq.lock, flags); + spin_unlock_irq(¶ms->atqbal_wq.lock); return err; } @@ -4529,7 +4539,7 @@ ice_read_phy_tstamp_ll_e810(struct ice_hw *hw, u8 idx, u8 *hi, u32 *lo) REG_LL_PROXY_H); if (err) { ice_debug(hw, ICE_DBG_PTP, "Failed to read PTP timestamp using low latency read\n"); - spin_unlock_irqrestore(¶ms->atqbal_wq.lock, flags); + spin_unlock_irq(¶ms->atqbal_wq.lock); return err; } @@ -4539,7 +4549,7 @@ ice_read_phy_tstamp_ll_e810(struct ice_hw *hw, u8 idx, u8 *hi, u32 *lo) /* Read the low 32 bit value and set the TS valid bit */ *lo = rd32(hw, REG_LL_PROXY_L) | TS_VALID; - spin_unlock_irqrestore(¶ms->atqbal_wq.lock, flags); + spin_unlock_irq(¶ms->atqbal_wq.lock); return 0; } @@ -5254,9 +5264,13 @@ static void ice_ptp_init_phy_e830(struct ice_ptp_hw *ptp) */ bool ice_ptp_lock(struct ice_hw *hw) { + struct ice_pf *pf = container_of(hw, struct ice_pf, hw); u32 hw_lock; int i; + if (!ice_is_primary(hw)) + hw = ice_get_primary_hw(pf); + #define MAX_TRIES 15 for (i = 0; i < MAX_TRIES; i++) { @@ -5283,6 +5297,11 @@ bool ice_ptp_lock(struct ice_hw *hw) */ void ice_ptp_unlock(struct ice_hw *hw) { + struct ice_pf *pf = container_of(hw, struct ice_pf, hw); + + if (!ice_is_primary(hw)) + hw = ice_get_primary_hw(pf); + wr32(hw, PFTSYN_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), 0); } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index e0774a640955..73f08d02f9c7 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -2594,8 +2594,8 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) ice_trace(xmit_frame_ring, tx_ring, skb); - /* record the location of the first descriptor for this packet */ - first = &tx_ring->tx_buf[tx_ring->next_to_use]; + if (unlikely(ipv6_hopopt_jumbo_remove(skb))) + goto out_drop; count = ice_xmit_desc_count(skb); if (ice_chk_linearize(skb, count)) { @@ -2622,6 +2622,8 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) offload.tx_ring = tx_ring; + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_buf[tx_ring->next_to_use]; first->skb = skb; first->type = ICE_TX_BUF_SKB; first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); @@ -2686,7 +2688,6 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) out_drop: ice_trace(xmit_frame_ring_drop, tx_ring, skb); dev_kfree_skb_any(skb); - first->type = ICE_TX_BUF_EMPTY; return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/intel/ice/virt/queues.c b/drivers/net/ethernet/intel/ice/virt/queues.c index 370f6ec2a374..a82024bf388b 100644 --- a/drivers/net/ethernet/intel/ice/virt/queues.c +++ b/drivers/net/ethernet/intel/ice/virt/queues.c @@ -840,7 +840,7 @@ int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) if (qpi->rxq.databuffer_size != 0 && (qpi->rxq.databuffer_size > ((16 * 1024) - 128) || - qpi->rxq.databuffer_size < 1024)) + qpi->rxq.databuffer_size < 128)) goto error_param; ring->rx_buf_len = qpi->rxq.databuffer_size; if (qpi->rxq.max_pkt_size > max_frame_size || diff --git a/drivers/net/ethernet/intel/idpf/idpf_ptp.c b/drivers/net/ethernet/intel/idpf/idpf_ptp.c index 0a8b50350b86..31c5593550e1 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ptp.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ptp.c @@ -949,6 +949,8 @@ int idpf_ptp_init(struct idpf_adapter *adapter) goto free_ptp; } + spin_lock_init(&adapter->ptp->read_dev_clk_lock); + err = idpf_ptp_create_clock(adapter); if (err) goto free_ptp; @@ -974,8 +976,6 @@ int idpf_ptp_init(struct idpf_adapter *adapter) goto remove_clock; } - spin_lock_init(&adapter->ptp->read_dev_clk_lock); - pci_dbg(adapter->pdev, "PTP init successful\n"); return 0; diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 8a110145bfee..52de2bcbadbe 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -34,6 +34,7 @@ static int igc_fpe_init_smd_frame(struct igc_ring *ring, return -ENOMEM; } + buffer->type = IGC_TX_BUFFER_TYPE_SKB; buffer->skb = skb; buffer->protocol = 0; buffer->bytecount = skb->len; @@ -109,10 +110,16 @@ static int igc_fpe_xmit_smd_frame(struct igc_adapter *adapter, __netif_tx_lock(nq, cpu); err = igc_fpe_init_tx_descriptor(ring, skb, type); - igc_flush_tx_descriptors(ring); + if (err) + goto err_free_skb_any; + igc_flush_tx_descriptors(ring); __netif_tx_unlock(nq); + return 0; +err_free_skb_any: + __netif_tx_unlock(nq); + dev_kfree_skb_any(skb); return err; } diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index d5ce20f47def..c999abd78481 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1221,6 +1221,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, ether_addr_equal(rx_ring->netdev->dev_addr, eth_hdr(skb)->h_source)) { dev_kfree_skb_irq(skb); + skb = NULL; continue; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 81b55f1416e0..c6b5f6564ed0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1294,13 +1294,18 @@ static inline void link_status_user_format(u64 lstat, struct cgx_link_user_info *linfo, struct cgx *cgx, u8 lmac_id) { + unsigned int speed; + linfo->link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat); linfo->full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat); - linfo->speed = cgx_speed_mbps[FIELD_GET(RESP_LINKSTAT_SPEED, lstat)]; linfo->an = FIELD_GET(RESP_LINKSTAT_AN, lstat); linfo->fec = FIELD_GET(RESP_LINKSTAT_FEC, lstat); linfo->lmac_type_id = FIELD_GET(RESP_LINKSTAT_LMAC_TYPE, lstat); + speed = FIELD_GET(RESP_LINKSTAT_SPEED, lstat); + linfo->speed = speed < ARRAY_SIZE(cgx_speed_mbps) ? + cgx_speed_mbps[speed] : 0; + if (linfo->lmac_type_id >= LMAC_MODE_MAX) { dev_err(&cgx->pdev->dev, "Unknown lmac_type_id %d reported by firmware on cgx port%d:%d", linfo->lmac_type_id, cgx->cgx_id, lmac_id); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 8658cb2143df..e28675fe1890 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -837,7 +837,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u16 vf_func; /* Only CGX PF/VF can add allmulticast entry */ - if (is_lbk_vf(rvu, pcifunc) && is_sdp_vf(rvu, pcifunc)) + if (is_lbk_vf(rvu, pcifunc) || is_sdp_vf(rvu, pcifunc)) return; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c index b476733a0234..3271ab5539a5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c @@ -609,7 +609,7 @@ static int rvu_rep_rsrc_init(struct otx2_nic *priv) err = otx2_init_hw_resources(priv); if (err) - goto err_free_rsrc; + goto err_free_mem; /* Set maximum frame size allowed in HW */ err = otx2_hw_set_mtu(priv, priv->hw.max_mtu); @@ -621,6 +621,7 @@ static int rvu_rep_rsrc_init(struct otx2_nic *priv) err_free_rsrc: otx2_free_hw_resources(priv); +err_free_mem: otx2_free_queue_mem(qset); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index d6ace2b6fc1d..198033620c83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -159,13 +159,13 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx) * channels are being closed for other reason and this work is not * relevant anymore. */ - while (!netdev_trylock(sq->netdev)) { + while (!netdev_trylock(priv->netdev)) { if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)) return 0; msleep(20); } - err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats); + err = mlx5e_health_channel_eq_recover(priv->netdev, eq, sq->cq.ch_stats); if (!err) { to_ctx->status = 0; /* this sq recovered */ goto out; @@ -185,7 +185,7 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx) "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", err); out: - netdev_unlock(sq->netdev); + netdev_unlock(priv->netdev); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index f03507a522b4..51fb857a2766 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -794,8 +794,10 @@ static int mlx5e_xfrm_add_state(struct net_device *dev, sa_entry->dev = dev; sa_entry->ipsec = ipsec; /* Check if this SA is originated from acquire flow temporary SA */ - if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) - goto out; + if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) { + x->xso.offload_handle = (unsigned long)sa_entry; + return 0; + } err = mlx5e_xfrm_validate_state(priv->mdev, x, extack); if (err) @@ -872,7 +874,6 @@ static int mlx5e_xfrm_add_state(struct net_device *dev, xa_unlock_bh(&ipsec->sadb); } -out: x->xso.offload_handle = (unsigned long)sa_entry; if (allow_tunnel_mode) mlx5_eswitch_unblock_encap(priv->mdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 3cfe743610d3..ab50d2c734ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -142,7 +142,8 @@ static int mlx5_esw_ipsec_modify_flow_dests(struct mlx5_eswitch *esw, attr = flow->attr; esw_attr = attr->esw_attr; - if (esw_attr->out_count - esw_attr->split_count > 1) + if (!esw_attr->out_count || + esw_attr->out_count - esw_attr->split_count > 1) return 0; err = mlx5_eswitch_restore_ipsec_rule(esw, flow->rule[0], esw_attr, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 47752d3fde0b..1179a6e127c5 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -749,11 +749,10 @@ static void lan966x_cleanup_ports(struct lan966x *lan966x) for (p = 0; p < lan966x->num_phys_ports; p++) { port = lan966x->ports[p]; - if (!port) + if (!port || !port->dev) continue; - if (port->dev) - unregister_netdev(port->dev); + unregister_netdev(port->dev); lan966x_xdp_port_deinit(port); if (lan966x->fdma && lan966x->fdma_ndev == port->dev) @@ -873,6 +872,9 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p, err = register_netdev(dev); if (err) { dev_err(lan966x->dev, "register_netdev failed\n"); + phylink_destroy(phylink); + port->phylink = NULL; + port->dev = NULL; return err; } diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index 840c6b8957c9..5faf4ca75b0f 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -77,21 +77,19 @@ static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, } static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, - struct hwc_work_request *rx_req) + struct hwc_work_request *rx_req, u16 msg_id) { const struct gdma_resp_hdr *resp_msg = rx_req->buf_va; struct hwc_caller_ctx *ctx; int err; - if (!test_bit(resp_msg->response.hwc_msg_id, - hwc->inflight_msg_res.map)) { - dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", - resp_msg->response.hwc_msg_id); + if (!test_bit(msg_id, hwc->inflight_msg_res.map)) { + dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", msg_id); mana_hwc_post_rx_wqe(hwc->rxq, rx_req); return; } - ctx = hwc->caller_ctx + resp_msg->response.hwc_msg_id; + ctx = hwc->caller_ctx + msg_id; err = mana_hwc_verify_resp_msg(ctx, resp_msg, resp_len); if (err) goto out; @@ -251,6 +249,7 @@ static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id, struct gdma_sge *sge; u64 rq_base_addr; u64 rx_req_idx; + u16 msg_id; u8 *wqe; if (WARN_ON_ONCE(hwc_rxq->gdma_wq->id != gdma_rxq_id)) @@ -266,16 +265,26 @@ static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id, rq_base_addr = hwc_rxq->msg_buf->mem_info.dma_handle; rx_req_idx = (sge->address - rq_base_addr) / hwc->max_req_msg_size; + if (rx_req_idx >= hwc_rxq->msg_buf->num_reqs) { + dev_err(hwc->dev, "HWC RX: wrong rx_req_idx=%llu, num_reqs=%u\n", + rx_req_idx, hwc_rxq->msg_buf->num_reqs); + return; + } + rx_req = &hwc_rxq->msg_buf->reqs[rx_req_idx]; resp = (struct gdma_resp_hdr *)rx_req->buf_va; - if (resp->response.hwc_msg_id >= hwc->num_inflight_msg) { - dev_err(hwc->dev, "HWC RX: wrong msg_id=%u\n", - resp->response.hwc_msg_id); + /* Read msg_id once from DMA buffer to prevent TOCTOU: + * DMA memory is shared/unencrypted in CVMs - host can + * modify it between reads. + */ + msg_id = READ_ONCE(resp->response.hwc_msg_id); + if (msg_id >= hwc->num_inflight_msg) { + dev_err(hwc->dev, "HWC RX: wrong msg_id=%u\n", msg_id); return; } - mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, rx_req); + mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, rx_req, msg_id); /* Can no longer use 'resp', because the buffer is posted to the HW * in mana_hwc_handle_resp() above. diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 33f4f58ee51c..1fb09372c25a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -1038,11 +1038,13 @@ static void qed_cid_map_free(struct qed_hwfn *p_hwfn) for (type = 0; type < MAX_CONN_TYPES; type++) { bitmap_free(p_mngr->acquired[type].cid_map); + p_mngr->acquired[type].cid_map = NULL; p_mngr->acquired[type].max_count = 0; p_mngr->acquired[type].start_cid = 0; for (vf = 0; vf < MAX_NUM_VFS; vf++) { bitmap_free(p_mngr->acquired_vf[type][vf].cid_map); + p_mngr->acquired_vf[type][vf].cid_map = NULL; p_mngr->acquired_vf[type][vf].max_count = 0; p_mngr->acquired_vf[type][vf].start_cid = 0; } diff --git a/drivers/net/ethernet/ti/icssm/icssm_prueth.c b/drivers/net/ethernet/ti/icssm/icssm_prueth.c index 293b7af04263..cc92f2068584 100644 --- a/drivers/net/ethernet/ti/icssm/icssm_prueth.c +++ b/drivers/net/ethernet/ti/icssm/icssm_prueth.c @@ -1347,6 +1347,7 @@ static int icssm_prueth_probe(struct platform_device *pdev) dev_err(dev, "%pOF error reading port_id %d\n", eth_node, ret); of_node_put(eth_node); + of_node_put(eth_ports_node); return ret; } diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index d3dc0914450a..0534d2471e61 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -211,12 +211,12 @@ static void ifb_get_strings(struct net_device *dev, u32 stringset, u8 *buf) switch (stringset) { case ETH_SS_STATS: - for (i = 0; i < dev->real_num_rx_queues; i++) + for (i = 0; i < dev->num_tx_queues; i++) for (j = 0; j < IFB_Q_STATS_LEN; j++) ethtool_sprintf(&p, "rx_queue_%u_%.18s", i, ifb_q_stats_desc[j].desc); - for (i = 0; i < dev->real_num_tx_queues; i++) + for (i = 0; i < dev->num_tx_queues; i++) for (j = 0; j < IFB_Q_STATS_LEN; j++) ethtool_sprintf(&p, "tx_queue_%u_%.18s", i, ifb_q_stats_desc[j].desc); @@ -229,8 +229,7 @@ static int ifb_get_sset_count(struct net_device *dev, int sset) { switch (sset) { case ETH_SS_STATS: - return IFB_Q_STATS_LEN * (dev->real_num_rx_queues + - dev->real_num_tx_queues); + return IFB_Q_STATS_LEN * dev->num_tx_queues * 2; default: return -EOPNOTSUPP; } @@ -262,12 +261,12 @@ static void ifb_get_ethtool_stats(struct net_device *dev, struct ifb_q_private *txp; int i; - for (i = 0; i < dev->real_num_rx_queues; i++) { + for (i = 0; i < dev->num_tx_queues; i++) { txp = dp->tx_private + i; ifb_fill_stats_data(&data, &txp->rx_stats); } - for (i = 0; i < dev->real_num_tx_queues; i++) { + for (i = 0; i < dev->num_tx_queues; i++) { txp = dp->tx_private + i; ifb_fill_stats_data(&data, &txp->tx_stats); } diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c index 955c9a37e1f8..c03e58e28a86 100644 --- a/drivers/net/ovpn/io.c +++ b/drivers/net/ovpn/io.c @@ -196,7 +196,7 @@ void ovpn_decrypt_post(void *data, int ret) skb = NULL; drop: if (unlikely(skb)) - dev_dstats_rx_dropped(peer->ovpn->dev); + ovpn_dev_dstats_rx_dropped(peer->ovpn->dev); kfree_skb(skb); drop_nocount: if (likely(peer)) @@ -220,7 +220,7 @@ void ovpn_recv(struct ovpn_peer *peer, struct sk_buff *skb) net_info_ratelimited("%s: no available key for peer %u, key-id: %u\n", netdev_name(peer->ovpn->dev), peer->id, key_id); - dev_dstats_rx_dropped(peer->ovpn->dev); + ovpn_dev_dstats_rx_dropped(peer->ovpn->dev); kfree_skb(skb); ovpn_peer_put(peer); return; @@ -298,7 +298,7 @@ err_unlock: rcu_read_unlock(); err: if (unlikely(skb)) - dev_dstats_tx_dropped(peer->ovpn->dev); + ovpn_dev_dstats_tx_dropped(peer->ovpn->dev); if (likely(peer)) ovpn_peer_put(peer); if (likely(ks)) @@ -340,7 +340,7 @@ static void ovpn_send(struct ovpn_priv *ovpn, struct sk_buff *skb, */ skb_list_walk_safe(skb, curr, next) { if (unlikely(!ovpn_encrypt_one(peer, curr))) { - dev_dstats_tx_dropped(ovpn->dev); + ovpn_dev_dstats_tx_dropped(ovpn->dev); kfree_skb(curr); } } @@ -411,7 +411,7 @@ netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(!curr)) { net_err_ratelimited("%s: skb_share_check failed for payload packet\n", netdev_name(dev)); - dev_dstats_tx_dropped(ovpn->dev); + ovpn_dev_dstats_tx_dropped(ovpn->dev); continue; } @@ -437,7 +437,7 @@ netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev) drop: ovpn_peer_put(peer); drop_no_peer: - dev_dstats_tx_dropped(ovpn->dev); + ovpn_dev_dstats_tx_dropped(ovpn->dev); skb_tx_error(skb); kfree_skb_list(skb); return NETDEV_TX_OK; diff --git a/drivers/net/ovpn/main.c b/drivers/net/ovpn/main.c index 1bb1afe766a4..3f76b1b0e5f6 100644 --- a/drivers/net/ovpn/main.c +++ b/drivers/net/ovpn/main.c @@ -92,6 +92,8 @@ static void ovpn_net_uninit(struct net_device *dev) { struct ovpn_priv *ovpn = netdev_priv(dev); + disable_delayed_work_sync(&ovpn->keepalive_work); + ovpn_peers_free(ovpn, NULL, OVPN_DEL_PEER_REASON_TEARDOWN); gro_cells_destroy(&ovpn->gro_cells); } @@ -208,15 +210,6 @@ static int ovpn_newlink(struct net_device *dev, return register_netdevice(dev); } -static void ovpn_dellink(struct net_device *dev, struct list_head *head) -{ - struct ovpn_priv *ovpn = netdev_priv(dev); - - cancel_delayed_work_sync(&ovpn->keepalive_work); - ovpn_peers_free(ovpn, NULL, OVPN_DEL_PEER_REASON_TEARDOWN); - unregister_netdevice_queue(dev, head); -} - static int ovpn_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ovpn_priv *ovpn = netdev_priv(dev); @@ -235,7 +228,6 @@ static struct rtnl_link_ops ovpn_link_ops = { .policy = ovpn_policy, .maxtype = IFLA_OVPN_MAX, .newlink = ovpn_newlink, - .dellink = ovpn_dellink, .fill_info = ovpn_fill_info, }; diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c index c7f382437630..bdb56ef0c904 100644 --- a/drivers/net/ovpn/netlink.c +++ b/drivers/net/ovpn/netlink.c @@ -455,10 +455,12 @@ int ovpn_nl_peer_new_doit(struct sk_buff *skb, struct genl_info *info) sock_release: ovpn_socket_release(peer); peer_release: - /* release right away because peer was not yet hashed, thus it is not - * used in any context + /* For UDP, the peer is unreachable until added to the hashtables, so + * dropping the initial reference is enough. For TCP, the peer may be + * concurrently reachable via sk_user_data->peer until + * ovpn_socket_release() detaches; rely on the refcount. */ - ovpn_peer_release(peer); + ovpn_peer_put(peer); return ret; } diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c index 4bfcab0c8652..87a83321f1dd 100644 --- a/drivers/net/ovpn/peer.c +++ b/drivers/net/ovpn/peer.c @@ -348,7 +348,7 @@ static void ovpn_peer_release_rcu(struct rcu_head *head) * ovpn_peer_release - release peer private members * @peer: the peer to release */ -void ovpn_peer_release(struct ovpn_peer *peer) +static void ovpn_peer_release(struct ovpn_peer *peer) { ovpn_crypto_state_release(&peer->crypto); spin_lock_bh(&peer->lock); @@ -1029,14 +1029,29 @@ static int ovpn_peer_add_p2p(struct ovpn_priv *ovpn, struct ovpn_peer *peer) */ int ovpn_peer_add(struct ovpn_priv *ovpn, struct ovpn_peer *peer) { + int ret = -ENODEV; + + /* Prevent adding new peers while destroying the ovpn interface. + * Failing to do so would end up holding the device reference + * endlessly hostage of the new peer object with no chance of + * release.. + */ + netdev_lock(ovpn->dev); + if (ovpn->dev->reg_state != NETREG_REGISTERED) + goto out; + switch (ovpn->mode) { case OVPN_MODE_MP: - return ovpn_peer_add_mp(ovpn, peer); + ret = ovpn_peer_add_mp(ovpn, peer); + break; case OVPN_MODE_P2P: - return ovpn_peer_add_p2p(ovpn, peer); + ret = ovpn_peer_add_p2p(ovpn, peer); + break; } +out: + netdev_unlock(ovpn->dev); - return -EOPNOTSUPP; + return ret; } /** diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h index a1423f2b09e0..4de5aeae33f7 100644 --- a/drivers/net/ovpn/peer.h +++ b/drivers/net/ovpn/peer.h @@ -125,7 +125,6 @@ static inline bool ovpn_peer_hold(struct ovpn_peer *peer) return kref_get_unless_zero(&peer->refcount); } -void ovpn_peer_release(struct ovpn_peer *peer); void ovpn_peer_release_kref(struct kref *kref); /** diff --git a/drivers/net/ovpn/stats.h b/drivers/net/ovpn/stats.h index 53433d8b6c33..3a45b97c0056 100644 --- a/drivers/net/ovpn/stats.h +++ b/drivers/net/ovpn/stats.h @@ -11,6 +11,8 @@ #ifndef _NET_OVPN_OVPNSTATS_H_ #define _NET_OVPN_OVPNSTATS_H_ +#include <linux/netdevice.h> + /* one stat */ struct ovpn_peer_stat { atomic64_t bytes; @@ -44,4 +46,18 @@ static inline void ovpn_peer_stats_increment_tx(struct ovpn_peer_stats *stats, ovpn_peer_stats_increment(&stats->tx, n); } +static inline void ovpn_dev_dstats_tx_dropped(struct net_device *dev) +{ + local_bh_disable(); + dev_dstats_tx_dropped(dev); + local_bh_enable(); +} + +static inline void ovpn_dev_dstats_rx_dropped(struct net_device *dev) +{ + local_bh_disable(); + dev_dstats_rx_dropped(dev); + local_bh_enable(); +} + #endif /* _NET_OVPN_OVPNSTATS_H_ */ diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c index 5499c1572f3e..505c2f214c9f 100644 --- a/drivers/net/ovpn/tcp.c +++ b/drivers/net/ovpn/tcp.c @@ -152,7 +152,7 @@ err: if (WARN_ON(!ovpn_peer_hold(peer))) goto err_nopeer; schedule_work(&peer->tcp.defer_del_work); - dev_dstats_rx_dropped(peer->ovpn->dev); + ovpn_dev_dstats_rx_dropped(peer->ovpn->dev); err_nopeer: kfree_skb(skb); } @@ -298,9 +298,9 @@ static void ovpn_tcp_send_sock(struct ovpn_peer *peer, struct sock *sk) } while (peer->tcp.out_msg.len > 0); if (!peer->tcp.out_msg.len) { - preempt_disable(); + local_bh_disable(); dev_dstats_tx_add(peer->ovpn->dev, skb->len); - preempt_enable(); + local_bh_enable(); } kfree_skb(peer->tcp.out_msg.skb); @@ -331,7 +331,7 @@ static void ovpn_tcp_send_sock_skb(struct ovpn_peer *peer, struct sock *sk, ovpn_tcp_send_sock(peer, sk); if (peer->tcp.out_msg.skb) { - dev_dstats_tx_dropped(peer->ovpn->dev); + ovpn_dev_dstats_tx_dropped(peer->ovpn->dev); kfree_skb(skb); return; } @@ -353,7 +353,7 @@ void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct sock *sk, if (sock_owned_by_user(sk)) { if (skb_queue_len(&peer->tcp.out_queue) >= READ_ONCE(net_hotdata.max_backlog)) { - dev_dstats_tx_dropped(peer->ovpn->dev); + ovpn_dev_dstats_tx_dropped(peer->ovpn->dev); kfree_skb(skb); goto unlock; } @@ -581,14 +581,19 @@ static void ovpn_tcp_close(struct sock *sk, long timeout) rcu_read_lock(); sock = rcu_dereference_sk_user_data(sk); - if (!sock || !sock->peer || !ovpn_peer_hold(sock->peer)) { + if (!sock) { rcu_read_unlock(); return; } + peer = sock->peer; + if (!peer || !ovpn_peer_hold(peer)) { + rcu_read_unlock(); + return; + } rcu_read_unlock(); - ovpn_peer_del(sock->peer, OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT); + ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT); peer->tcp.sk_cb.prot->close(sk, timeout); ovpn_peer_put(peer); } diff --git a/drivers/net/ovpn/udp.c b/drivers/net/ovpn/udp.c index 272b535ecaad..367563d84472 100644 --- a/drivers/net/ovpn/udp.c +++ b/drivers/net/ovpn/udp.c @@ -126,7 +126,7 @@ static int ovpn_udp_encap_recv(struct sock *sk, struct sk_buff *skb) return 0; drop: - dev_dstats_rx_dropped(ovpn->dev); + ovpn_dev_dstats_rx_dropped(ovpn->dev); drop_noovpn: kfree_skb(skb); return 0; diff --git a/drivers/net/phy/dp83tc811.c b/drivers/net/phy/dp83tc811.c index e480c2a07450..252fb12b3e68 100644 --- a/drivers/net/phy/dp83tc811.c +++ b/drivers/net/phy/dp83tc811.c @@ -393,6 +393,7 @@ static struct phy_driver dp83811_driver[] = { .config_init = dp83811_config_init, .config_aneg = dp83811_config_aneg, .soft_reset = dp83811_phy_reset, + .get_features = genphy_c45_pma_read_ext_abilities, .get_wol = dp83811_get_wol, .set_wol = dp83811_set_wol, .config_intr = dp83811_config_intr, diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 61670be0f095..d2e36e460cc7 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -939,6 +939,14 @@ EXPORT_SYMBOL_GPL(genphy_c45_read_eee_abilities); */ int genphy_c45_an_config_eee_aneg(struct phy_device *phydev) { + /* Writing MMD AN advertisements while autoneg is disabled has no + * effect on link-partner negotiation, but on some PHYs (e.g. the + * Broadcom BCM54213PE) the write itself disturbs the receive + * datapath. Skip it. + */ + if (phydev->autoneg == AUTONEG_DISABLE) + return 0; + if (!phydev->eee_cfg.eee_enabled) { __ETHTOOL_DECLARE_LINK_MODE_MASK(adv) = {}; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index dea8b94286d1..78cf05a17f8f 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2792,7 +2792,8 @@ EXPORT_SYMBOL(phy_advertise_supported); */ void phy_advertise_eee_all(struct phy_device *phydev) { - linkmode_copy(phydev->advertising_eee, phydev->supported_eee); + linkmode_andnot(phydev->advertising_eee, phydev->supported_eee, + phydev->eee_disabled_modes); } EXPORT_SYMBOL_GPL(phy_advertise_eee_all); @@ -2818,7 +2819,8 @@ EXPORT_SYMBOL_GPL(phy_advertise_eee_all); */ void phy_support_eee(struct phy_device *phydev) { - linkmode_copy(phydev->advertising_eee, phydev->supported_eee); + linkmode_andnot(phydev->advertising_eee, phydev->supported_eee, + phydev->eee_disabled_modes); phydev->eee_cfg.tx_lpi_enabled = true; phydev->eee_cfg.eee_enabled = true; } diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c index 23eb3c9d0bcd..af354483d191 100644 --- a/drivers/net/pse-pd/pse_core.c +++ b/drivers/net/pse-pd/pse_core.c @@ -210,7 +210,7 @@ static int of_load_pse_pis(struct pse_controller_dev *pcdev) ret = of_load_pse_pi_pairsets(node, &pi, ret); if (ret) goto out; - } else if (ret != ENOENT) { + } else if (ret != -ENOENT) { dev_err(pcdev->dev, "error: wrong number of pairsets. Should be 1 or 2, got %d (%pOF)\n", ret, node); diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 1197f245e873..6fd3b14273b3 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -919,11 +919,11 @@ static long tap_ioctl(struct file *file, unsigned int cmd, struct tap_queue *q = file->private_data; struct tap_dev *tap; void __user *argp = (void __user *)arg; + struct sockaddr_storage ss = {}; struct ifreq __user *ifr = argp; unsigned int __user *up = argp; unsigned short u; int __user *sp = argp; - struct sockaddr_storage ss; int s; int ret; diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index ce22141e5efd..cd20508399b9 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -3,7 +3,6 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ @@ -1947,15 +1946,15 @@ int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id) ret = -ESHUTDOWN; ath10k_dbg(ar, ATH10K_DBG_WMI, "drop wmi command %d, hardware is wedged\n", cmd_id); - } - /* try to send pending beacons first. they take priority */ - ath10k_wmi_tx_beacons_nowait(ar); + } else { + /* try to send pending beacons first. they take priority */ + ath10k_wmi_tx_beacons_nowait(ar); - ret = ath10k_wmi_cmd_send_nowait(ar, skb, cmd_id); - - if (ret && test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags)) - ret = -ESHUTDOWN; + ret = ath10k_wmi_cmd_send_nowait(ar, skb, cmd_id); + if (ret && test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags)) + ret = -ESHUTDOWN; + } (ret != -EAGAIN); }), 3 * HZ); diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 44eea682c297..5666f6647445 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -2214,8 +2214,7 @@ ath11k_dp_rx_h_find_peer(struct ath11k_base *ab, struct sk_buff *msdu) lockdep_assert_held(&ab->base_lock); - if (rxcb->peer_id) - peer = ath11k_peer_find_by_id(ab, rxcb->peer_id); + peer = ath11k_peer_find_by_id(ab, rxcb->peer_id); if (peer) return peer; diff --git a/drivers/net/wireless/ath/ath11k/hal.c b/drivers/net/wireless/ath/ath11k/hal.c index 0c797b8d0a27..8330c9e7ac7d 100644 --- a/drivers/net/wireless/ath/ath11k/hal.c +++ b/drivers/net/wireless/ath/ath11k/hal.c @@ -1388,14 +1388,22 @@ EXPORT_SYMBOL(ath11k_hal_srng_deinit); void ath11k_hal_srng_clear(struct ath11k_base *ab) { - /* No need to memset rdp and wrp memory since each individual - * segment would get cleared in ath11k_hal_srng_src_hw_init() - * and ath11k_hal_srng_dst_hw_init(). + /* + * Preserve the shared pointer buffers, but clear the previous + * firmware instance's hp/tp state before handing them back to FW. + * LMAC rings reuse this shared memory without going through the + * normal SRNG hw-init path that zeros non-LMAC ring pointers. */ memset(ab->hal.srng_list, 0, sizeof(ab->hal.srng_list)); memset(ab->hal.shadow_reg_addr, 0, sizeof(ab->hal.shadow_reg_addr)); + if (ab->hal.rdp.vaddr) + memset(ab->hal.rdp.vaddr, 0, + sizeof(*ab->hal.rdp.vaddr) * HAL_SRNG_RING_ID_MAX); + if (ab->hal.wrp.vaddr) + memset(ab->hal.wrp.vaddr, 0, + sizeof(*ab->hal.wrp.vaddr) * HAL_SRNG_NUM_LMAC_RINGS); ab->hal.avail_blk_resource = 0; ab->hal.current_blk_index = 0; ab->hal.num_shadow_reg_configured = 0; diff --git a/drivers/net/wireless/ath/ath11k/hal_rx.c b/drivers/net/wireless/ath/ath11k/hal_rx.c index 753bd93f0212..51e0840bc0d1 100644 --- a/drivers/net/wireless/ath/ath11k/hal_rx.c +++ b/drivers/net/wireless/ath/ath11k/hal_rx.c @@ -1467,11 +1467,8 @@ ath11k_hal_rx_parse_mon_status_tlv(struct ath11k_base *ab, case HAL_RX_MPDU_START: { struct hal_rx_mpdu_info *mpdu_info = (struct hal_rx_mpdu_info *)tlv_data; - u16 peer_id; - peer_id = ath11k_hal_rx_mpduinfo_get_peerid(ab, mpdu_info); - if (peer_id) - ppdu_info->peer_id = peer_id; + ppdu_info->peer_id = ath11k_hal_rx_mpduinfo_get_peerid(ab, mpdu_info); break; } case HAL_RXPCU_PPDU_END_INFO: { diff --git a/drivers/net/wireless/ath/ath11k/testmode.c b/drivers/net/wireless/ath/ath11k/testmode.c index a9751ea2a0b7..c72eed358f6d 100644 --- a/drivers/net/wireless/ath/ath11k/testmode.c +++ b/drivers/net/wireless/ath/ath11k/testmode.c @@ -457,6 +457,7 @@ static int ath11k_tm_cmd_wmi_ftm(struct ath11k *ar, struct nlattr *tb[]) ret = ath11k_wmi_cmd_send(wmi, skb, cmd_id); if (ret) { ath11k_warn(ar->ab, "failed to send wmi ftm command: %d\n", ret); + dev_kfree_skb(skb); goto out; } diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 110035dae8a6..e1b00dc811e7 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -9191,6 +9191,7 @@ int ath11k_wmi_wow_host_wakeup_ind(struct ath11k *ar) struct wmi_wow_host_wakeup_ind *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd); skb = ath11k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -9204,14 +9205,20 @@ int ath11k_wmi_wow_host_wakeup_ind(struct ath11k *ar) ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "tlv wow host wakeup ind\n"); - return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID); + ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath11k_wmi_wow_enable(struct ath11k *ar) { struct wmi_wow_enable_cmd *cmd; struct sk_buff *skb; - int len; + int ret, len; len = sizeof(*cmd); skb = ath11k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -9226,7 +9233,13 @@ int ath11k_wmi_wow_enable(struct ath11k *ar) cmd->pause_iface_config = WOW_IFACE_PAUSE_ENABLED; ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "tlv wow enable\n"); - return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_CMDID); + ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_WOW_ENABLE_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath11k_wmi_scan_prob_req_oui(struct ath11k *ar, diff --git a/drivers/net/wireless/intel/iwlwifi/mld/link.c b/drivers/net/wireless/intel/iwlwifi/mld/link.c index f6f52d297a72..e67ba3a24d02 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/link.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/link.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2024-2025 Intel Corporation + * Copyright (C) 2024-2026 Intel Corporation */ #include "constants.h" @@ -501,7 +501,6 @@ void iwl_mld_remove_link(struct iwl_mld *mld, struct iwl_mld_vif *mld_vif = iwl_mld_vif_from_mac80211(bss_conf->vif); struct iwl_mld_link *link = iwl_mld_link_from_mac80211(bss_conf); bool is_deflink = link == &mld_vif->deflink; - u8 fw_id = link->fw_id; if (WARN_ON(!link || link->active)) return; @@ -509,15 +508,15 @@ void iwl_mld_remove_link(struct iwl_mld *mld, iwl_mld_rm_link_from_fw(mld, bss_conf); /* Continue cleanup on failure */ - if (!is_deflink) - kfree_rcu(link, rcu_head); - RCU_INIT_POINTER(mld_vif->link[bss_conf->link_id], NULL); - if (WARN_ON(fw_id >= mld->fw->ucode_capa.num_links)) + if (WARN_ON(link->fw_id >= mld->fw->ucode_capa.num_links)) return; - RCU_INIT_POINTER(mld->fw_id_to_bss_conf[fw_id], NULL); + RCU_INIT_POINTER(mld->fw_id_to_bss_conf[link->fw_id], NULL); + + if (!is_deflink) + kfree_rcu(link, rcu_head); } void iwl_mld_handle_missed_beacon_notif(struct iwl_mld *mld, diff --git a/drivers/net/wireless/intel/iwlwifi/mld/tx.c b/drivers/net/wireless/intel/iwlwifi/mld/tx.c index e3fb4fc4f452..a60bfb1a2ab2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/tx.c @@ -828,7 +828,7 @@ static int iwl_mld_tx_tso_segment(struct iwl_mld *mld, struct sk_buff *skb, return -EINVAL; max_tid_amsdu_len = sta->cur->max_tid_amsdu_len[tid]; - if (!max_tid_amsdu_len) + if (!max_tid_amsdu_len || max_tid_amsdu_len == 1) return iwl_tx_tso_segment(skb, 1, netdev_flags, mpdus_skbs); /* Sub frame header + SNAP + IP header + TCP header + MSS */ @@ -840,6 +840,9 @@ static int iwl_mld_tx_tso_segment(struct iwl_mld *mld, struct sk_buff *skb, */ num_subframes = (max_tid_amsdu_len + pad) / (subf_len + pad); + if (WARN_ON_ONCE(!num_subframes)) + return iwl_tx_tso_segment(skb, 1, netdev_flags, mpdus_skbs); + if (sta->max_amsdu_subframes && num_subframes > sta->max_amsdu_subframes) num_subframes = sta->max_amsdu_subframes; @@ -965,6 +968,16 @@ void iwl_mld_tx_from_txq(struct iwl_mld *mld, struct ieee80211_txq *txq) u8 zero_addr[ETH_ALEN] = {}; /* + * Don't transmit during firmware restart. The firmware is dead, + * so iwl_trans_tx() would return -EIO for each frame. Avoid the + * overhead of dequeuing from mac80211 only to immediately free + * the skbs, and the potential memory pressure from rapid skb + * allocation churn during high-throughput restart scenarios. + */ + if (unlikely(mld->fw_status.in_hw_restart)) + return; + + /* * No need for threads to be pending here, they can leave the first * taker all the work. * diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 49ffc4ecee85..44380ebfe09d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2025 Intel Corporation + * Copyright (C) 2012-2014, 2018-2026 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH */ @@ -938,13 +938,18 @@ u8 iwl_mvm_mac_ctxt_get_lowest_rate(struct iwl_mvm *mvm, u16 iwl_mvm_mac_ctxt_get_beacon_flags(const struct iwl_fw *fw, u8 rate_idx) { - u16 flags = iwl_mvm_mac80211_idx_to_hwrate(fw, rate_idx); bool is_new_rate = iwl_fw_lookup_cmd_ver(fw, BEACON_TEMPLATE_CMD, 0) > 10; + u16 flags = 0; if (rate_idx <= IWL_LAST_CCK_RATE) flags |= is_new_rate ? IWL_MAC_BEACON_CCK : IWL_MAC_BEACON_CCK_V1; + if (iwl_fw_lookup_cmd_ver(fw, TX_CMD, 0) > 8) + flags |= iwl_mvm_mac80211_idx_to_hwrate(fw, rate_idx); + else + flags |= iwl_fw_rate_idx_to_plcp(rate_idx); + return flags; } @@ -973,6 +978,7 @@ static void iwl_mvm_mac_ctxt_set_tx(struct iwl_mvm *mvm, { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); struct ieee80211_tx_info *info; + u32 rate_n_flags = 0; u8 rate; u32 tx_flags; @@ -992,18 +998,21 @@ static void iwl_mvm_mac_ctxt_set_tx(struct iwl_mvm *mvm, IWL_UCODE_TLV_CAPA_BEACON_ANT_SELECTION)) { iwl_mvm_toggle_tx_ant(mvm, &mvm->mgmt_last_antenna_idx); - tx_params->rate_n_flags = - cpu_to_le32(BIT(mvm->mgmt_last_antenna_idx) << - RATE_MCS_ANT_POS); + rate_n_flags |= BIT(mvm->mgmt_last_antenna_idx) << + RATE_MCS_ANT_POS; } rate = iwl_mvm_mac_ctxt_get_beacon_rate(mvm, info, vif); - tx_params->rate_n_flags |= - cpu_to_le32(iwl_mvm_mac80211_idx_to_hwrate(mvm->fw, rate)); - if (rate == IWL_FIRST_CCK_RATE) - tx_params->rate_n_flags |= cpu_to_le32(RATE_MCS_CCK_MSK_V1); + if (rate < IWL_FIRST_OFDM_RATE) + rate_n_flags |= RATE_MCS_MOD_TYPE_CCK; + else + rate_n_flags |= RATE_MCS_MOD_TYPE_LEGACY_OFDM; + + rate_n_flags |= iwl_mvm_mac80211_idx_to_hwrate(mvm->fw, rate); + tx_params->rate_n_flags = iwl_mvm_v3_rate_to_fw(rate_n_flags, + mvm->fw_rates_ver); } int iwl_mvm_mac_ctxt_send_beacon_cmd(struct iwl_mvm *mvm, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index fa995e235d9b..63e84efdaa46 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2025 Intel Corporation + * Copyright (C) 2012-2014, 2018-2026 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH */ @@ -159,15 +159,9 @@ int iwl_mvm_legacy_rate_to_mac80211_idx(u32 rate_n_flags, u8 iwl_mvm_mac80211_idx_to_hwrate(const struct iwl_fw *fw, int rate_idx) { - if (iwl_fw_lookup_cmd_ver(fw, TX_CMD, 0) > 8) - /* In the new rate legacy rates are indexed: - * 0 - 3 for CCK and 0 - 7 for OFDM. - */ - return (rate_idx >= IWL_FIRST_OFDM_RATE ? - rate_idx - IWL_FIRST_OFDM_RATE : - rate_idx); - - return iwl_fw_rate_idx_to_plcp(rate_idx); + return rate_idx >= IWL_FIRST_OFDM_RATE ? + rate_idx - IWL_FIRST_OFDM_RATE : + rate_idx; } u8 iwl_mvm_mac80211_ac_to_ucode_ac(enum ieee80211_ac_numbers ac) diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.c b/drivers/net/wireless/microchip/wilc1000/wlan.c index fedc7d59216a..1cc4ee62987d 100644 --- a/drivers/net/wireless/microchip/wilc1000/wlan.c +++ b/drivers/net/wireless/microchip/wilc1000/wlan.c @@ -1265,7 +1265,7 @@ int wilc_wlan_firmware_download(struct wilc *wilc, const u8 *buffer, ret = acquire_bus(wilc, WILC_BUS_ACQUIRE_AND_WAKEUP); if (ret) - return ret; + goto fail; wilc->hif_func->hif_read_reg(wilc, WILC_GLB_RESET_0, ®); reg &= ~BIT(10); diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c index 530a3ea47a1a..ce7604500486 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c @@ -1426,6 +1426,8 @@ imem_config_fail: protocol_init_fail: cancel_work_sync(&ipc_imem->run_state_worker); ipc_task_deinit(ipc_imem->ipc_task); + if (ipc_imem->ipc_protocol) + ipc_protocol_deinit(ipc_imem->ipc_protocol); ipc_task_init_fail: kfree(ipc_imem->ipc_task); ipc_task_fail: diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index c212fa952c0f..5bbaf257fd6c 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -122,7 +122,6 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk); struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; bool has_metadata = meta_buffer && meta_len; - struct bio *bio = NULL; int ret; if (!nvme_ctrl_sgl_supported(ctrl)) @@ -154,8 +153,8 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, return ret; out_unmap: - if (bio) - blk_rq_unmap_user(bio); + if (req->bio) + blk_rq_unmap_user(req->bio); return ret; } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2e32242bed67..5e36a5926fe0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2320,11 +2320,13 @@ static void nvme_free_host_mem_multi(struct nvme_dev *dev) static void nvme_free_host_mem(struct nvme_dev *dev) { - if (dev->hmb_sgt) + if (dev->hmb_sgt) { dma_free_noncontiguous(dev->dev, dev->host_mem_size, dev->hmb_sgt, DMA_BIDIRECTIONAL); - else + dev->hmb_sgt = NULL; + } else { nvme_free_host_mem_multi(dev); + } dma_free_coherent(dev->dev, dev->host_mem_descs_size, dev->host_mem_descs, dev->host_mem_descs_dma); diff --git a/drivers/phy/marvell/phy-mvebu-a3700-utmi.c b/drivers/phy/marvell/phy-mvebu-a3700-utmi.c index 04f4fb4bed70..f882bc57649c 100644 --- a/drivers/phy/marvell/phy-mvebu-a3700-utmi.c +++ b/drivers/phy/marvell/phy-mvebu-a3700-utmi.c @@ -168,9 +168,8 @@ static int mvebu_a3700_utmi_phy_power_off(struct phy *phy) u32 reg; /* Disable PHY pull-up and enable USB2 suspend */ - reg = readl(utmi->regs + USB2_PHY_CTRL(usb32)); - reg &= ~(RB_USB2PHY_PU | RB_USB2PHY_SUSPM(usb32)); - writel(reg, utmi->regs + USB2_PHY_CTRL(usb32)); + regmap_update_bits(utmi->usb_misc, USB2_PHY_CTRL(usb32), + RB_USB2PHY_PU | RB_USB2PHY_SUSPM(usb32), 0); /* Power down OTG module */ if (usb32) { diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index dda877561f8c..8af777f7ddd0 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1050,6 +1050,7 @@ static const struct qmp_phy_init_tbl sm8750_ufsphy_pcs[] = { QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_MULTI_LANE_CTRL1, 0x02), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_MID_TERM_CTRL1, 0x43), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PCS_CTRL1, 0x40), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x33), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_LARGE_AMP_DRV_LVL, 0x0f), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_SIGDET_CTRL2, 0x68), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_POST_EMP_LVL_S4, 0x0e), diff --git a/drivers/phy/samsung/phy-exynos5-usbdrd.c b/drivers/phy/samsung/phy-exynos5-usbdrd.c index 1c8bf80119f1..93ac302de1eb 100644 --- a/drivers/phy/samsung/phy-exynos5-usbdrd.c +++ b/drivers/phy/samsung/phy-exynos5-usbdrd.c @@ -1905,13 +1905,14 @@ const struct exynos5_usbdrd_phy_tuning exynos7870_tunes_utmi_postinit[] = { PHYPARAM0_TXPREEMPAMPTUNE | PHYPARAM0_TXHSXVTUNE | PHYPARAM0_TXFSLSTUNE | PHYPARAM0_SQRXTUNE | PHYPARAM0_OTGTUNE | PHYPARAM0_COMPDISTUNE), - (FIELD_PREP_CONST(PHYPARAM0_TXVREFTUNE, 14) | + (FIELD_PREP_CONST(PHYPARAM0_TXVREFTUNE, 3) | FIELD_PREP_CONST(PHYPARAM0_TXRISETUNE, 1) | - FIELD_PREP_CONST(PHYPARAM0_TXRESTUNE, 3) | + FIELD_PREP_CONST(PHYPARAM0_TXRESTUNE, 2) | + FIELD_PREP_CONST(PHYPARAM0_TXPREEMPPULSETUNE, 0) | FIELD_PREP_CONST(PHYPARAM0_TXPREEMPAMPTUNE, 0) | FIELD_PREP_CONST(PHYPARAM0_TXHSXVTUNE, 0) | FIELD_PREP_CONST(PHYPARAM0_TXFSLSTUNE, 3) | - FIELD_PREP_CONST(PHYPARAM0_SQRXTUNE, 6) | + FIELD_PREP_CONST(PHYPARAM0_SQRXTUNE, 5) | FIELD_PREP_CONST(PHYPARAM0_OTGTUNE, 2) | FIELD_PREP_CONST(PHYPARAM0_COMPDISTUNE, 3))), PHY_TUNING_ENTRY_LAST diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c index bec9616c4a2e..4452e73fb82a 100644 --- a/drivers/phy/tegra/xusb-tegra186.c +++ b/drivers/phy/tegra/xusb-tegra186.c @@ -20,8 +20,8 @@ /* FUSE USB_CALIB registers */ #define HS_CURR_LEVEL_PADX_SHIFT(x) ((x) ? (11 + (x - 1) * 6) : 0) #define HS_CURR_LEVEL_PAD_MASK 0x3f -#define HS_TERM_RANGE_ADJ_SHIFT 7 -#define HS_TERM_RANGE_ADJ_MASK 0xf +#define HS_TERM_RANGE_ADJ_PADX_SHIFT(x) ((x) ? (5 + (x - 1) * 4) : 7) +#define HS_TERM_RANGE_ADJ_PAD_MASK 0xf #define HS_SQUELCH_SHIFT 29 #define HS_SQUELCH_MASK 0x7 @@ -253,7 +253,7 @@ struct tegra_xusb_fuse_calibration { u32 *hs_curr_level; u32 hs_squelch; - u32 hs_term_range_adj; + u32 *hs_term_range_adj; u32 rpd_ctrl; }; @@ -930,7 +930,7 @@ static int tegra186_utmi_phy_power_on(struct phy *phy) value = padctl_readl(padctl, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index)); value &= ~TERM_RANGE_ADJ(~0); - value |= TERM_RANGE_ADJ(priv->calib.hs_term_range_adj); + value |= TERM_RANGE_ADJ(priv->calib.hs_term_range_adj[index]); value &= ~RPD_CTRL(~0); value |= RPD_CTRL(priv->calib.rpd_ctrl); padctl_writel(padctl, value, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index)); @@ -1464,17 +1464,23 @@ static const char * const tegra186_usb3_functions[] = { static int tegra186_xusb_read_fuse_calibration(struct tegra186_xusb_padctl *padctl) { + const struct tegra_xusb_padctl_soc *soc = padctl->base.soc; struct device *dev = padctl->base.dev; unsigned int i, count; u32 value, *level; + u32 *hs_term_range_adj; int err; - count = padctl->base.soc->ports.usb2.count; + count = soc->ports.usb2.count; level = devm_kcalloc(dev, count, sizeof(u32), GFP_KERNEL); if (!level) return -ENOMEM; + hs_term_range_adj = devm_kcalloc(dev, count, sizeof(u32), GFP_KERNEL); + if (!hs_term_range_adj) + return -ENOMEM; + err = tegra_fuse_readl(TEGRA_FUSE_SKU_CALIB_0, &value); if (err) return dev_err_probe(dev, err, @@ -1490,8 +1496,8 @@ tegra186_xusb_read_fuse_calibration(struct tegra186_xusb_padctl *padctl) padctl->calib.hs_squelch = (value >> HS_SQUELCH_SHIFT) & HS_SQUELCH_MASK; - padctl->calib.hs_term_range_adj = (value >> HS_TERM_RANGE_ADJ_SHIFT) & - HS_TERM_RANGE_ADJ_MASK; + hs_term_range_adj[0] = (value >> HS_TERM_RANGE_ADJ_PADX_SHIFT(0)) & + HS_TERM_RANGE_ADJ_PAD_MASK; err = tegra_fuse_readl(TEGRA_FUSE_USB_CALIB_EXT_0, &value); if (err) { @@ -1503,6 +1509,17 @@ tegra186_xusb_read_fuse_calibration(struct tegra186_xusb_padctl *padctl) padctl->calib.rpd_ctrl = (value >> RPD_CTRL_SHIFT) & RPD_CTRL_MASK; + for (i = 1; i < count; i++) { + if (soc->has_per_pad_term) + hs_term_range_adj[i] = + (value >> HS_TERM_RANGE_ADJ_PADX_SHIFT(i)) & + HS_TERM_RANGE_ADJ_PAD_MASK; + else + hs_term_range_adj[i] = hs_term_range_adj[0]; + } + + padctl->calib.hs_term_range_adj = hs_term_range_adj; + return 0; } @@ -1708,6 +1725,7 @@ const struct tegra_xusb_padctl_soc tegra194_xusb_padctl_soc = { .num_supplies = ARRAY_SIZE(tegra194_xusb_padctl_supply_names), .supports_gen2 = true, .poll_trk_completed = true, + .has_per_pad_term = true, }; EXPORT_SYMBOL_GPL(tegra194_xusb_padctl_soc); @@ -1732,6 +1750,7 @@ const struct tegra_xusb_padctl_soc tegra234_xusb_padctl_soc = { .trk_hw_mode = false, .trk_update_on_idle = true, .supports_lp_cfg_en = true, + .has_per_pad_term = true, }; EXPORT_SYMBOL_GPL(tegra234_xusb_padctl_soc); #endif diff --git a/drivers/phy/tegra/xusb.h b/drivers/phy/tegra/xusb.h index d2b5f9565132..810b410672f3 100644 --- a/drivers/phy/tegra/xusb.h +++ b/drivers/phy/tegra/xusb.h @@ -436,6 +436,7 @@ struct tegra_xusb_padctl_soc { bool trk_hw_mode; bool trk_update_on_idle; bool supports_lp_cfg_en; + bool has_per_pad_term; }; struct tegra_xusb_padctl { diff --git a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c index e2293a872dcb..35d27626a336 100644 --- a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c +++ b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c @@ -292,7 +292,7 @@ static int aml_calc_reg_and_bit(struct pinctrl_gpio_range *range, static int aml_pinconf_get_pull(struct aml_pinctrl *info, unsigned int pin) { struct pinctrl_gpio_range *range = - pinctrl_find_gpio_range_from_pin(info->pctl, pin); + pinctrl_find_gpio_range_from_pin_nolock(info->pctl, pin); struct aml_gpio_bank *bank = gpio_chip_to_bank(range->gc); unsigned int reg, bit, val; int ret, conf; @@ -326,7 +326,7 @@ static int aml_pinconf_get_drive_strength(struct aml_pinctrl *info, u16 *drive_strength_ua) { struct pinctrl_gpio_range *range = - pinctrl_find_gpio_range_from_pin(info->pctl, pin); + pinctrl_find_gpio_range_from_pin_nolock(info->pctl, pin); struct aml_gpio_bank *bank = gpio_chip_to_bank(range->gc); unsigned int reg, bit; unsigned int val; @@ -365,7 +365,7 @@ static int aml_pinconf_get_gpio_bit(struct aml_pinctrl *info, unsigned int reg_type) { struct pinctrl_gpio_range *range = - pinctrl_find_gpio_range_from_pin(info->pctl, pin); + pinctrl_find_gpio_range_from_pin_nolock(info->pctl, pin); struct aml_gpio_bank *bank = gpio_chip_to_bank(range->gc); unsigned int reg, bit, val; int ret; diff --git a/drivers/pinctrl/qcom/pinctrl-ipq4019.c b/drivers/pinctrl/qcom/pinctrl-ipq4019.c index 6ede3149b6e1..07df812fb728 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq4019.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq4019.c @@ -480,7 +480,7 @@ static const struct pinfunction ipq4019_functions[] = { QCA_PIN_FUNCTION(blsp_uart0), QCA_PIN_FUNCTION(blsp_uart1), QCA_PIN_FUNCTION(chip_rst), - QCA_PIN_FUNCTION(gpio), + QCA_GPIO_PIN_FUNCTION(gpio), QCA_PIN_FUNCTION(i2s_rx), QCA_PIN_FUNCTION(i2s_spdif_in), QCA_PIN_FUNCTION(i2s_spdif_out), diff --git a/drivers/pinctrl/qcom/pinctrl-msm.h b/drivers/pinctrl/qcom/pinctrl-msm.h index 4625fa5320a9..120217012a9f 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.h +++ b/drivers/pinctrl/qcom/pinctrl-msm.h @@ -39,6 +39,11 @@ struct pinctrl_pin_desc; fname##_groups, \ ARRAY_SIZE(fname##_groups)) +#define QCA_GPIO_PIN_FUNCTION(fname) \ + [qca_mux_##fname] = PINCTRL_GPIO_PINFUNCTION(#fname, \ + fname##_groups, \ + ARRAY_SIZE(fname##_groups)) + /** * struct msm_pingroup - Qualcomm pingroup definition * @grp: Generic data of the pin group (name and pins) diff --git a/drivers/pinctrl/qcom/pinctrl-qcs615.c b/drivers/pinctrl/qcom/pinctrl-qcs615.c index f1c827ddbfbf..4d474c312c10 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs615.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs615.c @@ -1043,11 +1043,11 @@ static const struct msm_pingroup qcs615_groups[] = { static const struct msm_gpio_wakeirq_map qcs615_pdc_map[] = { { 1, 45 }, { 3, 31 }, { 7, 55 }, { 9, 110 }, { 11, 34 }, { 13, 33 }, { 14, 35 }, { 17, 46 }, { 19, 48 }, { 21, 83 }, - { 22, 36 }, { 26, 38 }, { 35, 37 }, { 39, 125 }, { 41, 47 }, - { 47, 49 }, { 48, 51 }, { 50, 52 }, { 51, 123 }, { 55, 56 }, + { 22, 36 }, { 26, 38 }, { 35, 37 }, { 39, 118 }, { 41, 47 }, + { 47, 49 }, { 48, 51 }, { 50, 52 }, { 51, 116 }, { 55, 56 }, { 56, 57 }, { 57, 58 }, { 60, 60 }, { 71, 54 }, { 80, 73 }, { 81, 64 }, { 82, 50 }, { 83, 65 }, { 84, 92 }, { 85, 99 }, - { 86, 67 }, { 87, 84 }, { 88, 124 }, { 89, 122 }, { 90, 69 }, + { 86, 67 }, { 87, 84 }, { 88, 117 }, { 89, 115 }, { 90, 69 }, { 92, 88 }, { 93, 75 }, { 94, 91 }, { 95, 72 }, { 96, 82 }, { 97, 74 }, { 98, 95 }, { 99, 94 }, { 100, 100 }, { 101, 40 }, { 102, 93 }, { 103, 77 }, { 104, 78 }, { 105, 96 }, { 107, 97 }, diff --git a/drivers/pinctrl/qcom/pinctrl-sm8150.c b/drivers/pinctrl/qcom/pinctrl-sm8150.c index ad861cd66958..e4c561a9c50a 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8150.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8150.c @@ -1496,18 +1496,18 @@ static const struct msm_gpio_wakeirq_map sm8150_pdc_map[] = { { 3, 31 }, { 5, 32 }, { 8, 33 }, { 9, 34 }, { 10, 100 }, { 12, 104 }, { 24, 37 }, { 26, 38 }, { 27, 41 }, { 28, 42 }, { 30, 39 }, { 36, 43 }, { 37, 44 }, { 38, 30 }, { 39, 118 }, - { 39, 125 }, { 41, 47 }, { 42, 48 }, { 46, 50 }, { 47, 49 }, - { 48, 51 }, { 49, 53 }, { 50, 52 }, { 51, 116 }, { 51, 123 }, + { 41, 47 }, { 42, 48 }, { 46, 50 }, { 47, 49 }, + { 48, 51 }, { 49, 53 }, { 50, 52 }, { 51, 116 }, { 53, 54 }, { 54, 55 }, { 55, 56 }, { 56, 57 }, { 58, 58 }, { 60, 60 }, { 61, 61 }, { 68, 62 }, { 70, 63 }, { 76, 71 }, { 77, 66 }, { 81, 64 }, { 83, 65 }, { 86, 67 }, { 87, 84 }, - { 88, 117 }, { 88, 124 }, { 90, 69 }, { 91, 70 }, { 93, 75 }, + { 88, 117 }, { 90, 69 }, { 91, 70 }, { 93, 75 }, { 95, 72 }, { 96, 73 }, { 97, 74 }, { 101, 40 }, { 103, 77 }, { 104, 78 }, { 108, 79 }, { 112, 80 }, { 113, 81 }, { 114, 82 }, { 117, 85 }, { 118, 101 }, { 119, 87 }, { 120, 88 }, { 121, 89 }, { 122, 90 }, { 123, 91 }, { 124, 92 }, { 125, 93 }, { 129, 94 }, { 132, 105 }, { 133, 83 }, { 134, 36 }, { 136, 97 }, { 142, 103 }, - { 144, 115 }, { 144, 122 }, { 147, 102 }, { 150, 107 }, + { 144, 115 }, { 147, 102 }, { 150, 107 }, { 152, 108 }, { 153, 109 } }; diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c index ca360185740a..aaa783c79e6d 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c +++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c @@ -335,7 +335,7 @@ struct rzg2l_pinctrl_reg_cache { u32 *iolh[2]; u32 *ien[2]; u32 *pupd[2]; - u32 *smt; + u32 *smt[2]; u8 sd_ch[2]; u8 eth_poc[2]; u8 oen; @@ -2723,10 +2723,6 @@ static int rzg2l_pinctrl_reg_cache_alloc(struct rzg2l_pinctrl *pctrl) if (!cache->pfc) return -ENOMEM; - cache->smt = devm_kcalloc(pctrl->dev, nports, sizeof(*cache->smt), GFP_KERNEL); - if (!cache->smt) - return -ENOMEM; - for (u8 i = 0; i < 2; i++) { u32 n_dedicated_pins = pctrl->data->n_dedicated_pins; @@ -2745,6 +2741,11 @@ static int rzg2l_pinctrl_reg_cache_alloc(struct rzg2l_pinctrl *pctrl) if (!cache->pupd[i]) return -ENOMEM; + cache->smt[i] = devm_kcalloc(pctrl->dev, nports, sizeof(*cache->smt[i]), + GFP_KERNEL); + if (!cache->smt[i]) + return -ENOMEM; + /* Allocate dedicated cache. */ dedicated_cache->iolh[i] = devm_kcalloc(pctrl->dev, n_dedicated_pins, sizeof(*dedicated_cache->iolh[i]), @@ -3035,7 +3036,7 @@ static void rzg2l_pinctrl_pm_setup_regs(struct rzg2l_pinctrl *pctrl, bool suspen RZG2L_PCTRL_REG_ACCESS32(suspend, pctrl->base + PUPD(off), cache->pupd[0][port]); if (pincnt >= 4) { - RZG2L_PCTRL_REG_ACCESS32(suspend, pctrl->base + PUPD(off), + RZG2L_PCTRL_REG_ACCESS32(suspend, pctrl->base + PUPD(off) + 4, cache->pupd[1][port]); } } @@ -3052,8 +3053,14 @@ static void rzg2l_pinctrl_pm_setup_regs(struct rzg2l_pinctrl *pctrl, bool suspen } } - if (has_smt) - RZG2L_PCTRL_REG_ACCESS32(suspend, pctrl->base + SMT(off), cache->smt[port]); + if (has_smt) { + RZG2L_PCTRL_REG_ACCESS32(suspend, pctrl->base + SMT(off), + cache->smt[0][port]); + if (pincnt >= 4) { + RZG2L_PCTRL_REG_ACCESS32(suspend, pctrl->base + SMT(off) + 4, + cache->smt[1][port]); + } + } } } diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index a594d5fcfcfd..d29158faba3e 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -295,8 +295,6 @@ static const struct software_node *ssam_node_group_sl6[] = { /* Devices for Surface Laptop 7. */ static const struct software_node *ssam_node_group_sl7[] = { &ssam_node_root, - &ssam_node_bat_ac, - &ssam_node_bat_main, &ssam_node_tmp_perf_profile_with_fan, &ssam_node_fan_speed, &ssam_node_hid_sam_keyboard, diff --git a/drivers/platform/x86/adv_swbutton.c b/drivers/platform/x86/adv_swbutton.c index 6fa60f3fc53c..8f7a26e6de81 100644 --- a/drivers/platform/x86/adv_swbutton.c +++ b/drivers/platform/x86/adv_swbutton.c @@ -48,10 +48,14 @@ static int adv_swbutton_probe(struct platform_device *device) { struct adv_swbutton *button; struct input_dev *input; - acpi_handle handle = ACPI_HANDLE(&device->dev); + acpi_handle handle; acpi_status status; int error; + handle = ACPI_HANDLE(&device->dev); + if (!handle) + return -ENODEV; + button = devm_kzalloc(&device->dev, sizeof(*button), GFP_KERNEL); if (!button) return -ENOMEM; diff --git a/drivers/platform/x86/hp/hp_accel.c b/drivers/platform/x86/hp/hp_accel.c index 10d5af18d639..39b73dc473f1 100644 --- a/drivers/platform/x86/hp/hp_accel.c +++ b/drivers/platform/x86/hp/hp_accel.c @@ -300,6 +300,9 @@ static int lis3lv02d_probe(struct platform_device *device) int ret; lis3_dev.bus_priv = ACPI_COMPANION(&device->dev); + if (!lis3_dev.bus_priv) + return -ENODEV; + lis3_dev.init = lis3lv02d_acpi_init; lis3_dev.read = lis3lv02d_acpi_read; lis3_dev.write = lis3lv02d_acpi_write; diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index c5e80887d0cb..f7e358be1af3 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -682,12 +682,16 @@ static bool button_array_present(struct platform_device *device) static int intel_hid_probe(struct platform_device *device) { - acpi_handle handle = ACPI_HANDLE(&device->dev); unsigned long long mode, dummy; struct intel_hid_priv *priv; + acpi_handle handle; acpi_status status; int err; + handle = ACPI_HANDLE(&device->dev); + if (!handle) + return -ENODEV; + intel_hid_init_dsm(handle); if (!intel_hid_evaluate_method(handle, INTEL_HID_DSM_HDMM_FN, &mode)) { diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 232cd12e3c9f..03b5bf461a86 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -275,12 +275,16 @@ static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel) static int intel_vbtn_probe(struct platform_device *device) { - acpi_handle handle = ACPI_HANDLE(&device->dev); bool dual_accel, has_buttons, has_switches; struct intel_vbtn_priv *priv; + acpi_handle handle; acpi_status status; int err; + handle = ACPI_HANDLE(&device->dev); + if (!handle) + return -ENODEV; + dual_accel = dual_accel_detect(); has_buttons = acpi_has_method(handle, "VBDL"); has_switches = intel_vbtn_has_switches(handle, dual_accel); diff --git a/drivers/regulator/tps65219-regulator.c b/drivers/regulator/tps65219-regulator.c index d77ca486879f..324c3a33af8a 100644 --- a/drivers/regulator/tps65219-regulator.c +++ b/drivers/regulator/tps65219-regulator.c @@ -346,8 +346,9 @@ static irqreturn_t tps65219_regulator_irq_handler(int irq, void *data) return IRQ_HANDLED; } - regulator_notifier_call_chain(irq_data->rdev, - irq_data->type->event, NULL); + if (irq_data->rdev) + regulator_notifier_call_chain(irq_data->rdev, + irq_data->type->event, NULL); dev_err(irq_data->dev, "Error IRQ trap %s for %s\n", irq_data->type->event_name, irq_data->type->regulator_name); @@ -398,14 +399,65 @@ static struct tps65219_chip_data chip_info_table[] = { }, }; -static int tps65219_regulator_probe(struct platform_device *pdev) +static bool tps65219_is_regulator_name(const struct tps65219_chip_data *pmic, + const char *name) +{ + int i; + + for (i = 0; i < pmic->common_rdesc_size; i++) + if (!strcmp(pmic->common_rdesc[i].name, name)) + return true; + for (i = 0; i < pmic->rdesc_size; i++) + if (!strcmp(pmic->rdesc[i].name, name)) + return true; + return false; +} + +static int tps65219_register_irqs(struct platform_device *pdev, + struct tps65219 *tps, + struct regulator_dev *rdev, + struct tps65219_regulator_irq_type *irq_types, + int nirqs, + const char *regulator_name) { struct tps65219_regulator_irq_data *irq_data; + int i, irq, error; + + for (i = 0; i < nirqs; i++) { + if (strcmp(irq_types[i].regulator_name, regulator_name)) + continue; + + irq = platform_get_irq_byname(pdev, irq_types[i].irq_name); + if (irq < 0) + return -EINVAL; + + irq_data = devm_kmalloc(tps->dev, sizeof(*irq_data), GFP_KERNEL); + if (!irq_data) + return -ENOMEM; + + irq_data->dev = tps->dev; + irq_data->type = &irq_types[i]; + irq_data->rdev = rdev; + + error = devm_request_threaded_irq(tps->dev, irq, NULL, + tps65219_regulator_irq_handler, + IRQF_ONESHOT, + irq_types[i].irq_name, + irq_data); + if (error) + return dev_err_probe(tps->dev, error, + "Failed to request %s IRQ %d\n", + irq_types[i].irq_name, irq); + } + return 0; +} + +static int tps65219_regulator_probe(struct platform_device *pdev) +{ struct tps65219_regulator_irq_type *irq_type; struct tps65219_chip_data *pmic; struct regulator_dev *rdev; int error; - int irq; int i; struct tps65219 *tps = dev_get_drvdata(pdev->dev.parent); @@ -425,6 +477,19 @@ static int tps65219_regulator_probe(struct platform_device *pdev) return dev_err_probe(tps->dev, PTR_ERR(rdev), "Failed to register %s regulator\n", pmic->common_rdesc[i].name); + + error = tps65219_register_irqs(pdev, tps, rdev, + pmic->common_irq_types, + pmic->common_irq_size, + pmic->common_rdesc[i].name); + if (error) + return error; + error = tps65219_register_irqs(pdev, tps, rdev, + pmic->irq_types, + pmic->dev_irq_size, + pmic->common_rdesc[i].name); + if (error) + return error; } for (i = 0; i < pmic->rdesc_size; i++) { @@ -434,52 +499,42 @@ static int tps65219_regulator_probe(struct platform_device *pdev) return dev_err_probe(tps->dev, PTR_ERR(rdev), "Failed to register %s regulator\n", pmic->rdesc[i].name); + + error = tps65219_register_irqs(pdev, tps, rdev, + pmic->common_irq_types, + pmic->common_irq_size, + pmic->rdesc[i].name); + if (error) + return error; + error = tps65219_register_irqs(pdev, tps, rdev, + pmic->irq_types, + pmic->dev_irq_size, + pmic->rdesc[i].name); + if (error) + return error; } + /* Register non-regulator IRQs (TIMEOUT, SENSOR) with rdev=NULL */ for (i = 0; i < pmic->common_irq_size; ++i) { irq_type = &pmic->common_irq_types[i]; - irq = platform_get_irq_byname(pdev, irq_type->irq_name); - if (irq < 0) - return -EINVAL; - - irq_data = devm_kmalloc(tps->dev, sizeof(*irq_data), GFP_KERNEL); - if (!irq_data) - return -ENOMEM; - - irq_data->dev = tps->dev; - irq_data->type = irq_type; - error = devm_request_threaded_irq(tps->dev, irq, NULL, - tps65219_regulator_irq_handler, - IRQF_ONESHOT, - irq_type->irq_name, - irq_data); + if (tps65219_is_regulator_name(pmic, irq_type->regulator_name)) + continue; + error = tps65219_register_irqs(pdev, tps, NULL, + irq_type, 1, + irq_type->regulator_name); if (error) - return dev_err_probe(tps->dev, error, - "Failed to request %s IRQ %d\n", - irq_type->irq_name, irq); + return error; } for (i = 0; i < pmic->dev_irq_size; ++i) { irq_type = &pmic->irq_types[i]; - irq = platform_get_irq_byname(pdev, irq_type->irq_name); - if (irq < 0) - return -EINVAL; - - irq_data = devm_kmalloc(tps->dev, sizeof(*irq_data), GFP_KERNEL); - if (!irq_data) - return -ENOMEM; - - irq_data->dev = tps->dev; - irq_data->type = irq_type; - error = devm_request_threaded_irq(tps->dev, irq, NULL, - tps65219_regulator_irq_handler, - IRQF_ONESHOT, - irq_type->irq_name, - irq_data); + if (tps65219_is_regulator_name(pmic, irq_type->regulator_name)) + continue; + error = tps65219_register_irqs(pdev, tps, NULL, + irq_type, 1, + irq_type->regulator_name); if (error) - return dev_err_probe(tps->dev, error, - "Failed to request %s IRQ %d\n", - irq_type->irq_name, irq); + return error; } return 0; diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c index 6d2f4c831df7..ff199bab5d1a 100644 --- a/drivers/scsi/isci/host.c +++ b/drivers/scsi/isci/host.c @@ -1252,6 +1252,9 @@ void isci_host_deinit(struct isci_host *ihost) wait_for_stop(ihost); + /* No further IRQ-driven scheduling can happen past wait_for_stop(). */ + tasklet_kill(&ihost->completion_tasklet); + /* phy stop is after controller stop to allow port and device to * go idle before shutting down the phys, but the expectation is * that i/o has been shut off well before we reach this diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 072d4c4add33..d9bae23cb929 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2398,8 +2398,7 @@ sd_spinup_disk(struct scsi_disk *sdkp) { static const u8 cmd[10] = { TEST_UNIT_READY }; unsigned long spintime_expire = 0; - int spintime, sense_valid = 0; - unsigned int the_result; + int the_result, spintime, sense_valid = 0; struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { /* Do not retry Medium Not Present */ diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c index 4d1dce4f4974..71a6e5c475b0 100644 --- a/drivers/spi/spi-amd.c +++ b/drivers/spi/spi-amd.c @@ -868,7 +868,7 @@ static int amd_spi_probe(struct platform_device *pdev) dev_dbg(dev, "io_remap_address: %p\n", amd_spi->io_remap_addr); amd_spi->version = (uintptr_t)device_get_match_data(dev); - host->bus_num = 0; + host->bus_num = (amd_spi->version == AMD_HID2_SPI) ? 2 : 0; return amd_spi_probe_common(dev, host); } diff --git a/drivers/spi/spi-dw-dma.c b/drivers/spi/spi-dw-dma.c index b5bed02b7e50..31063f927092 100644 --- a/drivers/spi/spi-dw-dma.c +++ b/drivers/spi/spi-dw-dma.c @@ -271,7 +271,7 @@ static int dw_spi_dma_wait(struct dw_spi *dws, unsigned int len, u32 speed) msecs_to_jiffies(ms)); if (ms == 0) { - dev_err(&dws->host->cur_msg->spi->dev, + dev_err(&dws->host->dev, "DMA transaction timed out\n"); return -ETIMEDOUT; } diff --git a/drivers/spi/spi-ep93xx.c b/drivers/spi/spi-ep93xx.c index e1d097091925..52d2f8920cd7 100644 --- a/drivers/spi/spi-ep93xx.c +++ b/drivers/spi/spi-ep93xx.c @@ -582,12 +582,14 @@ static int ep93xx_spi_setup_dma(struct device *dev, struct ep93xx_spi *espi) espi->dma_rx = dma_request_chan(dev, "rx"); if (IS_ERR(espi->dma_rx)) { ret = dev_err_probe(dev, PTR_ERR(espi->dma_rx), "rx DMA setup failed"); + espi->dma_rx = NULL; goto fail_free_page; } espi->dma_tx = dma_request_chan(dev, "tx"); if (IS_ERR(espi->dma_tx)) { ret = dev_err_probe(dev, PTR_ERR(espi->dma_tx), "tx DMA setup failed"); + espi->dma_tx = NULL; goto fail_release_rx; } diff --git a/drivers/spi/spi-mtk-snfi.c b/drivers/spi/spi-mtk-snfi.c index a026b0e61994..9e6038a5a2ad 100644 --- a/drivers/spi/spi-mtk-snfi.c +++ b/drivers/spi/spi-mtk-snfi.c @@ -961,7 +961,7 @@ static int mtk_snand_read_page_cache(struct mtk_snand *snf, &snf->op_done, usecs_to_jiffies(SNFI_POLL_INTERVAL))) { dev_err(snf->dev, "DMA timed out for reading from cache.\n"); ret = -ETIMEDOUT; - goto cleanup; + goto cleanup2; } // Wait for BUS_SEC_CNTR returning expected value diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c index a5c549479c7d..3134b9c1480a 100644 --- a/drivers/spi/spi-qup.c +++ b/drivers/spi/spi-qup.c @@ -996,8 +996,11 @@ static int spi_qup_init_dma(struct spi_controller *host, resource_size_t base) err: dma_release_channel(host->dma_tx); + host->dma_tx = NULL; err_tx: dma_release_channel(host->dma_rx); + host->dma_rx = NULL; + return ret; } diff --git a/drivers/spi/spi-sprd.c b/drivers/spi/spi-sprd.c index 218c38841f05..827a307c7c82 100644 --- a/drivers/spi/spi-sprd.c +++ b/drivers/spi/spi-sprd.c @@ -992,7 +992,8 @@ err_rpm_put: disable_clk: clk_disable_unprepare(ss->clk); release_dma: - sprd_spi_dma_release(ss); + if (ss->dma.enable) + sprd_spi_dma_release(ss); free_controller: spi_controller_put(sctlr); diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c index ba54d222d0e0..28a867c164c4 100644 --- a/drivers/spi/spi-ti-qspi.c +++ b/drivers/spi/spi-ti-qspi.c @@ -868,6 +868,7 @@ static int ti_qspi_probe(struct platform_device *pdev) dev_err(qspi->dev, "dma_alloc_coherent failed, using PIO mode\n"); dma_release_channel(qspi->rx_chan); + qspi->rx_chan = NULL; goto no_dma; } host->dma_rx = qspi->rx_chan; diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 45b870b55feb..b6315cfe52bc 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -176,6 +176,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques struct snp_guest_req req = {}; int ret, npages = 0, resp_len; sockptr_t certs_address; + u64 pfn; if (sockptr_is_null(io->req_data) || sockptr_is_null(io->resp_data)) return -EINVAL; @@ -215,10 +216,11 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques if (!req.certs_data) return -ENOMEM; + pfn = PHYS_PFN(virt_to_phys(req.certs_data)); ret = set_memory_decrypted((unsigned long)req.certs_data, npages); if (ret) { pr_err("failed to mark page shared, ret=%d\n", ret); - free_pages_exact(req.certs_data, npages << PAGE_SHIFT); + snp_leak_pages(pfn, npages); return -EFAULT; } @@ -272,10 +274,12 @@ e_free: kfree(report_resp); e_free_data: if (npages) { - if (set_memory_encrypted((unsigned long)req.certs_data, npages)) + if (set_memory_encrypted((unsigned long)req.certs_data, npages)) { WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n"); - else + snp_leak_pages(pfn, npages); + } else { free_pages_exact(req.certs_data, npages << PAGE_SHIFT); + } } return ret; } diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 89d36e3e5c79..fa84610e0b0d 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -2207,7 +2207,14 @@ int afs_single_writepages(struct address_space *mapping, /* Need to lock to prevent the folio queue and folios from being thrown * away. */ - down_read(&dvnode->validate_lock); + if (!down_read_trylock(&dvnode->validate_lock)) { + if (wbc->sync_mode == WB_SYNC_NONE) { + /* The VFS will have undirtied the inode. */ + netfs_single_mark_inode_dirty(&dvnode->netfs.inode); + return 0; + } + down_read(&dvnode->validate_lock); + } if (is_dir ? test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) : @@ -2215,6 +2222,8 @@ int afs_single_writepages(struct address_space *mapping, iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size_read(&dvnode->netfs.inode)); ret = netfs_writeback_single(mapping, wbc, &iter); + if (ret == 1) + ret = 0; /* Skipped write due to lock conflict. */ } up_read(&dvnode->validate_lock); diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 2ab550a1e715..e050d0938dc4 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -666,10 +666,9 @@ static int resolve_indirect_ref(struct btrfs_backref_walk_ctx *ctx, ret = btrfs_search_old_slot(root, &search_key, path, ctx->time_seq); btrfs_debug(ctx->fs_info, - "search slot in root %llu (level %d, ref count %d) returned %d for key (%llu %u %llu)", - ref->root_id, level, ref->count, ret, - ref->key_for_search.objectid, ref->key_for_search.type, - ref->key_for_search.offset); +"search slot in root %llu (level %d, ref count %d) returned %d for key " BTRFS_KEY_FMT, + ref->root_id, level, ref->count, ret, + BTRFS_KEY_FMT_VALUE(&ref->key_for_search)); if (ret < 0) goto out; @@ -3323,9 +3322,9 @@ static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans, eb = path->nodes[level]; if (btrfs_node_blockptr(eb, path->slots[level]) != cur->bytenr) { btrfs_err(fs_info, -"couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)", +"couldn't find block (%llu) (level %d) in tree (%llu) with key " BTRFS_KEY_FMT, cur->bytenr, level - 1, btrfs_root_id(root), - tree_key->objectid, tree_key->type, tree_key->offset); + BTRFS_KEY_FMT_VALUE(tree_key)); btrfs_put_root(root); ret = -ENOENT; goto out; diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index a277c8cc9166..1e57f7d04c47 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1065,7 +1065,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_chunk_map *map) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_block_group *block_group; struct btrfs_free_cluster *cluster; struct inode *inode; @@ -1305,7 +1305,6 @@ out: btrfs_put_block_group(block_group); if (remove_rsv) btrfs_dec_delayed_refs_rsv_bg_updates(fs_info); - btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6e053caa6e10..27e2adc2ee71 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2599,12 +2599,11 @@ void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, if (unlikely(btrfs_comp_keys(&disk_key, new_key) >= 0)) { btrfs_print_leaf(eb); btrfs_crit(fs_info, - "slot %u key (%llu %u %llu) new key (%llu %u %llu)", + "slot %u key " BTRFS_KEY_FMT " new key " BTRFS_KEY_FMT, slot, btrfs_disk_key_objectid(&disk_key), btrfs_disk_key_type(&disk_key), btrfs_disk_key_offset(&disk_key), - new_key->objectid, new_key->type, - new_key->offset); + BTRFS_KEY_FMT_VALUE(new_key)); BUG(); } } @@ -2613,12 +2612,11 @@ void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, if (unlikely(btrfs_comp_keys(&disk_key, new_key) <= 0)) { btrfs_print_leaf(eb); btrfs_crit(fs_info, - "slot %u key (%llu %u %llu) new key (%llu %u %llu)", + "slot %u key " BTRFS_KEY_FMT " new key " BTRFS_KEY_FMT, slot, btrfs_disk_key_objectid(&disk_key), btrfs_disk_key_type(&disk_key), btrfs_disk_key_offset(&disk_key), - new_key->objectid, new_key->type, - new_key->offset); + BTRFS_KEY_FMT_VALUE(new_key)); BUG(); } } @@ -2677,10 +2675,9 @@ static bool check_sibling_keys(const struct extent_buffer *left, btrfs_crit(left->fs_info, "right extent buffer:"); btrfs_print_tree(right, false); btrfs_crit(left->fs_info, -"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)", - left_last.objectid, left_last.type, - left_last.offset, right_first.objectid, - right_first.type, right_first.offset); +"bad key order, sibling blocks, left last " BTRFS_KEY_FMT " right first " BTRFS_KEY_FMT, + BTRFS_KEY_FMT_VALUE(&left_last), + BTRFS_KEY_FMT_VALUE(&right_first)); return true; } return false; diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 77e1bcb2a74b..085a83ae9e62 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -112,7 +112,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, int ret = 0; int ret2 = 0; struct btrfs_root *root = dir->root; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_dir_item *dir_item; struct extent_buffer *leaf; unsigned long name_ptr; @@ -164,7 +164,6 @@ second_insert: ret2 = btrfs_insert_delayed_dir_index(trans, name->name, name->len, dir, &disk_key, type, index); out_free: - btrfs_free_path(path); if (ret) return ret; if (ret2) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 863b45092a19..663526d909ab 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -165,8 +165,8 @@ search_again: if (unlikely(num_refs == 0)) { ret = -EUCLEAN; btrfs_err(fs_info, - "unexpected zero reference count for extent item (%llu %u %llu)", - key.objectid, key.type, key.offset); + "unexpected zero reference count for extent item " BTRFS_KEY_FMT, + BTRFS_KEY_FMT_VALUE(&key)); btrfs_abort_transaction(trans, ret); return ret; } @@ -597,8 +597,8 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, num_refs = btrfs_shared_data_ref_count(leaf, ref2); } else { btrfs_err(trans->fs_info, - "unrecognized backref key (%llu %u %llu)", - key.objectid, key.type, key.offset); + "unrecognized backref key " BTRFS_KEY_FMT, + BTRFS_KEY_FMT_VALUE(&key)); btrfs_abort_transaction(trans, -EUCLEAN); return -EUCLEAN; } @@ -3084,7 +3084,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *info = trans->fs_info; struct btrfs_key key; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_root *extent_root; struct extent_buffer *leaf; struct btrfs_extent_item *ei; @@ -3119,7 +3119,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, node->bytenr, refs_to_drop); ret = -EINVAL; btrfs_abort_transaction(trans, ret); - goto out; + return ret; } if (is_data) @@ -3164,15 +3164,14 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, abort_and_dump(trans, path, "invalid iref slot %u, no EXTENT/METADATA_ITEM found but has inline extent ref", path->slots[0]); - ret = -EUCLEAN; - goto out; + return -EUCLEAN; } /* Must be SHARED_* item, remove the backref first */ ret = remove_extent_backref(trans, extent_root, path, NULL, refs_to_drop, is_data); if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); - goto out; + return ret; } btrfs_release_path(path); @@ -3221,7 +3220,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, } if (unlikely(ret < 0)) { btrfs_abort_transaction(trans, ret); - goto out; + return ret; } extent_slot = path->slots[0]; } @@ -3230,10 +3229,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu slot %d", bytenr, node->parent, node->ref_root, owner_objectid, owner_offset, path->slots[0]); - goto out; + return ret; } else { btrfs_abort_transaction(trans, ret); - goto out; + return ret; } leaf = path->nodes[0]; @@ -3244,7 +3243,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, "unexpected extent item size, has %u expect >= %zu", item_size, sizeof(*ei)); btrfs_abort_transaction(trans, ret); - goto out; + return ret; } ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); @@ -3258,8 +3257,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, key.objectid, key.type, key.offset, path->slots[0], owner_objectid, item_size, sizeof(*ei) + sizeof(*bi)); - ret = -EUCLEAN; - goto out; + return -EUCLEAN; } bi = (struct btrfs_tree_block_info *)(ei + 1); WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); @@ -3270,8 +3268,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, abort_and_dump(trans, path, "trying to drop %d refs but we only have %llu for bytenr %llu slot %u", refs_to_drop, refs, bytenr, path->slots[0]); - ret = -EUCLEAN; - goto out; + return -EUCLEAN; } refs -= refs_to_drop; @@ -3287,8 +3284,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, abort_and_dump(trans, path, "invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found, slot %u", path->slots[0]); - ret = -EUCLEAN; - goto out; + return -EUCLEAN; } } else { btrfs_set_extent_refs(leaf, ei, refs); @@ -3298,7 +3294,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, iref, refs_to_drop, is_data); if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); - goto out; + return ret; } } } else { @@ -3318,17 +3314,15 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, "invalid refs_to_drop, current refs %u refs_to_drop %u slot %u", extent_data_ref_count(path, iref), refs_to_drop, path->slots[0]); - ret = -EUCLEAN; - goto out; + return -EUCLEAN; } if (iref) { if (unlikely(path->slots[0] != extent_slot)) { abort_and_dump(trans, path, -"invalid iref, extent item key (%llu %u %llu) slot %u doesn't have wanted iref", - key.objectid, key.type, - key.offset, path->slots[0]); - ret = -EUCLEAN; - goto out; +"invalid iref, extent item key " BTRFS_KEY_FMT " slot %u doesn't have wanted iref", + BTRFS_KEY_FMT_VALUE(&key), + path->slots[0]); + return -EUCLEAN; } } else { /* @@ -3341,8 +3335,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, abort_and_dump(trans, path, "invalid SHARED_* item slot %u, previous item is not EXTENT/METADATA_ITEM", path->slots[0]); - ret = -EUCLEAN; - goto out; + return -EUCLEAN; } path->slots[0] = extent_slot; num_to_del = 2; @@ -3363,7 +3356,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, num_to_del); if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); - goto out; + return ret; } btrfs_release_path(path); @@ -3371,8 +3364,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, } btrfs_release_path(path); -out: - btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index d86541073d42..c3734892d654 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -841,7 +841,7 @@ int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, u64 start, u64 size) { struct btrfs_block_group *block_group; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); int ret; if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) @@ -851,7 +851,7 @@ int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, if (unlikely(!path)) { ret = -ENOMEM; btrfs_abort_transaction(trans, ret); - goto out; + return ret; } block_group = btrfs_lookup_block_group(trans->fs_info, start); @@ -859,7 +859,7 @@ int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, DEBUG_WARN("no block group found for start=%llu", start); ret = -ENOENT; btrfs_abort_transaction(trans, ret); - goto out; + return ret; } mutex_lock(&block_group->free_space_lock); @@ -869,8 +869,7 @@ int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, btrfs_abort_transaction(trans, ret); btrfs_put_block_group(block_group); -out: - btrfs_free_path(path); + return ret; } @@ -1023,7 +1022,7 @@ int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, u64 start, u64 size) { struct btrfs_block_group *block_group; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); int ret; if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) @@ -1033,7 +1032,7 @@ int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, if (unlikely(!path)) { ret = -ENOMEM; btrfs_abort_transaction(trans, ret); - goto out; + return ret; } block_group = btrfs_lookup_block_group(trans->fs_info, start); @@ -1041,7 +1040,7 @@ int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, DEBUG_WARN("no block group found for start=%llu", start); ret = -ENOENT; btrfs_abort_transaction(trans, ret); - goto out; + return ret; } mutex_lock(&block_group->free_space_lock); @@ -1051,8 +1050,7 @@ int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, btrfs_abort_transaction(trans, ret); btrfs_put_block_group(block_group); -out: - btrfs_free_path(path); + return ret; } @@ -1466,7 +1464,7 @@ int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans, struct btrfs_block_group *block_group) { struct btrfs_root *root = btrfs_free_space_root(block_group); - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key, found_key; struct extent_buffer *leaf; u64 start, end; @@ -1485,7 +1483,7 @@ int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans, if (unlikely(!path)) { ret = -ENOMEM; btrfs_abort_transaction(trans, ret); - goto out; + return ret; } start = block_group->start; @@ -1499,7 +1497,7 @@ int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans, ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); - goto out; + return ret; } leaf = path->nodes[0]; @@ -1530,14 +1528,13 @@ int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans, ret = btrfs_del_items(trans, root, path, path->slots[0], nr); if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); - goto out; + return ret; } btrfs_release_path(path); } ret = 0; -out: - btrfs_free_path(path); + return ret; } diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index 37aa8d141a83..eccc61463947 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -73,6 +73,9 @@ struct btrfs_space_info; #define BTRFS_SUPER_INFO_SIZE 4096 static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE); +#define BTRFS_KEY_FMT "(%llu %u %llu)" +#define BTRFS_KEY_FMT_VALUE(key) (key)->objectid, (key)->type, (key)->offset + /* * Number of metadata items necessary for an unlink operation: * @@ -133,6 +136,7 @@ enum { BTRFS_FS_LOG_RECOVERING, BTRFS_FS_OPEN, BTRFS_FS_QUOTA_ENABLED, + BTRFS_FS_SQUOTA_ENABLING, BTRFS_FS_UPDATE_UUID_TREE_GEN, BTRFS_FS_CREATING_FREE_SPACE_TREE, BTRFS_FS_BTREE_ERR, diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 1bd73b80f9fa..7e14e1bbcf38 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -444,7 +444,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct btrfs_truncate_control *control) { struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; struct btrfs_key key; @@ -730,6 +730,5 @@ out: if (!ret && control->last_size > new_size) control->last_size = new_size; - btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2c361e0691fc..a4f1810db079 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4448,7 +4448,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, { struct btrfs_root *root = dir->root; struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct extent_buffer *leaf; struct btrfs_dir_item *di; struct btrfs_key key; @@ -4541,7 +4541,6 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, if (ret) btrfs_abort_transaction(trans, ret); out: - btrfs_free_path(path); fscrypt_free_filename(&fname); return ret; } @@ -5668,9 +5667,9 @@ static int btrfs_inode_by_name(struct btrfs_inode *dir, struct dentry *dentry, location->type != BTRFS_ROOT_ITEM_KEY)) { ret = -EUCLEAN; btrfs_warn(root->fs_info, -"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))", +"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location " BTRFS_KEY_FMT ")", __func__, fname.disk_name.name, btrfs_ino(dir), - location->objectid, location->type, location->offset); + BTRFS_KEY_FMT_VALUE(location)); } if (!ret) *type = btrfs_dir_ftype(path->nodes[0], di); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c0691e93e0a5..2f1c5f5e2e72 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1613,7 +1613,7 @@ static noinline int search_ioctl(struct btrfs_root *root, { struct btrfs_fs_info *info = root->fs_info; struct btrfs_key key; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); int ret; int num_found = 0; unsigned long sk_offset = 0; @@ -1633,10 +1633,8 @@ static noinline int search_ioctl(struct btrfs_root *root, } else { /* Look up the root from the arguments. */ root = btrfs_get_fs_root(info, sk->tree_id, true); - if (IS_ERR(root)) { - btrfs_free_path(path); + if (IS_ERR(root)) return PTR_ERR(root); - } } key.objectid = sk->min_objectid; @@ -1670,7 +1668,6 @@ static noinline int search_ioctl(struct btrfs_root *root, sk->nr_items = num_found; btrfs_put_root(root); - btrfs_free_path(path); return ret; } @@ -1753,7 +1750,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, int total_len = 0; struct btrfs_inode_ref *iref; struct extent_buffer *l; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); if (dirid == BTRFS_FIRST_FREE_OBJECTID) { name[0]='\0'; @@ -1814,7 +1811,6 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, ret = 0; out: btrfs_put_root(root); - btrfs_free_path(path); return ret; } @@ -1831,8 +1827,8 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap, struct btrfs_inode_ref *iref; struct btrfs_root_ref *rref; struct btrfs_root *root = NULL; - struct btrfs_path *path; - struct btrfs_key key, key2; + BTRFS_PATH_AUTO_FREE(path); + struct btrfs_key key; struct extent_buffer *leaf; char *ptr; int slot; @@ -1852,10 +1848,8 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap, ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1]; root = btrfs_get_fs_root(fs_info, treeid, true); - if (IS_ERR(root)) { - ret = PTR_ERR(root); - goto out; - } + if (IS_ERR(root)) + return PTR_ERR(root); key.objectid = dirid; key.type = BTRFS_INODE_REF_KEY; @@ -1887,24 +1881,6 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap, read_extent_buffer(leaf, ptr, (unsigned long)(iref + 1), len); - /* Check the read+exec permission of this directory */ - ret = btrfs_previous_item(root, path, dirid, - BTRFS_INODE_ITEM_KEY); - if (ret < 0) { - goto out_put; - } else if (ret > 0) { - ret = -ENOENT; - goto out_put; - } - - leaf = path->nodes[0]; - slot = path->slots[0]; - btrfs_item_key_to_cpu(leaf, &key2, slot); - if (key2.objectid != dirid) { - ret = -ENOENT; - goto out_put; - } - /* * We don't need the path anymore, so release it and * avoid deadlocks and lockdep warnings in case @@ -1912,11 +1888,12 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap, * btree and lock the same leaf. */ btrfs_release_path(path); - temp_inode = btrfs_iget(key2.objectid, root); + temp_inode = btrfs_iget(key.offset, root); if (IS_ERR(temp_inode)) { ret = PTR_ERR(temp_inode); goto out_put; } + /* Check the read+exec permission of this directory. */ ret = inode_permission(idmap, &temp_inode->vfs_inode, MAY_READ | MAY_EXEC); iput(&temp_inode->vfs_inode); @@ -1947,12 +1924,10 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap, key.type = BTRFS_ROOT_REF_KEY; key.offset = args->treeid; ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); - if (ret < 0) { - goto out; - } else if (ret > 0) { - ret = -ENOENT; - goto out; - } + if (ret < 0) + return ret; + else if (ret > 0) + return -ENOENT; leaf = path->nodes[0]; slot = path->slots[0]; @@ -1962,10 +1937,8 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap, item_len = btrfs_item_size(leaf, slot); /* Check if dirid in ROOT_REF corresponds to passed dirid */ rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref); - if (args->dirid != btrfs_root_ref_dirid(leaf, rref)) { - ret = -EINVAL; - goto out; - } + if (args->dirid != btrfs_root_ref_dirid(leaf, rref)) + return -EINVAL; /* Copy subvolume's name */ item_off += sizeof(struct btrfs_root_ref); @@ -1975,8 +1948,7 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap, out_put: btrfs_put_root(root); -out: - btrfs_free_path(path); + return ret; } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 62b993fae54f..06edc5cdb00d 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -131,7 +131,7 @@ static void print_extent_item(const struct extent_buffer *eb, int slot, int type struct btrfs_tree_block_info *info; info = (struct btrfs_tree_block_info *)(ei + 1); btrfs_tree_block_key(eb, info, &key); - pr_info("\t\ttree block key (%llu %u %llu) level %d\n", + pr_info("\t\ttree block key " BTRFS_KEY_FMT " level %d\n", btrfs_disk_key_objectid(&key), key.type, btrfs_disk_key_offset(&key), btrfs_tree_block_level(eb, info)); @@ -277,9 +277,8 @@ static void print_dir_item(const struct extent_buffer *eb, int i) struct btrfs_key location; btrfs_dir_item_key_to_cpu(eb, di, &location); - pr_info("\t\tlocation key (%llu %u %llu) type %d\n", - location.objectid, location.type, location.offset, - btrfs_dir_ftype(eb, di)); + pr_info("\t\tlocation key " BTRFS_KEY_FMT " type %d\n", + BTRFS_KEY_FMT_VALUE(&location), btrfs_dir_ftype(eb, di)); pr_info("\t\ttransid %llu data_len %u name_len %u\n", btrfs_dir_transid(eb, di), data_len, name_len); di = (struct btrfs_dir_item *)((char *)di + len); @@ -598,10 +597,9 @@ void btrfs_print_tree(const struct extent_buffer *c, bool follow) print_eb_refs_lock(c); for (i = 0; i < nr; i++) { btrfs_node_key_to_cpu(c, &key, i); - pr_info("\tkey %d (%llu %u %llu) block %llu gen %llu\n", - i, key.objectid, key.type, key.offset, - btrfs_node_blockptr(c, i), - btrfs_node_ptr_generation(c, i)); + pr_info("\tkey %d " BTRFS_KEY_FMT " block %llu gen %llu\n", + i, BTRFS_KEY_FMT_VALUE(&key), btrfs_node_blockptr(c, i), + btrfs_node_ptr_generation(c, i)); } if (!follow) return; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 302bb3ecf39a..261aa6501920 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -346,6 +346,42 @@ int btrfs_verify_qgroup_counts(const struct btrfs_fs_info *fs_info, u64 qgroupid } #endif +static bool squota_check_parent_usage(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *parent) +{ + u64 excl_sum = 0; + u64 rfer_sum = 0; + u64 excl_cmpr_sum = 0; + u64 rfer_cmpr_sum = 0; + struct btrfs_qgroup_list *glist; + int nr_members = 0; + bool mismatch; + + if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE) + return false; + if (btrfs_qgroup_level(parent->qgroupid) == 0) + return false; + + /* Eligible parent qgroup. Squota; level > 0; empty members list. */ + list_for_each_entry(glist, &parent->members, next_member) { + excl_sum += glist->member->excl; + rfer_sum += glist->member->rfer; + excl_cmpr_sum += glist->member->excl_cmpr; + rfer_cmpr_sum += glist->member->rfer_cmpr; + nr_members++; + } + mismatch = (parent->excl != excl_sum || parent->rfer != rfer_sum || + parent->excl_cmpr != excl_cmpr_sum || parent->rfer_cmpr != excl_cmpr_sum); + + WARN(mismatch, + "parent squota qgroup %hu/%llu has mismatched usage from its %d members. " + "%llu %llu %llu %llu vs %llu %llu %llu %llu\n", + btrfs_qgroup_level(parent->qgroupid), + btrfs_qgroup_subvolid(parent->qgroupid), nr_members, parent->excl, + parent->rfer, parent->excl_cmpr, parent->rfer_cmpr, excl_sum, + rfer_sum, excl_cmpr_sum, rfer_cmpr_sum); + return mismatch; +} + __printf(2, 3) static void qgroup_mark_inconsistent(struct btrfs_fs_info *fs_info, const char *fmt, ...) { @@ -660,7 +696,7 @@ static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, { int ret; struct btrfs_root *quota_root = trans->fs_info->quota_root; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; path = btrfs_alloc_path(); @@ -672,7 +708,6 @@ static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, key.offset = dst; ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); - btrfs_free_path(path); return ret; } @@ -681,7 +716,7 @@ static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, { int ret; struct btrfs_root *quota_root = trans->fs_info->quota_root; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; path = btrfs_alloc_path(); @@ -694,24 +729,19 @@ static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); if (ret < 0) - goto out; + return ret; - if (ret > 0) { - ret = -ENOENT; - goto out; - } + if (ret > 0) + return -ENOENT; - ret = btrfs_del_item(trans, quota_root, path); -out: - btrfs_free_path(path); - return ret; + return btrfs_del_item(trans, quota_root, path); } static int add_qgroup_item(struct btrfs_trans_handle *trans, struct btrfs_root *quota_root, u64 qgroupid) { int ret; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_qgroup_info_item *qgroup_info; struct btrfs_qgroup_limit_item *qgroup_limit; struct extent_buffer *leaf; @@ -737,7 +767,7 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, quota_root, path, &key, sizeof(*qgroup_info)); if (ret && ret != -EEXIST) - goto out; + return ret; leaf = path->nodes[0]; qgroup_info = btrfs_item_ptr(leaf, path->slots[0], @@ -754,7 +784,7 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, quota_root, path, &key, sizeof(*qgroup_limit)); if (ret && ret != -EEXIST) - goto out; + return ret; leaf = path->nodes[0]; qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], @@ -765,17 +795,14 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); - ret = 0; -out: - btrfs_free_path(path); - return ret; + return 0; } static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid) { int ret; struct btrfs_root *quota_root = trans->fs_info->quota_root; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; path = btrfs_alloc_path(); @@ -787,33 +814,27 @@ static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid) key.offset = qgroupid; ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); if (ret < 0) - goto out; + return ret; - if (ret > 0) { - ret = -ENOENT; - goto out; - } + if (ret > 0) + return -ENOENT; ret = btrfs_del_item(trans, quota_root, path); if (ret) - goto out; + return ret; btrfs_release_path(path); key.type = BTRFS_QGROUP_LIMIT_KEY; ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); if (ret < 0) - goto out; + return ret; - if (ret > 0) { - ret = -ENOENT; - goto out; - } + if (ret > 0) + return -ENOENT; ret = btrfs_del_item(trans, quota_root, path); -out: - btrfs_free_path(path); return ret; } @@ -821,7 +842,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, struct btrfs_qgroup *qgroup) { struct btrfs_root *quota_root = trans->fs_info->quota_root; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; struct extent_buffer *l; struct btrfs_qgroup_limit_item *qgroup_limit; @@ -841,7 +862,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, ret = -ENOENT; if (ret) - goto out; + return ret; l = path->nodes[0]; slot = path->slots[0]; @@ -851,8 +872,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl); btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer); btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl); -out: - btrfs_free_path(path); + return ret; } @@ -861,7 +881,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *quota_root = fs_info->quota_root; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; struct extent_buffer *l; struct btrfs_qgroup_info_item *qgroup_info; @@ -884,7 +904,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans, ret = -ENOENT; if (ret) - goto out; + return ret; l = path->nodes[0]; slot = path->slots[0]; @@ -894,8 +914,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans, btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); -out: - btrfs_free_path(path); + return ret; } @@ -903,7 +922,7 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *quota_root = fs_info->quota_root; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; struct extent_buffer *l; struct btrfs_qgroup_status_item *ptr; @@ -923,7 +942,7 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans) ret = -ENOENT; if (ret) - goto out; + return ret; l = path->nodes[0]; slot = path->slots[0]; @@ -933,8 +952,7 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans) btrfs_set_qgroup_status_generation(l, ptr, trans->transid); btrfs_set_qgroup_status_rescan(l, ptr, fs_info->qgroup_rescan_progress.objectid); -out: - btrfs_free_path(path); + return ret; } @@ -944,7 +962,7 @@ out: static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; struct extent_buffer *leaf = NULL; int ret; @@ -961,7 +979,7 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, while (1) { ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) - goto out; + return ret; leaf = path->nodes[0]; nr = btrfs_header_nritems(leaf); if (!nr) @@ -974,14 +992,12 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, path->slots[0] = 0; ret = btrfs_del_items(trans, root, path, 0, nr); if (ret) - goto out; + return ret; btrfs_release_path(path); } - ret = 0; -out: - btrfs_free_path(path); - return ret; + + return 0; } int btrfs_quota_enable(struct btrfs_fs_info *fs_info, @@ -1095,7 +1111,13 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info, if (simple) { fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE; btrfs_set_fs_incompat(fs_info, SIMPLE_QUOTA); - btrfs_set_qgroup_status_enable_gen(leaf, ptr, trans->transid); + /* + * Set the enable generation to the next transaction, as we cannot + * ensure that extents written during this transaction will see any + * state we have set here. So we should treat all extents of the + * transaction as coming in before squotas was enabled. + */ + btrfs_set_qgroup_status_enable_gen(leaf, ptr, trans->transid + 1); } else { fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; } @@ -1198,7 +1220,15 @@ out_add_root: goto out_free_path; } - fs_info->qgroup_enable_gen = trans->transid; + /* + * Set fs_info->qgroup_enable_gen and BTRFS_FS_SQUOTA_ENABLING + * under the transaction handle. We want to ensure that all extents in + * the next transaction definitely see them. + */ + if (simple) { + fs_info->qgroup_enable_gen = trans->transid + 1; + set_bit(BTRFS_FS_SQUOTA_ENABLING, &fs_info->flags); + } mutex_unlock(&fs_info->qgroup_ioctl_lock); /* @@ -1212,9 +1242,15 @@ out_add_root: */ ret = btrfs_commit_transaction(trans); trans = NULL; + mutex_lock(&fs_info->qgroup_ioctl_lock); - if (ret) + if (ret) { + if (simple) { + clear_bit(BTRFS_FS_SQUOTA_ENABLING, &fs_info->flags); + fs_info->qgroup_enable_gen = 0; + } goto out_free_path; + } /* * Set quota enabled flag after committing the transaction, to avoid @@ -1224,6 +1260,8 @@ out_add_root: spin_lock(&fs_info->qgroup_lock); fs_info->quota_root = quota_root; set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); + if (simple) + clear_bit(BTRFS_FS_SQUOTA_ENABLING, &fs_info->flags); spin_unlock(&fs_info->qgroup_lock); /* Skip rescan for simple qgroups. */ @@ -1585,6 +1623,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst goto out; } ret = quick_update_accounting(fs_info, src, dst, 1); + squota_check_parent_usage(fs_info, parent); spin_unlock(&fs_info->qgroup_lock); out: kfree(prealloc); @@ -1643,6 +1682,8 @@ delete_item: spin_lock(&fs_info->qgroup_lock); del_relation_rb(fs_info, src, dst); ret = quick_update_accounting(fs_info, src, dst, -1); + ASSERT(parent); + squota_check_parent_usage(fs_info, parent); spin_unlock(&fs_info->qgroup_lock); } out: @@ -1704,6 +1745,28 @@ out: return ret; } +static bool can_delete_parent_qgroup(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup) +{ + ASSERT(btrfs_qgroup_level(qgroup->qgroupid)); + if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE) + squota_check_parent_usage(fs_info, qgroup); + return list_empty(&qgroup->members); +} + +/* + * Because a shared extent can outlive its owning subvolume, we cannot delete a + * subvol squota qgroup until all of the extents it owns are gone, even if the + * subvolume itself has been deleted. + */ +static bool can_delete_squota_subvol_qgroup(struct btrfs_fs_info *fs_info, + struct btrfs_qgroup *qgroup) +{ + ASSERT(btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE); + ASSERT(btrfs_qgroup_level(qgroup->qgroupid) == 0); + + return !(qgroup->rfer || qgroup->excl || qgroup->rfer_cmpr || qgroup->excl_cmpr); +} + /* * Return 0 if we can not delete the qgroup (not empty or has children etc). * Return >0 if we can delete the qgroup. @@ -1712,26 +1775,12 @@ out: static int can_delete_qgroup(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup) { struct btrfs_key key; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); int ret; - /* - * Squota would never be inconsistent, but there can still be case - * where a dropped subvolume still has qgroup numbers, and squota - * relies on such qgroup for future accounting. - * - * So for squota, do not allow dropping any non-zero qgroup. - */ - if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE && - (qgroup->rfer || qgroup->excl || qgroup->excl_cmpr || qgroup->rfer_cmpr)) - return 0; - /* For higher level qgroup, we can only delete it if it has no child. */ - if (btrfs_qgroup_level(qgroup->qgroupid)) { - if (!list_empty(&qgroup->members)) - return 0; - return 1; - } + if (btrfs_qgroup_level(qgroup->qgroupid)) + return can_delete_parent_qgroup(fs_info, qgroup); /* * For level-0 qgroups, we can only delete it if it has no subvolume @@ -1746,13 +1795,22 @@ static int can_delete_qgroup(struct btrfs_fs_info *fs_info, struct btrfs_qgroup if (!path) return -ENOMEM; + /* + * Any subvol qgroup, regardless of mode, cannot be deleted if the + * subvol still exists. + */ ret = btrfs_find_root(fs_info->tree_root, &key, path, NULL, NULL); - btrfs_free_path(path); /* - * The @ret from btrfs_find_root() exactly matches our definition for - * the return value, thus can be returned directly. + * btrfs_find_root returns <0 on error, 0 if found, and >0 if not, + * so the "found" and "error" cases match our desired return values. */ - return ret; + if (ret <= 0) + return ret; + + /* Squotas require additional checks, even if the subvol is deleted. */ + if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE) + return can_delete_squota_subvol_qgroup(fs_info, qgroup); + return 1; } int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) @@ -2301,7 +2359,7 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans, bool trace_leaf) { struct btrfs_key key; - struct btrfs_path *src_path; + BTRFS_PATH_AUTO_FREE(src_path); struct btrfs_fs_info *fs_info = trans->fs_info; u32 nodesize = fs_info->nodesize; int cur_level = root_level; @@ -2313,10 +2371,8 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans, return -EINVAL; src_path = btrfs_alloc_path(); - if (!src_path) { - ret = -ENOMEM; - goto out; - } + if (!src_path) + return -ENOMEM; if (dst_level) btrfs_node_key_to_cpu(dst_path->nodes[dst_level], &key, 0); @@ -2342,10 +2398,8 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans, parent_slot = src_path->slots[cur_level + 1]; eb = btrfs_read_node_slot(eb, parent_slot); - if (IS_ERR(eb)) { - ret = PTR_ERR(eb); - goto out; - } + if (IS_ERR(eb)) + return PTR_ERR(eb); src_path->nodes[cur_level] = eb; @@ -2366,10 +2420,8 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans, &src_key, src_path->slots[cur_level]); } /* Content mismatch, something went wrong */ - if (btrfs_comp_cpu_keys(&dst_key, &src_key)) { - ret = -ENOENT; - goto out; - } + if (btrfs_comp_cpu_keys(&dst_key, &src_key)) + return -ENOENT; cur_level--; } @@ -2380,21 +2432,20 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans, ret = btrfs_qgroup_trace_extent(trans, src_path->nodes[dst_level]->start, nodesize); if (ret < 0) - goto out; + return ret; ret = btrfs_qgroup_trace_extent(trans, dst_path->nodes[dst_level]->start, nodesize); if (ret < 0) - goto out; + return ret; /* Record leaf file extents */ if (dst_level == 0 && trace_leaf) { ret = btrfs_qgroup_trace_leaf_items(trans, src_path->nodes[0]); if (ret < 0) - goto out; + return ret; ret = btrfs_qgroup_trace_leaf_items(trans, dst_path->nodes[0]); } -out: - btrfs_free_path(src_path); + return ret; } @@ -2595,7 +2646,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, int level; u8 drop_subptree_thres; struct extent_buffer *eb = root_eb; - struct btrfs_path *path = NULL; + BTRFS_PATH_AUTO_FREE(path); ASSERT(0 <= root_level && root_level < BTRFS_MAX_LEVEL); ASSERT(root_eb != NULL); @@ -2628,12 +2679,12 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, ret = btrfs_read_extent_buffer(root_eb, &check); if (ret) - goto out; + return ret; } if (root_level == 0) { ret = btrfs_qgroup_trace_leaf_items(trans, root_eb); - goto out; + return ret; } path = btrfs_alloc_path(); @@ -2669,10 +2720,8 @@ walk_down: child_bytenr = btrfs_node_blockptr(eb, parent_slot); eb = btrfs_read_node_slot(eb, parent_slot); - if (IS_ERR(eb)) { - ret = PTR_ERR(eb); - goto out; - } + if (IS_ERR(eb)) + return PTR_ERR(eb); path->nodes[level] = eb; path->slots[level] = 0; @@ -2683,14 +2732,14 @@ walk_down: ret = btrfs_qgroup_trace_extent(trans, child_bytenr, fs_info->nodesize); if (ret) - goto out; + return ret; } if (level == 0) { ret = btrfs_qgroup_trace_leaf_items(trans, path->nodes[level]); if (ret) - goto out; + return ret; /* Nonzero return here means we completed our search */ ret = adjust_slots_upwards(path, root_level); @@ -2704,11 +2753,7 @@ walk_down: level--; } - ret = 0; -out: - btrfs_free_path(path); - - return ret; + return 0; } static void qgroup_iterator_nested_add(struct list_head *head, struct btrfs_qgroup *qgroup) @@ -3734,10 +3779,8 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, path, 1, 0); btrfs_debug(fs_info, - "current progress key (%llu %u %llu), search_slot ret %d", - fs_info->qgroup_rescan_progress.objectid, - fs_info->qgroup_rescan_progress.type, - fs_info->qgroup_rescan_progress.offset, ret); + "current progress key " BTRFS_KEY_FMT ", search_slot ret %d", + BTRFS_KEY_FMT_VALUE(&fs_info->qgroup_rescan_progress), ret); if (ret) { /* @@ -4909,7 +4952,8 @@ int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, u64 num_bytes = delta->num_bytes; const int sign = (delta->is_inc ? 1 : -1); - if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE) + if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE && + !test_bit(BTRFS_FS_SQUOTA_ENABLING, &fs_info->flags)) return 0; if (!btrfs_is_fstree(root)) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 0765e06d00b8..fc76013b1a3e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -615,8 +615,8 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, btrfs_disk_key_to_cpu(&cpu_key, &root->root_item.drop_progress); btrfs_err(fs_info, - "cannot relocate partially dropped subvolume %llu, drop progress key (%llu %u %llu)", - objectid, cpu_key.objectid, cpu_key.type, cpu_key.offset); + "cannot relocate partially dropped subvolume %llu, drop progress key " BTRFS_KEY_FMT, + objectid, BTRFS_KEY_FMT_VALUE(&cpu_key)); ret = -EUCLEAN; goto fail; } diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index d07eab70f759..6a7e297ab0a7 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -147,8 +147,8 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root if (unlikely(ret > 0)) { btrfs_crit(fs_info, - "unable to find root key (%llu %u %llu) in tree %llu", - key->objectid, key->type, key->offset, btrfs_root_id(root)); + "unable to find root key " BTRFS_KEY_FMT " in tree %llu", + BTRFS_KEY_FMT_VALUE(key), btrfs_root_id(root)); ret = -EUCLEAN; btrfs_abort_transaction(trans, ret); return ret; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9012ce7a742f..04473387ee8b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1053,10 +1053,8 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path, } if (unlikely(start < p->buf)) { btrfs_err(root->fs_info, - "send: path ref buffer underflow for key (%llu %u %llu)", - found_key->objectid, - found_key->type, - found_key->offset); + "send: path ref buffer underflow for key " BTRFS_KEY_FMT, + BTRFS_KEY_FMT_VALUE(found_key)); ret = -EINVAL; goto out; } @@ -7276,8 +7274,8 @@ static int search_key_again(const struct send_ctx *sctx, if (unlikely(ret > 0)) { btrfs_print_tree(path->nodes[path->lowest_level], false); btrfs_err(root->fs_info, -"send: key (%llu %u %llu) not found in %s root %llu, lowest_level %d, slot %d", - key->objectid, key->type, key->offset, +"send: key " BTRFS_KEY_FMT" not found in %s root %llu, lowest_level %d, slot %d", + BTRFS_KEY_FMT_VALUE(key), (root == sctx->parent_root ? "parent" : "send"), btrfs_root_id(root), path->lowest_level, path->slots[path->lowest_level]); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c40944ca7b94..f1bfe97beacf 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -805,17 +805,15 @@ char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, struct btrfs_root_ref *root_ref; struct btrfs_inode_ref *inode_ref; struct btrfs_key key; - struct btrfs_path *path = NULL; + BTRFS_PATH_AUTO_FREE(path); char *name = NULL, *ptr; u64 dirid; int len; int ret; path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto err; - } + if (!path) + return ERR_PTR(-ENOMEM); name = kmalloc(PATH_MAX, GFP_KERNEL); if (!name) { @@ -903,7 +901,6 @@ char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, fs_root = NULL; } - btrfs_free_path(path); if (ptr == name + PATH_MAX - 1) { name[0] = '/'; name[1] = '\0'; @@ -914,7 +911,6 @@ char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, err: btrfs_put_root(fs_root); - btrfs_free_path(path); kfree(name); return ERR_PTR(ret); } diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 33a45737c4cf..db7402836340 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1635,10 +1635,9 @@ static int check_extent_item(struct extent_buffer *leaf, if (unlikely(prev_end > key->objectid)) { extent_err(leaf, slot, - "previous extent [%llu %u %llu] overlaps current extent [%llu %u %llu]", - prev_key->objectid, prev_key->type, - prev_key->offset, key->objectid, key->type, - key->offset); + "previous extent " BTRFS_KEY_FMT " overlaps current extent " BTRFS_KEY_FMT, + BTRFS_KEY_FMT_VALUE(prev_key), + BTRFS_KEY_FMT_VALUE(key)); return -EUCLEAN; } } @@ -2077,10 +2076,9 @@ enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf) /* Make sure the keys are in the right order */ if (unlikely(btrfs_comp_cpu_keys(&prev_key, &key) >= 0)) { generic_err(leaf, slot, - "bad key order, prev (%llu %u %llu) current (%llu %u %llu)", - prev_key.objectid, prev_key.type, - prev_key.offset, key.objectid, key.type, - key.offset); + "bad key order, prev " BTRFS_KEY_FMT " current " BTRFS_KEY_FMT, + BTRFS_KEY_FMT_VALUE(&prev_key), + BTRFS_KEY_FMT_VALUE(&key)); return BTRFS_TREE_BLOCK_BAD_KEY_ORDER; } @@ -2198,10 +2196,9 @@ enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node) if (unlikely(btrfs_comp_cpu_keys(&key, &next_key) >= 0)) { generic_err(node, slot, - "bad key order, current (%llu %u %llu) next (%llu %u %llu)", - key.objectid, key.type, key.offset, - next_key.objectid, next_key.type, - next_key.offset); + "bad key order, current " BTRFS_KEY_FMT " next " BTRFS_KEY_FMT, + BTRFS_KEY_FMT_VALUE(&key), + BTRFS_KEY_FMT_VALUE(&next_key)); return BTRFS_TREE_BLOCK_BAD_KEY_ORDER; } } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c45c5112c035..a0e12b4fb956 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -199,9 +199,9 @@ static void do_abort_log_replay(struct walk_control *wc, const char *function, if (wc->log_leaf) { btrfs_crit(fs_info, - "log tree (for root %llu) leaf currently being processed (slot %d key %llu %u %llu):", +"log tree (for root %llu) leaf currently being processed (slot %d key " BTRFS_KEY_FMT "):", btrfs_root_id(wc->root), wc->log_slot, - wc->log_key.objectid, wc->log_key.type, wc->log_key.offset); + BTRFS_KEY_FMT_VALUE(&wc->log_key)); btrfs_print_leaf(wc->log_leaf); } @@ -511,9 +511,9 @@ static int overwrite_item(struct walk_control *wc) ret = btrfs_search_slot(NULL, root, &wc->log_key, wc->subvol_path, 0, 0); if (ret < 0) { btrfs_abort_log_replay(wc, ret, - "failed to search subvolume tree for key (%llu %u %llu) root %llu", - wc->log_key.objectid, wc->log_key.type, - wc->log_key.offset, btrfs_root_id(root)); + "failed to search subvolume tree for key " BTRFS_KEY_FMT " root %llu", + BTRFS_KEY_FMT_VALUE(&wc->log_key), + btrfs_root_id(root)); return ret; } @@ -619,9 +619,8 @@ insert: btrfs_extend_item(trans, wc->subvol_path, item_size - found_size); } else if (ret) { btrfs_abort_log_replay(wc, ret, - "failed to insert item for key (%llu %u %llu)", - wc->log_key.objectid, wc->log_key.type, - wc->log_key.offset); + "failed to insert item for key " BTRFS_KEY_FMT, + BTRFS_KEY_FMT_VALUE(&wc->log_key)); return ret; } dst_ptr = btrfs_item_ptr_offset(dst_eb, dst_slot); @@ -830,9 +829,9 @@ static noinline int replay_one_extent(struct walk_control *wc) &wc->log_key, sizeof(*item)); if (ret) { btrfs_abort_log_replay(wc, ret, - "failed to insert item with key (%llu %u %llu) root %llu", - wc->log_key.objectid, wc->log_key.type, - wc->log_key.offset, btrfs_root_id(root)); + "failed to insert item with key " BTRFS_KEY_FMT " root %llu", + BTRFS_KEY_FMT_VALUE(&wc->log_key), + btrfs_root_id(root)); goto out; } dest_offset = btrfs_item_ptr_offset(wc->subvol_path->nodes[0], @@ -1349,9 +1348,9 @@ again: ret = btrfs_search_slot(NULL, root, &search_key, wc->subvol_path, 0, 0); if (ret < 0) { btrfs_abort_log_replay(wc, ret, - "failed to search subvolume tree for key (%llu %u %llu) root %llu", - search_key.objectid, search_key.type, - search_key.offset, btrfs_root_id(root)); + "failed to search subvolume tree for key " BTRFS_KEY_FMT " root %llu", + BTRFS_KEY_FMT_VALUE(&search_key), + btrfs_root_id(root)); return ret; } else if (ret == 0) { /* @@ -1484,9 +1483,9 @@ again: } if (ret < 0) { btrfs_abort_log_replay(wc, ret, - "failed to search subvolume tree for key (%llu %u %llu) root %llu", - wc->log_key.objectid, wc->log_key.type, - wc->log_key.offset, btrfs_root_id(root)); + "failed to search subvolume tree for key " BTRFS_KEY_FMT " root %llu", + BTRFS_KEY_FMT_VALUE(&wc->log_key), + btrfs_root_id(root)); goto out; } @@ -2648,7 +2647,7 @@ static noinline int replay_dir_deletes(struct walk_control *wc, int ret = 0; struct btrfs_key dir_key; struct btrfs_key found_key; - struct btrfs_path *log_path; + BTRFS_PATH_AUTO_FREE(log_path); struct btrfs_inode *dir; dir_key.objectid = dirid; @@ -2665,7 +2664,6 @@ static noinline int replay_dir_deletes(struct walk_control *wc, * we replay the deletes before we copy in the inode item from the log. */ if (IS_ERR(dir)) { - btrfs_free_path(log_path); ret = PTR_ERR(dir); if (ret == -ENOENT) ret = 0; @@ -2701,10 +2699,9 @@ static noinline int replay_dir_deletes(struct walk_control *wc, wc->subvol_path, 0, 0); if (ret < 0) { btrfs_abort_log_replay(wc, ret, - "failed to search root %llu for key (%llu %u %llu)", + "failed to search root %llu for key " BTRFS_KEY_FMT, btrfs_root_id(root), - dir_key.objectid, dir_key.type, - dir_key.offset); + BTRFS_KEY_FMT_VALUE(&dir_key)); goto out; } @@ -2746,7 +2743,6 @@ static noinline int replay_dir_deletes(struct walk_control *wc, ret = 0; out: btrfs_release_path(wc->subvol_path); - btrfs_free_path(log_path); iput(&dir->vfs_inode); return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ef9f24076cca..630fb5885692 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4205,7 +4205,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) struct btrfs_root *chunk_root = fs_info->chunk_root; u64 chunk_type; struct btrfs_chunk *chunk; - struct btrfs_path *path = NULL; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; @@ -4382,7 +4382,6 @@ loop: goto again; } error: - btrfs_free_path(path); if (enospc_errors) { btrfs_info(fs_info, "%d enospc errors during balance", enospc_errors); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 79fb1614bd0c..b6f01d6c79e7 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -85,7 +85,7 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, { struct btrfs_dir_item *di = NULL; struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); size_t name_len = strlen(name); int ret = 0; @@ -212,7 +212,6 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, */ } out: - btrfs_free_path(path); if (!ret) { set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 71f01f0a0743..33932d56d3a4 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1511,8 +1511,15 @@ repeat: DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page)); folio = page_folio(zbv.page); - /* For preallocated managed folios, add them to page cache here */ + /* + * Preallocated folios are added to the managed cache here rather than + * in z_erofs_bind_cache() in order to keep these folios locked in + * increasing (physical) address order. + * Clear folio->private before these folios become visible to others in + * the managed cache to avoid duplicate additions for unaligned extents. + */ if (folio->private == Z_EROFS_PREALLOCATED_FOLIO) { + folio->private = NULL; tocache = true; goto out_tocache; } @@ -1548,14 +1555,8 @@ repeat: } return; } - /* - * Already linked with another pcluster, which only appears in - * crafted images by fuzzers for now. But handle this anyway. - */ - tocache = false; /* use temporary short-lived pages */ } else { DBG_BUGON(1); /* referenced managed folios can't be truncated */ - tocache = true; } folio_unlock(folio); folio_put(folio); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ecaec0fea3a1..1bc6982b5d6a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -283,21 +283,33 @@ invalid: goto out; } -#if BITS_PER_LONG < 64 static int fuse_dentry_init(struct dentry *dentry) { + int ret = 0; + + /* + * Initialising d_time (epoch) to '0' ensures the dentry is invalid + * if compared to fc->epoch, which is initialized to '1'. + */ + dentry->d_time = 0; + +#if BITS_PER_LONG < 64 dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE); - return dentry->d_fsdata ? 0 : -ENOMEM; + ret = dentry->d_fsdata ? 0 : -ENOMEM; +#endif + + return ret; } static void fuse_dentry_release(struct dentry *dentry) { +#if BITS_PER_LONG < 64 union fuse_dentry *fd = dentry->d_fsdata; kfree_rcu(fd, rcu); -} #endif +} static int fuse_dentry_delete(const struct dentry *dentry) { @@ -331,10 +343,8 @@ static struct vfsmount *fuse_dentry_automount(struct path *path) const struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, .d_delete = fuse_dentry_delete, -#if BITS_PER_LONG < 64 .d_init = fuse_dentry_init, .d_release = fuse_dentry_release, -#endif .d_automount = fuse_dentry_automount, }; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 7879c049632b..553fca69cf42 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -308,7 +308,7 @@ static struct dentry *jfs_mkdir(struct mnt_idmap *idmap, struct inode *dip, out1: jfs_info("jfs_mkdir: rc:%d", rc); - return ERR_PTR(rc); + return rc ? ERR_PTR(rc) : NULL; } /* diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c index a37991fdb194..3640230a4d43 100644 --- a/fs/mnt_idmapping.c +++ b/fs/mnt_idmapping.c @@ -375,6 +375,8 @@ int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_ continue; seq_printf(seq, "%u %u %u", extent->first, lower, extent->count); + if (seq_has_overflowed(seq)) + return -EAGAIN; seq->count++; /* mappings are separated by \0 */ if (seq_has_overflowed(seq)) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 88361e8c7096..fab3181c7f86 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -156,9 +156,8 @@ static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq, netfs_cache_read_terminated, subreq); } -static void netfs_queue_read(struct netfs_io_request *rreq, - struct netfs_io_subrequest *subreq, - bool last_subreq) +void netfs_queue_read(struct netfs_io_request *rreq, + struct netfs_io_subrequest *subreq) { struct netfs_io_stream *stream = &rreq->io_streams[0]; @@ -178,11 +177,6 @@ static void netfs_queue_read(struct netfs_io_request *rreq, } } - if (last_subreq) { - smp_wmb(); /* Write lists before ALL_QUEUED. */ - set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); - } - spin_unlock(&rreq->lock); } @@ -233,6 +227,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq, subreq->start = start; subreq->len = size; + netfs_queue_read(rreq, subreq); + source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size); subreq->source = source; if (source == NETFS_DOWNLOAD_FROM_SERVER) { @@ -253,6 +249,7 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq, rreq->debug_id, subreq->debug_index, subreq->len, size, subreq->start, ictx->zero_point, rreq->i_size); + netfs_cancel_read(subreq, ret); break; } subreq->len = len; @@ -261,12 +258,7 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq, if (rreq->netfs_ops->prepare_read) { ret = rreq->netfs_ops->prepare_read(subreq); if (ret < 0) { - subreq->error = ret; - /* Not queued - release both refs. */ - netfs_put_subrequest(subreq, - netfs_sreq_trace_put_cancel); - netfs_put_subrequest(subreq, - netfs_sreq_trace_put_cancel); + netfs_cancel_read(subreq, ret); break; } trace_netfs_sreq(subreq, netfs_sreq_trace_prepare); @@ -289,24 +281,29 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq, pr_err("Unexpected read source %u\n", source); WARN_ON_ONCE(1); + netfs_cancel_read(subreq, ret); break; issue: slice = netfs_prepare_read_iterator(subreq, ractl); if (slice < 0) { ret = slice; - subreq->error = ret; - trace_netfs_sreq(subreq, netfs_sreq_trace_cancel); - /* Not queued - release both refs. */ - netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); - netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); + netfs_cancel_read(subreq, ret); break; } - size -= slice; start += slice; + size -= slice; + if (size <= 0) { + smp_wmb(); /* Write lists before ALL_QUEUED. */ + set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); + } - netfs_queue_read(rreq, subreq, size <= 0); netfs_issue_read(rreq, subreq); + + if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) + netfs_wait_for_paused_read(rreq); + if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) + break; cond_resched(); } while (size > 0); @@ -397,6 +394,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio) { struct netfs_io_request *rreq; struct address_space *mapping = folio->mapping; + struct netfs_group *group = netfs_folio_group(folio); struct netfs_folio *finfo = netfs_folio_info(folio); struct netfs_inode *ctx = netfs_inode(mapping->host); struct folio *sink = NULL; @@ -458,14 +456,20 @@ static int netfs_read_gaps(struct file *file, struct folio *folio) netfs_read_to_pagecache(rreq, NULL); - if (sink) - folio_put(sink); - ret = netfs_wait_for_read(rreq); if (ret >= 0) { + if (group) + folio_change_private(folio, group); + else + folio_detach_private(folio); + kfree(finfo); + trace_netfs_folio(folio, netfs_folio_trace_filled_gaps); flush_dcache_folio(folio); folio_mark_uptodate(folio); } + + if (sink) + folio_put(sink); folio_unlock(folio); netfs_put_request(rreq, netfs_rreq_trace_put_return); return ret < 0 ? ret : 0; @@ -498,10 +502,10 @@ int netfs_read_folio(struct file *file, struct folio *folio) struct netfs_inode *ctx = netfs_inode(mapping->host); int ret; - if (folio_test_dirty(folio)) { - trace_netfs_folio(folio, netfs_folio_trace_read_gaps); + folio_wait_writeback(folio); + + if (folio_test_dirty(folio)) return netfs_read_gaps(file, folio); - } _enter("%lx", folio->index); @@ -667,7 +671,7 @@ retry: ret = PTR_ERR(rreq); goto error; } - rreq->no_unlock_folio = folio->index; + rreq->no_unlock_folio = folio; __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); ret = netfs_begin_cache_read(rreq, ctx); @@ -684,9 +688,9 @@ retry: netfs_read_to_pagecache(rreq, NULL); ret = netfs_wait_for_read(rreq); + netfs_put_request(rreq, netfs_rreq_trace_put_return); if (ret < 0) goto error; - netfs_put_request(rreq, netfs_rreq_trace_put_return); have_folio: ret = folio_wait_private_2_killable(folio); @@ -733,7 +737,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, goto error; } - rreq->no_unlock_folio = folio->index; + rreq->no_unlock_folio = folio; __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); ret = netfs_begin_cache_read(rreq, ctx); if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 09394ac2c180..dd0ce7b769ce 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -13,24 +13,6 @@ #include <linux/pagevec.h> #include "internal.h" -static void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group) -{ - if (netfs_group) - folio_attach_private(folio, netfs_get_group(netfs_group)); -} - -static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group) -{ - void *priv = folio_get_private(folio); - - if (unlikely(priv != netfs_group)) { - if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE)) - folio_attach_private(folio, netfs_get_group(netfs_group)); - else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE) - folio_detach_private(folio); - } -} - /* * Grab a folio for writing and lock it. Attempt to allocate as large a folio * as possible to hold as much of the remaining length as possible in one go. @@ -150,6 +132,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, } do { + enum netfs_folio_trace trace; struct netfs_folio *finfo; struct netfs_group *group; unsigned long long fpos; @@ -157,6 +140,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, size_t offset; /* Offset into pagecache folio */ size_t part; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ + void *priv; offset = pos & (max_chunk - 1); part = min(max_chunk - offset, iov_iter_count(iter)); @@ -202,29 +186,40 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, goto error_folio_unlock; } - /* Decide how we should modify a folio. We might be attempting - * to do write-streaming, in which case we don't want to a - * local RMW cycle if we can avoid it. If we're doing local - * caching or content crypto, we award that priority over - * avoiding RMW. If the file is open readably, then we also - * assume that we may want to read what we wrote. - */ finfo = netfs_folio_info(folio); group = netfs_folio_group(folio); + /* If the requested group differs from the group set on the + * page, then we need to flush out the folio if it has a group + * set (ie. is non-NULL). Note that COPY_TO_CACHE is a special + * case, being a netfs annotation rather than an actual group. + * + * The filesystem isn't permitted to mix writes with groups and + * writes without groups as the NULL group is used to indicate + * that no group is set. + */ if (unlikely(group != netfs_group) && - group != NETFS_FOLIO_COPY_TO_CACHE) + group != NETFS_FOLIO_COPY_TO_CACHE && + group) { + WARN_ON_ONCE(!netfs_group); goto flush_content; + } + /* Decide how we should modify a folio. We might be attempting + * to do write-streaming, as we don't want to a local RMW cycle + * if we can avoid it. If we're doing local caching or content + * crypto, we award that priority over avoiding RMW. If the + * file is open readably, then we let ->read_folio() fill in + * the gaps. + */ if (folio_test_uptodate(folio)) { if (mapping_writably_mapped(mapping)) flush_dcache_folio(folio); copied = copy_folio_from_iter_atomic(folio, offset, part, iter); if (unlikely(copied == 0)) goto copy_failed; - netfs_set_group(folio, netfs_group); - trace_netfs_folio(folio, netfs_folio_is_uptodate); - goto copied; + trace = netfs_folio_is_uptodate; + goto copied_uptodate; } /* If the page is above the zero-point then we assume that the @@ -237,38 +232,53 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(copied == 0)) goto copy_failed; folio_zero_segment(folio, offset + copied, flen); - __netfs_set_group(folio, netfs_group); - folio_mark_uptodate(folio); - trace_netfs_folio(folio, netfs_modify_and_clear); - goto copied; + if (finfo) + trace = netfs_modify_and_clear_rm_finfo; + else + trace = netfs_modify_and_clear; + goto mark_uptodate; } /* See if we can write a whole folio in one go. */ if (!maybe_trouble && offset == 0 && part >= flen) { copied = copy_folio_from_iter_atomic(folio, offset, part, iter); - if (unlikely(copied == 0)) + if (likely(copied == part)) { + if (finfo) + trace = netfs_whole_folio_modify_filled; + else + trace = netfs_whole_folio_modify; + goto mark_uptodate; + } + if (copied == 0) goto copy_failed; - if (unlikely(copied < part)) { + if (!finfo || copied <= finfo->dirty_offset) { maybe_trouble = true; iov_iter_revert(iter, copied); copied = 0; folio_unlock(folio); goto retry; } - __netfs_set_group(folio, netfs_group); - folio_mark_uptodate(folio); - trace_netfs_folio(folio, netfs_whole_folio_modify); + + /* We overwrote some existing dirty data, so we have to + * accept the partial write. + */ + finfo->dirty_len += finfo->dirty_offset; + if (finfo->dirty_len == flen) { + trace = netfs_whole_folio_modify_filled_efault; + goto mark_uptodate; + } + if (copied > finfo->dirty_len) + finfo->dirty_len = copied; + finfo->dirty_offset = 0; + trace = netfs_whole_folio_modify_efault; goto copied; } /* We don't want to do a streaming write on a file that loses * caching service temporarily because the backing store got - * culled and we don't really want to get a streaming write on - * a file that's open for reading as ->read_folio() then has to - * be able to flush it. + * culled. */ - if ((file->f_mode & FMODE_READ) || - netfs_is_cache_enabled(ctx)) { + if (netfs_is_cache_enabled(ctx)) { if (finfo) { netfs_stat(&netfs_n_wh_wstream_conflict); goto flush_content; @@ -283,11 +293,11 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, copied = copy_folio_from_iter_atomic(folio, offset, part, iter); if (unlikely(copied == 0)) goto copy_failed; - netfs_set_group(folio, netfs_group); - trace_netfs_folio(folio, netfs_just_prefetch); - goto copied; + trace = netfs_just_prefetch; + goto copied_uptodate; } + /* Do a streaming write on a folio that has nothing in it yet. */ if (!finfo) { ret = -EIO; if (WARN_ON(folio_get_private(folio))) @@ -296,10 +306,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(copied == 0)) goto copy_failed; if (offset == 0 && copied == flen) { - __netfs_set_group(folio, netfs_group); - folio_mark_uptodate(folio); - trace_netfs_folio(folio, netfs_streaming_filled_page); - goto copied; + trace = netfs_streaming_filled_page; + goto mark_uptodate; } finfo = kzalloc(sizeof(*finfo), GFP_KERNEL); @@ -313,7 +321,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, finfo->dirty_len = copied; folio_attach_private(folio, (void *)((unsigned long)finfo | NETFS_FOLIO_INFO)); - trace_netfs_folio(folio, netfs_streaming_write); + trace = netfs_streaming_write; goto copied; } @@ -327,16 +335,10 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, goto copy_failed; finfo->dirty_len += copied; if (finfo->dirty_offset == 0 && finfo->dirty_len == flen) { - if (finfo->netfs_group) - folio_change_private(folio, finfo->netfs_group); - else - folio_detach_private(folio); - folio_mark_uptodate(folio); - kfree(finfo); - trace_netfs_folio(folio, netfs_streaming_cont_filled_page); - } else { - trace_netfs_folio(folio, netfs_streaming_write_cont); + trace = netfs_streaming_cont_filled_page; + goto mark_uptodate; } + trace = netfs_streaming_write_cont; goto copied; } @@ -350,7 +352,38 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, goto out; continue; + /* Mark a folio as being up to data when we've filled it + * completely. If the folio has a group attached, then it must + * be the same group, otherwise we should have flushed it out + * above. We have to get rid of the netfs_folio struct if + * there was one. + */ + mark_uptodate: + folio_mark_uptodate(folio); + + copied_uptodate: + priv = folio_get_private(folio); + if (likely(priv == netfs_group)) { + /* Already set correctly; no change required. */ + } else if (priv == NETFS_FOLIO_COPY_TO_CACHE) { + if (!netfs_group) + folio_detach_private(folio); + else + folio_change_private(folio, netfs_get_group(netfs_group)); + } else if (!priv) { + folio_attach_private(folio, netfs_get_group(netfs_group)); + } else { + WARN_ON_ONCE(!finfo); + if (netfs_group) + /* finfo->netfs_group has a ref */ + folio_change_private(folio, netfs_group); + else + folio_detach_private(folio); + kfree(finfo); + } + copied: + trace_netfs_folio(folio, trace); flush_dcache_folio(folio); /* Update the inode size if we moved the EOF marker */ @@ -511,6 +544,7 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr struct inode *inode = file_inode(file); struct netfs_inode *ictx = netfs_inode(inode); vm_fault_t ret = VM_FAULT_NOPAGE; + void *priv; int err; _enter("%lx", folio->index); @@ -531,7 +565,9 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr } group = netfs_folio_group(folio); - if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) { + if (group && + group != netfs_group && + group != NETFS_FOLIO_COPY_TO_CACHE) { folio_unlock(folio); err = filemap_fdatawrite_range(mapping, folio_pos(folio), @@ -553,7 +589,19 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr trace_netfs_folio(folio, netfs_folio_trace_mkwrite_plus); else trace_netfs_folio(folio, netfs_folio_trace_mkwrite); - netfs_set_group(folio, netfs_group); + + priv = folio_get_private(folio); + if (priv != netfs_group) { + if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE) + folio_detach_private(folio); + else if (netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE) + folio_change_private(folio, netfs_get_group(netfs_group)); + else if (netfs_group && !priv) + folio_attach_private(folio, netfs_get_group(netfs_group)); + else + WARN_ON_ONCE(1); + } + file_update_time(file); set_bit(NETFS_ICTX_MODIFIED_ATTR, &ictx->flags); if (ictx->ops->post_modify) diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index f72e6da88cca..6a8fb0d55e04 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -45,12 +45,11 @@ static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq) * Perform a read to a buffer from the server, slicing up the region to be read * according to the network rsize. */ -static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) +static void netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) { - struct netfs_io_stream *stream = &rreq->io_streams[0]; unsigned long long start = rreq->start; ssize_t size = rreq->len; - int ret = 0; + int ret; do { struct netfs_io_subrequest *subreq; @@ -58,7 +57,10 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) subreq = netfs_alloc_subrequest(rreq); if (!subreq) { - ret = -ENOMEM; + /* Stash the error in the request if there's not + * already an error set. + */ + cmpxchg(&rreq->error, 0, -ENOMEM); break; } @@ -66,25 +68,13 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) subreq->start = start; subreq->len = size; - __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); - - spin_lock(&rreq->lock); - list_add_tail(&subreq->rreq_link, &stream->subrequests); - if (list_is_first(&subreq->rreq_link, &stream->subrequests)) { - if (!stream->active) { - stream->collected_to = subreq->start; - /* Store list pointers before active flag */ - smp_store_release(&stream->active, true); - } - } - trace_netfs_sreq(subreq, netfs_sreq_trace_added); - spin_unlock(&rreq->lock); + netfs_queue_read(rreq, subreq); netfs_stat(&netfs_n_rh_download); if (rreq->netfs_ops->prepare_read) { ret = rreq->netfs_ops->prepare_read(subreq); if (ret < 0) { - netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); + netfs_cancel_read(subreq, ret); break; } } @@ -113,8 +103,6 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); netfs_wake_collector(rreq); } - - return ret; } /* @@ -137,21 +125,17 @@ static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync) // TODO: Use bounce buffer if requested inode_dio_begin(rreq->inode); + netfs_dispatch_unbuffered_reads(rreq); - ret = netfs_dispatch_unbuffered_reads(rreq); - - if (!rreq->submitted) { - netfs_put_request(rreq, netfs_rreq_trace_put_no_submit); - inode_dio_end(rreq->inode); - ret = 0; - goto out; - } + /* The collector will get run, even if we don't manage to submit any + * subreqs, so we shouldn't call inode_dio_end() here. + */ if (sync) ret = netfs_wait_for_read(rreq); else ret = -EIOCBQUEUED; -out: + _leave(" = %zd", ret); return ret; } diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index d436e20d3418..645996ecfc80 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -23,6 +23,8 @@ /* * buffered_read.c */ +void netfs_queue_read(struct netfs_io_request *rreq, + struct netfs_io_subrequest *subreq); void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error); int netfs_prefetch_for_write(struct file *file, struct folio *folio, size_t offset, size_t len); @@ -108,6 +110,7 @@ static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq, */ bool netfs_read_collection(struct netfs_io_request *rreq); void netfs_read_collection_worker(struct work_struct *work); +void netfs_cancel_read(struct netfs_io_subrequest *subreq, int error); void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error); /* diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c index 429e4396e1b0..b375567e0520 100644 --- a/fs/netfs/iterator.c +++ b/fs/netfs/iterator.c @@ -72,21 +72,24 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, break; } - if (ret > count) { - pr_err("get_pages rc=%zd more than %zu\n", ret, count); + if (WARN(ret > count, + "%s: extract_pages overrun %zd > %zu bytes\n", + __func__, ret, count)) { + ret = -EIO; break; } - count -= ret; - ret += offset; - cur_npages = DIV_ROUND_UP(ret, PAGE_SIZE); - - if (npages + cur_npages > max_pages) { - pr_err("Out of bvec array capacity (%u vs %u)\n", - npages + cur_npages, max_pages); + cur_npages = DIV_ROUND_UP(offset + ret, PAGE_SIZE); + if (WARN(cur_npages > max_pages - npages, + "%s: extract_pages overrun %u > %u pages\n", + __func__, npages + cur_npages, max_pages)) { + ret = -EIO; break; } + count -= ret; + ret += offset; + for (i = 0; i < cur_npages; i++) { len = ret > PAGE_SIZE ? PAGE_SIZE : ret; bvec_set_page(bv + npages + i, *pages++, len - offset, offset); @@ -97,6 +100,11 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, npages += cur_npages; } + /* Note: Don't try to clean up after EIO. Either we got no pages, so + * nothing to clean up, or we got a buffer overrun, memory corruption + * and can't trust the stuff in the buffer (a WARN was emitted). + */ + if (ret < 0 && (ret == -ENOMEM || npages == 0)) { for (i = 0; i < npages; i++) unpin_user_page(bv[i].bv_page); diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 486166460e17..1109ac379128 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -255,7 +255,8 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) goto erase_completely; /* Move the start of the data. */ finfo->dirty_len = fend - iend; - finfo->dirty_offset = offset; + finfo->dirty_offset = iend; + trace_netfs_folio(folio, netfs_folio_trace_invalidate_front); return; } @@ -264,12 +265,14 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) */ if (iend >= fend) { finfo->dirty_len = offset - fstart; + trace_netfs_folio(folio, netfs_folio_trace_invalidate_tail); return; } /* A partial write was split. The caller has already zeroed * it, so just absorb the hole. */ + trace_netfs_folio(folio, netfs_folio_trace_invalidate_middle); } return; @@ -277,8 +280,9 @@ erase_completely: netfs_put_group(netfs_folio_group(folio)); folio_detach_private(folio); folio_clear_uptodate(folio); + folio_cancel_dirty(folio); kfree(finfo); - return; + trace_netfs_folio(folio, netfs_folio_trace_invalidate_all); } EXPORT_SYMBOL(netfs_invalidate_folio); diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index e5f6665b3341..4c7312a4c859 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -83,7 +83,7 @@ static void netfs_unlock_read_folio(struct netfs_io_request *rreq, } just_unlock: - if (folio->index == rreq->no_unlock_folio && + if (folio == rreq->no_unlock_folio && test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) { _debug("no unlock"); } else { @@ -576,6 +576,17 @@ skip_error_checks: EXPORT_SYMBOL(netfs_read_subreq_terminated); /* + * Cancel a read subrequest due to preparation failure. + */ +void netfs_cancel_read(struct netfs_io_subrequest *subreq, int error) +{ + trace_netfs_sreq(subreq, netfs_sreq_trace_cancel); + subreq->error = error; + __set_bit(NETFS_SREQ_FAILED, &subreq->flags); + netfs_read_subreq_terminated(subreq); +} + +/* * Handle termination of a read from the cache. */ void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error) diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c index cca9ac43c077..999177426141 100644 --- a/fs/netfs/read_retry.c +++ b/fs/netfs/read_retry.c @@ -288,8 +288,15 @@ void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq) struct folio *folio = folioq_folio(p, slot); if (folio && !folioq_is_marked2(p, slot)) { - trace_netfs_folio(folio, netfs_folio_trace_abandon); - folio_unlock(folio); + if (folio == rreq->no_unlock_folio && + test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, + &rreq->flags)) { + _debug("no unlock"); + } else { + trace_netfs_folio(folio, + netfs_folio_trace_abandon); + folio_unlock(folio); + } } } } diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c index 9d48ced80d1f..cb422de66d0c 100644 --- a/fs/netfs/read_single.c +++ b/fs/netfs/read_single.c @@ -89,7 +89,6 @@ static void netfs_single_read_cache(struct netfs_io_request *rreq, */ static int netfs_single_dispatch_read(struct netfs_io_request *rreq) { - struct netfs_io_stream *stream = &rreq->io_streams[0]; struct netfs_io_subrequest *subreq; int ret = 0; @@ -102,14 +101,7 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq) subreq->len = rreq->len; subreq->io_iter = rreq->buffer.iter; - __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); - - spin_lock(&rreq->lock); - list_add_tail(&subreq->rreq_link, &stream->subrequests); - trace_netfs_sreq(subreq, netfs_sreq_trace_added); - /* Store list pointers before active flag */ - smp_store_release(&stream->active, true); - spin_unlock(&rreq->lock); + netfs_queue_read(rreq, subreq); netfs_single_cache_prepare_read(rreq, subreq); switch (subreq->source) { @@ -121,10 +113,14 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq) goto cancel; } + smp_wmb(); /* Write lists before ALL_QUEUED. */ + set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); rreq->netfs_ops->issue_read(subreq); rreq->submitted += subreq->len; break; case NETFS_READ_FROM_CACHE: + smp_wmb(); /* Write lists before ALL_QUEUED. */ + set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); trace_netfs_sreq(subreq, netfs_sreq_trace_submit); netfs_single_read_cache(rreq, subreq); rreq->submitted += subreq->len; @@ -134,14 +130,15 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq) pr_warn("Unexpected single-read source %u\n", subreq->source); WARN_ON_ONCE(true); ret = -EIO; - break; + goto cancel; } - smp_wmb(); /* Write lists before ALL_QUEUED. */ - set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); return ret; cancel: - netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); + netfs_cancel_read(subreq, ret); + smp_wmb(); /* Write lists before ALL_QUEUED. */ + set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); + netfs_wake_collector(rreq); return ret; } diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 2db688f94125..03d170b9022b 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -413,12 +413,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, if (streamw) netfs_issue_write(wreq, cache); - /* Flip the page to the writeback state and unlock. If we're called - * from write-through, then the page has already been put into the wb - * state. - */ - if (wreq->origin == NETFS_WRITEBACK) - folio_start_writeback(folio); + folio_start_writeback(folio); folio_unlock(folio); if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) { @@ -646,29 +641,41 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c struct folio *folio, size_t copied, bool to_page_end, struct folio **writethrough_cache) { + int ret; + _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u", wreq->debug_id, wreq->buffer.iter.count, wreq->wsize, copied, to_page_end); - if (!*writethrough_cache) { - if (folio_test_dirty(folio)) - /* Sigh. mmap. */ - folio_clear_dirty_for_io(folio); + /* The folio is locked. */ + if (*writethrough_cache != folio) { + if (*writethrough_cache) { + /* Did the folio get moved? */ + folio_put(*writethrough_cache); + *writethrough_cache = NULL; + } /* We can make multiple writes to the folio... */ - folio_start_writeback(folio); if (wreq->len == 0) trace_netfs_folio(folio, netfs_folio_trace_wthru); else trace_netfs_folio(folio, netfs_folio_trace_wthru_plus); *writethrough_cache = folio; + folio_get(folio); } wreq->len += copied; - if (!to_page_end) + + if (!to_page_end) { + folio_mark_dirty(folio); + folio_unlock(folio); return 0; + } + ret = netfs_write_folio(wreq, wbc, folio); + folio_put(*writethrough_cache); *writethrough_cache = NULL; - return netfs_write_folio(wreq, wbc, folio); + wreq->submitted = wreq->len; + return ret; } /* @@ -682,8 +689,12 @@ ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_c _enter("R=%x", wreq->debug_id); - if (writethrough_cache) + if (writethrough_cache) { + folio_lock(writethrough_cache); netfs_write_folio(wreq, wbc, writethrough_cache); + folio_put(writethrough_cache); + wreq->submitted = wreq->len; + } netfs_end_issue_write(wreq); @@ -818,6 +829,9 @@ static int netfs_write_folio_single(struct netfs_io_request *wreq, * * Write a monolithic, non-pagecache object back to the server and/or * the cache. + * + * Return: 0 if successful; 1 if skipped due to lock conflict and WB_SYNC_NONE; + * or a negative error code. */ int netfs_writeback_single(struct address_space *mapping, struct writeback_control *wbc, @@ -834,8 +848,10 @@ int netfs_writeback_single(struct address_space *mapping, if (!mutex_trylock(&ictx->wb_lock)) { if (wbc->sync_mode == WB_SYNC_NONE) { + /* The VFS will have undirtied the inode. */ + netfs_single_mark_inode_dirty(&ictx->inode); netfs_stat(&netfs_n_wb_lock_skip); - return 0; + return 1; } netfs_stat(&netfs_n_wb_lock_wait); mutex_lock(&ictx->wb_lock); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cb8096e94f51..a9e95df2fdb6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1848,6 +1848,13 @@ void nfsd4_revoke_states(struct nfsd_net *nn, struct super_block *sb) break; case SC_TYPE_LAYOUT: ls = layoutstateid(stid); + spin_lock(&clp->cl_lock); + if (stid->sc_status == 0) { + stid->sc_status |= + SC_STATUS_ADMIN_REVOKED; + atomic_inc(&clp->cl_admin_revoked); + } + spin_unlock(&clp->cl_lock); nfsd4_close_layout(ls); break; } diff --git a/fs/nsfs.c b/fs/nsfs.c index f22c2a636e8f..e2f9a725883c 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -261,7 +261,7 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, else tsk = find_task_by_pid_ns(arg, pid_ns); if (!tsk) - break; + return ret; switch (ioctl) { case NS_GET_PID_FROM_PIDNS: diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index cd7aaeef45fe..5042eb321eb8 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -458,8 +458,8 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) { struct super_block *sb = inode->i_sb; struct ntfs_inode *ni = ntfs_i(inode); - int err, dirty = 0; u64 new_valid; + int err; if (!S_ISREG(inode->i_mode)) return 0; @@ -475,7 +475,6 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) } new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size)); - truncate_setsize(inode, new_size); ni_lock(ni); @@ -489,22 +488,19 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) ni->i_valid = new_valid; ni_unlock(ni); + if (unlikely(err)) + return err; ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE; inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); if (!IS_DIRSYNC(inode)) { - dirty = 1; + mark_inode_dirty(inode); } else { err = ntfs_sync_inode(inode); if (err) return err; } - if (dirty) - mark_inode_dirty(inode); - - /*ntfs_flush_inodes(inode->i_sb, inode, NULL);*/ - return 0; } diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index bec5475de094..75e65e72c2d6 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -362,7 +362,7 @@ static struct dentry *orangefs_mkdir(struct mnt_idmap *idmap, struct inode *dir, __orangefs_setattr(dir, &iattr); out: op_release(new_op); - return ERR_PTR(ret); + return ret ? ERR_PTR(ret) : NULL; } static int orangefs_rename(struct mnt_idmap *idmap, diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index 9891f55bac1e..60b4147d0eea 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -8,6 +8,7 @@ */ #include <linux/list.h> +#include <linux/cred.h> #include <linux/slab.h> #include <linux/string.h> #include <keys/user-type.h> @@ -40,12 +41,27 @@ cifs_spnego_key_destroy(struct key *key) kfree(key->payload.data[0]); } +static int +cifs_spnego_key_vet_description(const char *description) +{ + /* + * cifs.spnego descriptions are authority-bearing inputs to cifs.upcall. + * They are only valid when produced by CIFS while using the private + * spnego_cred installed below. Do not let userspace create this type + * of key through request_key(2)/add_key(2), since the helper treats + * pid/uid/creduid/upcall_target as kernel-originating fields. + */ + if (current_cred() != spnego_cred) + return -EPERM; + return 0; +} /* * keytype for CIFS spnego keys */ struct key_type cifs_spnego_key_type = { .name = "cifs.spnego", + .vet_description = cifs_spnego_key_vet_description, .instantiate = cifs_spnego_key_instantiate, .destroy = cifs_spnego_key_destroy, .describe = user_describe, diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 4b34a4304edb..9059c2efbcc0 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -338,6 +338,8 @@ static void cifs_kill_sb(struct super_block *sb) /* Wait for all pending oplock breaks to complete */ flush_workqueue(cifsoplockd_wq); + /* Wait for all opened files to release */ + flush_workqueue(deferredclose_wq); /* finally release root dentry */ dput(cifs_sb->root); diff --git a/fs/smb/client/netlink.c b/fs/smb/client/netlink.c index 147d9409252c..0dd10913c37a 100644 --- a/fs/smb/client/netlink.c +++ b/fs/smb/client/netlink.c @@ -33,13 +33,17 @@ static const struct nla_policy cifs_genl_policy[CIFS_GENL_ATTR_MAX + 1] = { static const struct genl_ops cifs_genl_ops[] = { { .cmd = CIFS_GENL_CMD_SWN_NOTIFY, + .flags = GENL_ADMIN_PERM, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = cifs_swn_notify, }, }; static const struct genl_multicast_group cifs_genl_mcgrps[] = { - [CIFS_GENL_MCGRP_SWN] = { .name = CIFS_GENL_MCGRP_SWN_NAME }, + [CIFS_GENL_MCGRP_SWN] = { + .name = CIFS_GENL_MCGRP_SWN_NAME, + .flags = GENL_MCAST_CAP_NET_ADMIN, + }, }; struct genl_family cifs_genl_family = { diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index eed3a71171c0..e1a9e89cb85f 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4842,7 +4842,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, } /* Copy the data to the output I/O iterator. */ - rdata->result = cifs_copy_folioq_to_iter(buffer, buffer_len, + rdata->result = cifs_copy_folioq_to_iter(buffer, data_len, cur_off, &rdata->subreq.io_iter); if (rdata->result != 0) { if (is_offloaded) @@ -4851,7 +4851,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, dequeue_mid(mid, rdata->result); return 0; } - rdata->got_bytes = buffer_len; + rdata->got_bytes = data_len; } else if (buf_len >= data_offset + data_len) { /* read response payload is in buf */ diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index db98ced541ba..7483b29367d5 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -176,7 +176,9 @@ smb2_find_smb_sess_tcon_unlocked(struct cifs_ses *ses, __u32 tid) list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid != tid) continue; + spin_lock(&tcon->tc_lock); ++tcon->tc_count; + spin_unlock(&tcon->tc_lock); trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, netfs_trace_tcon_ref_get_find_sess_tcon); return tcon; diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index bbb2cb3782d0..a84c01bceb8b 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -484,8 +484,12 @@ static inline int compare_guid_key(struct oplock_info *opinfo, const char *guid1, const char *key1) { const char *guid2, *key2; + struct ksmbd_conn *conn; - guid2 = opinfo->conn->ClientGUID; + conn = READ_ONCE(opinfo->conn); + if (!conn) + return 0; + guid2 = conn->ClientGUID; key2 = opinfo->o_lease->lease_key; if (!memcmp(guid1, guid2, SMB2_CLIENT_GUID_SIZE) && !memcmp(key1, key2, SMB2_LEASE_KEY_SIZE)) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 756624b4e90e..da7b96707186 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3812,8 +3812,19 @@ err_out2: ksmbd_debug(SMB, "Error response: %x\n", rsp->hdr.Status); } - if (dh_info.reconnected) - ksmbd_put_durable_fd(dh_info.fp); + if (dh_info.reconnected) { + /* + * If reconnect succeeded, fp was republished in the + * session file table. On a later error, ksmbd_fd_put() + * above drops the session reference; drop the durable + * lookup reference through the same session-aware path so + * final close removes the volatile id before freeing fp. + */ + if (rc && fp == dh_info.fp) + ksmbd_fd_put(work, dh_info.fp); + else + ksmbd_put_durable_fd(dh_info.fp); + } kfree(name); kfree(lc); diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index a0e0dc56c730..6c4f9c8c7f13 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -643,8 +643,10 @@ static void set_posix_acl_entries_dacl(struct mnt_idmap *idmap, ntace = (struct smb_ace *)((char *)pndace + *size); ace_sz = fill_ace_for_sid(ntace, sid, ACCESS_ALLOWED, flags, pace->e_perm, 0777); - if (check_add_overflow(*size, ace_sz, size)) + if (check_add_overflow(*size, ace_sz, size)) { + kfree(sid); break; + } (*num_aces)++; if (pace->e_tag == ACL_USER) ntace->access_req |= @@ -655,8 +657,10 @@ static void set_posix_acl_entries_dacl(struct mnt_idmap *idmap, ntace = (struct smb_ace *)((char *)pndace + *size); ace_sz = fill_ace_for_sid(ntace, sid, ACCESS_ALLOWED, 0x03, pace->e_perm, 0777); - if (check_add_overflow(*size, ace_sz, size)) + if (check_add_overflow(*size, ace_sz, size)) { + kfree(sid); break; + } (*num_aces)++; if (pace->e_tag == ACL_USER) ntace->access_req |= @@ -698,8 +702,10 @@ posix_default_acl: ntace = (struct smb_ace *)((char *)pndace + *size); ace_sz = fill_ace_for_sid(ntace, sid, ACCESS_ALLOWED, 0x0b, pace->e_perm, 0777); - if (check_add_overflow(*size, ace_sz, size)) + if (check_add_overflow(*size, ace_sz, size)) { + kfree(sid); break; + } (*num_aces)++; if (pace->e_tag == ACL_USER) ntace->access_req |= @@ -1090,6 +1096,40 @@ static int smb_append_inherited_ace(struct smb_ace **ace, int *nt_size, return 0; } +static int smb_validate_ntsd_sid(struct smb_ntsd *pntsd, size_t pntsd_size, + unsigned int sid_offset, struct smb_sid **sid, + size_t *sid_size) +{ + size_t sid_end; + + *sid = NULL; + *sid_size = 0; + + if (!sid_offset) + return 0; + + if (sid_offset < sizeof(struct smb_ntsd) || + check_add_overflow(sid_offset, (size_t)CIFS_SID_BASE_SIZE, + &sid_end) || + sid_end > pntsd_size) + return -EINVAL; + + *sid = (struct smb_sid *)((char *)pntsd + sid_offset); + if ((*sid)->num_subauth > SID_MAX_SUB_AUTHORITIES) + return -EINVAL; + + if (check_add_overflow((size_t)CIFS_SID_BASE_SIZE, + sizeof(__le32) * (size_t)(*sid)->num_subauth, + &sid_end)) + return -EINVAL; + + if (sid_offset > pntsd_size || sid_end > pntsd_size - sid_offset) + return -EINVAL; + + *sid_size = sid_end; + return 0; +} + int smb_inherit_dacl(struct ksmbd_conn *conn, const struct path *path, unsigned int uid, unsigned int gid) @@ -1102,28 +1142,28 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, struct dentry *parent = path->dentry->d_parent; struct mnt_idmap *idmap = mnt_idmap(path->mnt); int inherited_flags = 0, flags = 0, i, nt_size = 0, pdacl_size; - int rc = 0, pntsd_type, pntsd_size, acl_len, aces_size; + int rc = 0, pntsd_type, ppntsd_size, acl_len, aces_size; unsigned int dacloffset; size_t dacl_struct_end; u16 num_aces, ace_cnt = 0; char *aces_base; bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode); - pntsd_size = ksmbd_vfs_get_sd_xattr(conn, idmap, + ppntsd_size = ksmbd_vfs_get_sd_xattr(conn, idmap, parent, &parent_pntsd); - if (pntsd_size <= 0) + if (ppntsd_size <= 0) return -ENOENT; dacloffset = le32_to_cpu(parent_pntsd->dacloffset); if (!dacloffset || check_add_overflow(dacloffset, sizeof(struct smb_acl), &dacl_struct_end) || - dacl_struct_end > (size_t)pntsd_size) { + dacl_struct_end > (size_t)ppntsd_size) { rc = -EINVAL; goto free_parent_pntsd; } parent_pdacl = (struct smb_acl *)((char *)parent_pntsd + dacloffset); - acl_len = pntsd_size - dacloffset; + acl_len = ppntsd_size - dacloffset; num_aces = le16_to_cpu(parent_pdacl->num_aces); pntsd_type = le16_to_cpu(parent_pntsd->type); pdacl_size = le16_to_cpu(parent_pdacl->size); @@ -1237,19 +1277,19 @@ pass: struct smb_ntsd *pntsd; struct smb_acl *pdacl; struct smb_sid *powner_sid = NULL, *pgroup_sid = NULL; - int powner_sid_size = 0, pgroup_sid_size = 0, pntsd_size; + size_t powner_sid_size = 0, pgroup_sid_size = 0, pntsd_size; size_t pntsd_alloc_size; - if (parent_pntsd->osidoffset) { - powner_sid = (struct smb_sid *)((char *)parent_pntsd + - le32_to_cpu(parent_pntsd->osidoffset)); - powner_sid_size = 1 + 1 + 6 + (powner_sid->num_subauth * 4); - } - if (parent_pntsd->gsidoffset) { - pgroup_sid = (struct smb_sid *)((char *)parent_pntsd + - le32_to_cpu(parent_pntsd->gsidoffset)); - pgroup_sid_size = 1 + 1 + 6 + (pgroup_sid->num_subauth * 4); - } + rc = smb_validate_ntsd_sid(parent_pntsd, ppntsd_size, + le32_to_cpu(parent_pntsd->osidoffset), + &powner_sid, &powner_sid_size); + if (rc) + goto free_aces_base; + rc = smb_validate_ntsd_sid(parent_pntsd, ppntsd_size, + le32_to_cpu(parent_pntsd->gsidoffset), + &pgroup_sid, &pgroup_sid_size); + if (rc) + goto free_aces_base; if (check_add_overflow(sizeof(struct smb_ntsd), (size_t)powner_sid_size, diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index d29cc1d01bd2..7293b7effbc1 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -118,7 +118,7 @@ int ksmbd_query_inode_status(struct dentry *dentry) return ret; down_read(&ci->m_lock); - if (ci->m_flags & (S_DEL_PENDING | S_DEL_ON_CLS)) + if (ci->m_flags & S_DEL_PENDING) ret = KSMBD_INODE_STATUS_PENDING_DELETE; else ret = KSMBD_INODE_STATUS_OK; @@ -134,7 +134,7 @@ bool ksmbd_inode_pending_delete(struct ksmbd_file *fp) int ret; down_read(&ci->m_lock); - ret = (ci->m_flags & (S_DEL_PENDING | S_DEL_ON_CLS)); + ret = (ci->m_flags & S_DEL_PENDING); up_read(&ci->m_lock); return ret; @@ -302,12 +302,20 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp) } } + down_write(&ci->m_lock); + /* Promote S_DEL_ON_CLS to S_DEL_PENDING when close */ + if (ci->m_flags & S_DEL_ON_CLS) { + ci->m_flags &= ~S_DEL_ON_CLS; + ci->m_flags |= S_DEL_PENDING; + } + up_write(&ci->m_lock); + if (atomic_dec_and_test(&ci->m_count)) { bool do_unlink = false; down_write(&ci->m_lock); - if (ci->m_flags & (S_DEL_ON_CLS | S_DEL_PENDING)) { - ci->m_flags &= ~(S_DEL_ON_CLS | S_DEL_PENDING); + if (ci->m_flags & S_DEL_PENDING) { + ci->m_flags &= ~S_DEL_PENDING; do_unlink = true; } up_write(&ci->m_lock); @@ -325,6 +333,14 @@ static void __ksmbd_remove_durable_fd(struct ksmbd_file *fp) return; idr_remove(global_ft.idr, fp->persistent_id); + /* + * Clear persistent_id so a later __ksmbd_close_fd() that runs from a + * delayed putter (e.g. when a concurrent ksmbd_lookup_fd_inode() + * walker held the final reference) does not re-issue idr_remove() on + * an id that idr_alloc_cyclic() may have already handed out to a new + * durable handle. + */ + fp->persistent_id = KSMBD_NO_FID; } static void ksmbd_remove_durable_fd(struct ksmbd_file *fp) @@ -417,6 +433,20 @@ static struct ksmbd_file *__ksmbd_lookup_fd(struct ksmbd_file_table *ft, static void __put_fd_final(struct ksmbd_work *work, struct ksmbd_file *fp) { + /* + * Detached durable fp -- session_fd_check() cleared fp->conn at + * preserve, so this fp is no longer tracked by any conn's + * stats.open_files_count. This happens when + * ksmbd_scavenger_dispose_dh() hands the final close off to an + * m_fp_list walker (e.g. ksmbd_lookup_fd_inode()) whose work->conn + * is unrelated to the conn that originally opened the handle; close + * via the NULL-ft path so we do not underflow that unrelated + * counter. + */ + if (!fp->conn) { + __ksmbd_close_fd(NULL, fp); + return; + } __ksmbd_close_fd(&work->sess->file_table, fp); atomic_dec(&work->conn->stats.open_files_count); } @@ -788,24 +818,37 @@ static bool ksmbd_durable_scavenger_alive(void) return true; } -static void ksmbd_scavenger_dispose_dh(struct list_head *head) +static void ksmbd_scavenger_dispose_dh(struct ksmbd_file *fp) { - while (!list_empty(head)) { - struct ksmbd_file *fp; + /* + * Durable-preserved fp can remain linked on f_ci->m_fp_list for + * share-mode checks. Unlink it before final close; fp->node is not + * available as a scavenger-private list node because re-adding it to + * another list corrupts m_fp_list. + */ + down_write(&fp->f_ci->m_lock); + list_del_init(&fp->node); + up_write(&fp->f_ci->m_lock); - fp = list_first_entry(head, struct ksmbd_file, node); - list_del_init(&fp->node); + /* + * Drop both the durable lifetime reference and the transient reference + * taken by the scavenger under global_ft.lock. If a concurrent + * ksmbd_lookup_fd_inode() (or any other m_fp_list walker) snatched fp + * before the unlink above, that holder owns the final close via + * ksmbd_fd_put() -> __ksmbd_close_fd(). Otherwise the scavenger is + * the last putter and finalises fp here. + */ + if (atomic_sub_and_test(2, &fp->refcount)) __ksmbd_close_fd(NULL, fp); - } } static int ksmbd_durable_scavenger(void *dummy) { struct ksmbd_file *fp = NULL; + struct ksmbd_file *expired_fp; unsigned int id; unsigned int min_timeout = 1; bool found_fp_timeout; - LIST_HEAD(scavenger_list); unsigned long remaining_jiffies; __module_get(THIS_MODULE); @@ -815,8 +858,6 @@ static int ksmbd_durable_scavenger(void *dummy) if (try_to_freeze()) continue; - found_fp_timeout = false; - remaining_jiffies = wait_event_timeout(dh_wq, ksmbd_durable_scavenger_alive() == false, __msecs_to_jiffies(min_timeout)); @@ -825,23 +866,39 @@ static int ksmbd_durable_scavenger(void *dummy) else min_timeout = DURABLE_HANDLE_MAX_TIMEOUT; - write_lock(&global_ft.lock); - idr_for_each_entry(global_ft.idr, fp, id) { - if (!fp->durable_timeout) - continue; - - if (atomic_read(&fp->refcount) > 1 || - fp->conn) - continue; - - found_fp_timeout = true; - if (fp->durable_scavenger_timeout <= - jiffies_to_msecs(jiffies)) { - __ksmbd_remove_durable_fd(fp); - list_add(&fp->node, &scavenger_list); - } else { + do { + expired_fp = NULL; + found_fp_timeout = false; + + write_lock(&global_ft.lock); + idr_for_each_entry(global_ft.idr, fp, id) { unsigned long durable_timeout; + if (!fp->durable_timeout) + continue; + + if (atomic_read(&fp->refcount) > 1 || + fp->conn) + continue; + + found_fp_timeout = true; + if (fp->durable_scavenger_timeout <= + jiffies_to_msecs(jiffies)) { + __ksmbd_remove_durable_fd(fp); + /* + * Take a transient reference so fp + * cannot be freed by an in-flight + * ksmbd_lookup_fd_inode() that found + * it through f_ci->m_fp_list while we + * drop global_ft.lock and reach the + * m_fp_list unlink in + * ksmbd_scavenger_dispose_dh(). + */ + atomic_inc(&fp->refcount); + expired_fp = fp; + break; + } + durable_timeout = fp->durable_scavenger_timeout - jiffies_to_msecs(jiffies); @@ -849,10 +906,11 @@ static int ksmbd_durable_scavenger(void *dummy) if (min_timeout > durable_timeout) min_timeout = durable_timeout; } - } - write_unlock(&global_ft.lock); + write_unlock(&global_ft.lock); - ksmbd_scavenger_dispose_dh(&scavenger_list); + if (expired_fp) + ksmbd_scavenger_dispose_dh(expired_fp); + } while (expired_fp); if (found_fp_timeout == false) break; diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index e142bac4f9f8..68c75913c978 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -182,7 +182,7 @@ static int internal_create_group(struct kobject *kobj, int update, kernfs_get(kn); error = create_files(kn, kobj, uid, gid, grp, update); if (error) { - if (grp->name) + if (grp->name && !update) kernfs_remove(kn); } kernfs_put(kn); diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 70be0b3dda49..7e345a6aa551 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -610,10 +610,14 @@ static long zonefs_fname_to_fno(const struct qstr *fname) return c - '0'; for (i = 0, rname = name + len - 1; i < len; i++, rname--) { + long digit; + c = *rname; if (!isdigit(c)) return -ENOENT; - fno += (c - '0') * shift; + digit = (c - '0') * shift; + if (check_add_overflow(fno, digit, &fno)) + return -ENOENT; shift *= 10; } diff --git a/include/asm-generic/kprobes.h b/include/asm-generic/kprobes.h index 060eab094e5a..5290a2b2e15a 100644 --- a/include/asm-generic/kprobes.h +++ b/include/asm-generic/kprobes.h @@ -14,7 +14,7 @@ static unsigned long __used \ _kbl_addr_##fname = (unsigned long)fname; # define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname) /* Use this to forbid a kprobes attach on very low level functions */ -# define __kprobes __section(".kprobes.text") +# define __kprobes notrace __section(".kprobes.text") # define nokprobe_inline __always_inline #else # define NOKPROBE_SYMBOL(fname) diff --git a/include/asm-generic/ring_buffer.h b/include/asm-generic/ring_buffer.h new file mode 100644 index 000000000000..201d2aee1005 --- /dev/null +++ b/include/asm-generic/ring_buffer.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Generic arch dependent ring_buffer macros. + */ +#ifndef __ASM_GENERIC_RING_BUFFER_H__ +#define __ASM_GENERIC_RING_BUFFER_H__ + +#include <linux/cacheflush.h> + +/* Flush cache on ring buffer range if needed. Do nothing by default. */ +#define arch_ring_buffer_flush_range(start, end) do { } while (0) + +#endif /* __ASM_GENERIC_RING_BUFFER_H__ */ diff --git a/include/crypto/krb5.h b/include/crypto/krb5.h index 71dd38f59be1..aac3ecf88467 100644 --- a/include/crypto/krb5.h +++ b/include/crypto/krb5.h @@ -121,9 +121,12 @@ size_t crypto_krb5_how_much_buffer(const struct krb5_enctype *krb5, size_t crypto_krb5_how_much_data(const struct krb5_enctype *krb5, enum krb5_crypto_mode mode, size_t *_buffer_size, size_t *_offset); -void crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5, - enum krb5_crypto_mode mode, - size_t *_offset, size_t *_len); +int crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5, + enum krb5_crypto_mode mode, + size_t *_offset, size_t *_len); +int crypto_krb5_check_data_len(const struct krb5_enctype *krb5, + enum krb5_crypto_mode mode, + size_t len, size_t min_content); struct crypto_aead *crypto_krb5_prepare_encryption(const struct krb5_enctype *krb5, const struct krb5_buffer *TK, u32 usage, gfp_t gfp); diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h index ce7c7aeac887..fe32854b7ffe 100644 --- a/include/drm/drm_modeset_helper_vtables.h +++ b/include/drm/drm_modeset_helper_vtables.h @@ -490,6 +490,18 @@ struct drm_crtc_helper_funcs { bool in_vblank_irq, int *vpos, int *hpos, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode); + + /** + * @handle_vblank_timeout: Handles timeouts of the vblank timer. + * + * Called by CRTC's the vblank timer on each timeout. Semantics is + * equivalient to drm_crtc_handle_vblank(). Implementations should + * invoke drm_crtc_handle_vblank() as part of processing the timeout. + * + * This callback is optional. If unset, the vblank timer invokes + * drm_crtc_handle_vblank() directly. + */ + bool (*handle_vblank_timeout)(struct drm_crtc *crtc); }; /** diff --git a/include/drm/drm_vblank.h b/include/drm/drm_vblank.h index 151ab1e85b1b..ffa564d79638 100644 --- a/include/drm/drm_vblank.h +++ b/include/drm/drm_vblank.h @@ -25,6 +25,7 @@ #define _DRM_VBLANK_H_ #include <linux/seqlock.h> +#include <linux/hrtimer.h> #include <linux/idr.h> #include <linux/poll.h> #include <linux/kthread.h> @@ -104,6 +105,28 @@ struct drm_vblank_crtc_config { }; /** + * struct drm_vblank_crtc_timer - vblank timer for a CRTC + */ +struct drm_vblank_crtc_timer { + /** + * @timer: The vblank's high-resolution timer + */ + struct hrtimer timer; + /** + * @interval_lock: Protects @interval + */ + spinlock_t interval_lock; + /** + * @interval: Duration between two vblanks + */ + ktime_t interval; + /** + * @crtc: The timer's CRTC + */ + struct drm_crtc *crtc; +}; + +/** * struct drm_vblank_crtc - vblank tracking for a CRTC * * This structure tracks the vblank state for one CRTC. @@ -254,6 +277,11 @@ struct drm_vblank_crtc { * cancelled. */ wait_queue_head_t work_wait_queue; + + /** + * @vblank_timer: Holds the state of the vblank timer + */ + struct drm_vblank_crtc_timer vblank_timer; }; struct drm_vblank_crtc *drm_crtc_vblank_crtc(struct drm_crtc *crtc); @@ -290,6 +318,10 @@ wait_queue_head_t *drm_crtc_vblank_waitqueue(struct drm_crtc *crtc); void drm_crtc_set_max_vblank_count(struct drm_crtc *crtc, u32 max_vblank_count); +int drm_crtc_vblank_start_timer(struct drm_crtc *crtc); +void drm_crtc_vblank_cancel_timer(struct drm_crtc *crtc); +void drm_crtc_vblank_get_vblank_timeout(struct drm_crtc *crtc, ktime_t *vblank_time); + /* * Helpers for struct drm_crtc_funcs */ diff --git a/include/drm/drm_vblank_helper.h b/include/drm/drm_vblank_helper.h new file mode 100644 index 000000000000..fcd8a9b35846 --- /dev/null +++ b/include/drm/drm_vblank_helper.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _DRM_VBLANK_HELPER_H_ +#define _DRM_VBLANK_HELPER_H_ + +#include <linux/hrtimer_types.h> +#include <linux/types.h> + +struct drm_atomic_state; +struct drm_crtc; + +/* + * VBLANK helpers + */ + +void drm_crtc_vblank_atomic_flush(struct drm_crtc *crtc, + struct drm_atomic_state *state); +void drm_crtc_vblank_atomic_enable(struct drm_crtc *crtc, + struct drm_atomic_state *state); +void drm_crtc_vblank_atomic_disable(struct drm_crtc *crtc, + struct drm_atomic_state *crtc_state); + +/** + * DRM_CRTC_HELPER_VBLANK_FUNCS - Default implementation for VBLANK helpers + * + * This macro initializes struct &drm_crtc_helper_funcs to default helpers + * for VBLANK handling. + */ +#define DRM_CRTC_HELPER_VBLANK_FUNCS \ + .atomic_flush = drm_crtc_vblank_atomic_flush, \ + .atomic_enable = drm_crtc_vblank_atomic_enable, \ + .atomic_disable = drm_crtc_vblank_atomic_disable + +/* + * VBLANK timer + */ + +int drm_crtc_vblank_helper_enable_vblank_timer(struct drm_crtc *crtc); +void drm_crtc_vblank_helper_disable_vblank_timer(struct drm_crtc *crtc); +bool drm_crtc_vblank_helper_get_vblank_timestamp_from_timer(struct drm_crtc *crtc, + int *max_error, + ktime_t *vblank_time, + bool in_vblank_irq); + +/** + * DRM_CRTC_VBLANK_TIMER_FUNCS - Default implementation for VBLANK timers + * + * This macro initializes struct &drm_crtc_funcs to default helpers for + * VBLANK timers. + */ +#define DRM_CRTC_VBLANK_TIMER_FUNCS \ + .enable_vblank = drm_crtc_vblank_helper_enable_vblank_timer, \ + .disable_vblank = drm_crtc_vblank_helper_disable_vblank_timer, \ + .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp_from_timer + +#endif diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 6ed477338b16..7b2807009155 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -713,6 +713,7 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) /* * cgroup scalable recursive statistics. */ +void __css_rstat_updated(struct cgroup_subsys_state *css, int cpu); void css_rstat_updated(struct cgroup_subsys_state *css, int cpu); void css_rstat_flush(struct cgroup_subsys_state *css); diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 19c7e475d3a4..a1194e44b527 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -341,6 +341,11 @@ _label: \ #define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond +#define DEFINE_CLASS_IS_UNCONDITIONAL(_name) \ + __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ + static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ + { return (void *)1; } + #define __GUARD_IS_ERR(_ptr) \ ({ \ unsigned long _rc = (__force unsigned long)(_ptr); \ diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h index 0a3bcd1718f3..be1b38c981d4 100644 --- a/include/linux/fprobe.h +++ b/include/linux/fprobe.h @@ -94,6 +94,7 @@ int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num); int register_fprobe_syms(struct fprobe *fp, const char **syms, int num); int unregister_fprobe(struct fprobe *fp); +int unregister_fprobe_async(struct fprobe *fp); bool fprobe_is_registered(struct fprobe *fp); int fprobe_count_ips_from_filter(const char *filter, const char *notfilter); #else @@ -113,6 +114,10 @@ static inline int unregister_fprobe(struct fprobe *fp) { return -EOPNOTSUPP; } +static inline int unregister_fprobe_async(struct fprobe *fp) +{ + return -EOPNOTSUPP; +} static inline bool fprobe_is_registered(struct fprobe *fp) { return false; diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 80b38fbf2121..31df7608737e 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -208,6 +208,7 @@ struct fwnode_operations { static inline void fwnode_init(struct fwnode_handle *fwnode, const struct fwnode_operations *ops) { + fwnode->secondary = NULL; fwnode->ops = ops; INIT_LIST_HEAD(&fwnode->consumers); INIT_LIST_HEAD(&fwnode->suppliers); diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 65db9349f905..63bee08c6f31 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -277,11 +277,11 @@ enum { * * %__GFP_ZERO returns a zeroed page on success. * - * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself - * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that - * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting - * memory tags at the same time as zeroing memory has minimal additional - * performance impact. + * %__GFP_ZEROTAGS zeroes memory tags at allocation time. Setting memory tags at + * the same time as zeroing memory (e.g., with __GFP_ZERO) has minimal + * additional performance impact. However, __GFP_ZEROTAGS also zeroes the tags + * even if memory is not getting zeroed at allocation time (e.g., + * with init_on_free). * * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation. * Used for userspace and vmalloc pages; the latter are unpoisoned by diff --git a/include/linux/highmem.h b/include/linux/highmem.h index abc20f9810fd..029172383b76 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -251,10 +251,11 @@ static inline void clear_highpage_kasan_tagged(struct page *page) #ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGES -/* Return false to let people know we did not initialize the pages */ -static inline bool tag_clear_highpages(struct page *page, int numpages) +/* Returns true if the caller has to initialize the pages */ +static inline bool tag_clear_highpages(struct page *page, int numpages, + bool clear_pages) { - return false; + return clear_pages; } #endif diff --git a/include/linux/libata.h b/include/linux/libata.h index 3b8bdea8516d..5c3f98a418c3 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -335,6 +335,7 @@ enum { /* return values for ->qc_defer */ ATA_DEFER_LINK = 1, ATA_DEFER_PORT = 2, + ATA_DEFER_LINK_EXCL = 3, /* desc_len for ata_eh_info and context */ ATA_EH_DESC_LEN = 80, @@ -854,6 +855,9 @@ struct ata_link { unsigned int sata_spd; /* current SATA PHY speed */ enum ata_lpm_policy lpm_policy; + struct work_struct deferred_qc_work; + struct ata_queued_cmd *deferred_qc; + /* record runtime error info, protected by host_set lock */ struct ata_eh_info eh_info; /* EH context */ @@ -899,9 +903,6 @@ struct ata_port { u64 qc_active; int nr_active_links; /* #links with active qcs */ - struct work_struct deferred_qc_work; - struct ata_queued_cmd *deferred_qc; - struct ata_link link; /* host default link */ struct ata_link *slave_link; /* see ata_slave_link_init() */ diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 77c778d84d4c..3aef60abd362 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -299,7 +299,8 @@ struct xt_table *xt_register_table(struct net *net, const struct xt_table *table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo); -void *xt_unregister_table(struct xt_table *table); +void xt_unregister_table_pre_exit(struct net *net, u8 af, const char *name); +struct xt_table *xt_unregister_table_exit(struct net *net, u8 af, const char *name); struct xt_table_info *xt_replace_table(struct xt_table *table, unsigned int num_counters, diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index a40aaf645fa4..05631a25e622 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -53,7 +53,6 @@ int arpt_register_table(struct net *net, const struct xt_table *table, const struct arpt_replace *repl, const struct nf_hook_ops *ops); void arpt_unregister_table(struct net *net, const char *name); -void arpt_unregister_table_pre_exit(struct net *net, const char *name); extern unsigned int arpt_do_table(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 132b0e4a6d4d..13593391d605 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -26,7 +26,6 @@ int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, const struct nf_hook_ops *ops); -void ipt_unregister_table_pre_exit(struct net *net, const char *name); void ipt_unregister_table_exit(struct net *net, const char *name); /* Standard entry. */ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 8b8885a73c76..c6d5b927830d 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -27,7 +27,6 @@ extern void *ip6t_alloc_initial_table(const struct xt_table *); int ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl, const struct nf_hook_ops *ops); -void ip6t_unregister_table_pre_exit(struct net *net, const char *name); void ip6t_unregister_table_exit(struct net *net, const char *name); extern unsigned int ip6t_do_table(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index ba17ac5bf356..62a528f90666 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -252,7 +252,7 @@ struct netfs_io_request { unsigned long long collected_to; /* Point we've collected to */ unsigned long long cleaned_to; /* Position we've cleaned folios to */ unsigned long long abandon_to; /* Position to abandon folios to */ - pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ + const struct folio *no_unlock_folio; /* Don't unlock this folio after read */ unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ unsigned int debug_id; unsigned int rsize; /* Maximum read size (0 for none) */ diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h index 0e82f1f4d36c..d4f6e8124a49 100644 --- a/include/linux/soc/airoha/airoha_offload.h +++ b/include/linux/soc/airoha/airoha_offload.h @@ -70,9 +70,9 @@ static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev, #define NPU_RX1_DESC_NUM 512 /* CTRL */ -#define NPU_RX_DMA_DESC_LAST_MASK BIT(27) -#define NPU_RX_DMA_DESC_LEN_MASK GENMASK(26, 14) -#define NPU_RX_DMA_DESC_CUR_LEN_MASK GENMASK(13, 1) +#define NPU_RX_DMA_DESC_LAST_MASK BIT(29) +#define NPU_RX_DMA_DESC_LEN_MASK GENMASK(28, 15) +#define NPU_RX_DMA_DESC_CUR_LEN_MASK GENMASK(14, 1) #define NPU_RX_DMA_DESC_DONE_MASK BIT(0) /* INFO */ #define NPU_RX_DMA_PKT_COUNT_MASK GENMASK(31, 29) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index d46ed9011ee5..3791915e4a0a 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -389,6 +389,7 @@ void baswap(bdaddr_t *dst, const bdaddr_t *src); struct bt_sock { struct sock sk; struct list_head accept_q; + spinlock_t accept_q_lock; /* protects accept_q */ struct sock *parent; unsigned long flags; void (*skb_msg_name)(struct sk_buff *, void *, int *); diff --git a/include/net/net_shaper.h b/include/net/net_shaper.h index 5c3f49b52fe9..3939b816b001 100644 --- a/include/net/net_shaper.h +++ b/include/net/net_shaper.h @@ -53,6 +53,7 @@ struct net_shaper { /* private: */ u32 leaves; /* accounted only for NODE scope */ + bool valid; struct rcu_head rcu; }; diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index d17035d14d96..3978c3174cdb 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -14,6 +14,7 @@ struct nf_queue_entry { struct list_head list; struct rhash_head hash_node; struct sk_buff *skb; + struct net_device *skb_dev; unsigned int id; unsigned int hook_index; /* index in hook_entries->hook[] */ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) diff --git a/include/net/tcp.h b/include/net/tcp.h index cf507b989bff..f460d2c391de 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -65,8 +65,6 @@ static inline void tcp_orphan_count_dec(void) this_cpu_dec(tcp_orphan_count); } -DECLARE_PER_CPU(u32, tcp_tw_isn); - void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) @@ -1028,10 +1026,13 @@ struct tcp_skb_cb { __u32 seq; /* Starting sequence number */ __u32 end_seq; /* SEQ + FIN + SYN + datalen */ union { - /* Note : + /* Notes : + * tcp_tw_isn is used in input path only + * (isn chosen by tcp_timewait_state_process()) * tcp_gso_segs/size are used in write queue only, * cf tcp_skb_pcount()/tcp_skb_mss() */ + u32 tcp_tw_isn; struct { u16 tcp_gso_segs; u16 tcp_gso_size; diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 0864700f76e0..fa090a455037 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -771,10 +771,8 @@ TRACE_EVENT(btrfs_sync_file, TP_fast_assign( struct dentry *dentry = file_dentry(file); struct inode *inode = file_inode(file); - struct dentry *parent = dget_parent(dentry); - struct inode *parent_inode = d_inode(parent); + struct inode *parent_inode = d_inode(dentry->d_parent); - dput(parent); TP_fast_assign_fsid(btrfs_sb(inode->i_sb)); __entry->ino = btrfs_ino(BTRFS_I(inode)); __entry->parent = btrfs_ino(BTRFS_I(parent_inode)); diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index cbe28211106c..3fe3980902c2 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -177,7 +177,11 @@ EM(netfs_folio_is_uptodate, "mod-uptodate") \ EM(netfs_just_prefetch, "mod-prefetch") \ EM(netfs_whole_folio_modify, "mod-whole-f") \ + EM(netfs_whole_folio_modify_efault, "mod-whole-f!") \ + EM(netfs_whole_folio_modify_filled, "mod-whole-f+") \ + EM(netfs_whole_folio_modify_filled_efault, "mod-whole-f+!") \ EM(netfs_modify_and_clear, "mod-n-clear") \ + EM(netfs_modify_and_clear_rm_finfo, "mod-n-clear+") \ EM(netfs_streaming_write, "mod-streamw") \ EM(netfs_streaming_write_cont, "mod-streamw+") \ EM(netfs_flush_content, "flush") \ @@ -194,6 +198,10 @@ EM(netfs_folio_trace_copy_to_cache, "mark-copy") \ EM(netfs_folio_trace_end_copy, "end-copy") \ EM(netfs_folio_trace_filled_gaps, "filled-gaps") \ + EM(netfs_folio_trace_invalidate_all, "inval-all") \ + EM(netfs_folio_trace_invalidate_front, "inval-front") \ + EM(netfs_folio_trace_invalidate_middle, "inval-mid") \ + EM(netfs_folio_trace_invalidate_tail, "inval-tail") \ EM(netfs_folio_trace_kill, "kill") \ EM(netfs_folio_trace_kill_cc, "kill-cc") \ EM(netfs_folio_trace_kill_g, "kill-g") \ diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 573f2df3a2c9..704a10de6670 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -71,6 +71,7 @@ EM(rxkad_abort_resp_unknown_tkt, "rxkad-resp-unknown-tkt") \ EM(rxkad_abort_resp_version, "rxkad-resp-version") \ /* RxGK security errors */ \ + EM(rxgk_abort_1_short_header, "rxgk1-short-hdr") \ EM(rxgk_abort_1_verify_mic_eproto, "rxgk1-vfy-mic-eproto") \ EM(rxgk_abort_2_decrypt_eproto, "rxgk2-dec-eproto") \ EM(rxgk_abort_2_short_data, "rxgk2-short-data") \ diff --git a/io_uring/net.c b/io_uring/net.c index ad08f693bccb..7595850c2217 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -4,6 +4,7 @@ #include <linux/file.h> #include <linux/slab.h> #include <linux/net.h> +#include <linux/un.h> #include <linux/compat.h> #include <net/compat.h> #include <linux/io_uring.h> @@ -1837,11 +1838,29 @@ out: return IOU_COMPLETE; } +/* + * Check if bind request would potentially end up with filename_create(), + * which in turn end up in mnt_want_write() which will grab the fs + * percpu start write sem. This can trigger a lockdep warning. + */ +static int io_bind_file_create(const struct io_async_msghdr *io, int addr_len) +{ + const struct sockaddr_un *sun; + + if (io->addr.ss_family != AF_UNIX) + return 0; + if (addr_len <= offsetof(struct sockaddr_un, sun_path)) + return 0; + sun = (const struct sockaddr_un *) &io->addr; + return sun->sun_path[0] != '\0'; +} + int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind); struct sockaddr __user *uaddr; struct io_async_msghdr *io; + int ret; if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) return -EINVAL; @@ -1852,7 +1871,12 @@ int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) io = io_msg_alloc_async(req); if (unlikely(!io)) return -ENOMEM; - return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr); + ret = move_addr_to_kernel(uaddr, bind->addr_len, &io->addr); + if (unlikely(ret)) + return ret; + if (io_bind_file_create(io, bind->addr_len)) + req->flags |= REQ_F_FORCE_ASYNC; + return 0; } int io_bind(struct io_kiocb *req, unsigned int issue_flags) diff --git a/io_uring/nop.c b/io_uring/nop.c index 3caf07878f8a..f5c9969e7f64 100644 --- a/io_uring/nop.c +++ b/io_uring/nop.c @@ -79,9 +79,9 @@ done: if (ret < 0) req_set_fail(req); if (nop->flags & IORING_NOP_CQE32) - io_req_set_res32(req, nop->result, 0, nop->extra1, nop->extra2); + io_req_set_res32(req, ret, 0, nop->extra1, nop->extra2); else - io_req_set_res(req, nop->result, 0); + io_req_set_res(req, ret, 0); if (nop->flags & IORING_NOP_TW) { req->io_task_work.func = io_req_task_complete; io_req_task_work_add(req); diff --git a/io_uring/waitid.c b/io_uring/waitid.c index 53532ae6256c..921b4de3a31c 100644 --- a/io_uring/waitid.c +++ b/io_uring/waitid.c @@ -258,6 +258,7 @@ int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) iw->upid = READ_ONCE(sqe->fd); iw->options = READ_ONCE(sqe->file_index); iw->infop = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + memset(&iw->info, 0, sizeof(iw->info)); return 0; } diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 08b0c264bd26..5057cf44342b 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3163,16 +3163,13 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus); if (unlikely(cpu >= nr_cpu_ids)) { - reset_migrate_dl_data(cs); ret = -EINVAL; goto out_unlock; } ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw); - if (ret) { - reset_migrate_dl_data(cs); + if (ret) goto out_unlock; - } } out_success: @@ -3181,7 +3178,10 @@ out_success: * changes which zero cpus/mems_allowed. */ cs->attach_in_progress++; + out_unlock: + if (ret) + reset_migrate_dl_data(cs); mutex_unlock(&cpuset_mutex); return ret; } diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 150e5871e66f..de816a43db9f 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only #include "cgroup-internal.h" +#include <linux/cpumask.h> #include <linux/sched/cputime.h> #include <linux/bpf.h> @@ -53,7 +54,7 @@ static inline struct llist_head *ss_lhead_cpu(struct cgroup_subsys *ss, int cpu) } /** - * css_rstat_updated - keep track of updated rstat_cpu + * __css_rstat_updated - keep track of updated rstat_cpu * @css: target cgroup subsystem state * @cpu: cpu on which rstat_cpu was updated * @@ -63,31 +64,27 @@ static inline struct llist_head *ss_lhead_cpu(struct cgroup_subsys *ss, int cpu) * * NOTE: if the user needs the guarantee that the updater either add itself in * the lockless list or the concurrent flusher flushes its updated stats, a - * memory barrier is needed before the call to css_rstat_updated() i.e. a + * memory barrier is needed before the call to __css_rstat_updated() i.e. a * barrier after updating the per-cpu stats and before calling - * css_rstat_updated(). + * __css_rstat_updated(). */ -__bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu) +void __css_rstat_updated(struct cgroup_subsys_state *css, int cpu) { struct llist_head *lhead; struct css_rstat_cpu *rstatc; struct llist_node *self; - /* - * Since bpf programs can call this function, prevent access to - * uninitialized rstat pointers. - */ + /* Prevent access to uninitialized rstat pointers. */ if (!css_uses_rstat(css)) return; lockdep_assert_preemption_disabled(); /* - * For archs withnot nmi safe cmpxchg or percpu ops support, ignore - * the requests from nmi context. + * The lockless insertion below relies on NMI-safe cmpxchg; + * bail out in NMI on archs that don't provide it. */ - if ((!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) || - !IS_ENABLED(CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS)) && in_nmi()) + if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && in_nmi()) return; rstatc = css_rstat_cpu(css, cpu); @@ -125,6 +122,18 @@ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu) llist_add(&rstatc->lnode, lhead); } +/* + * BPF-facing wrapper for __css_rstat_updated(). Validate the caller-provided + * CPU before passing it to the internal rstat updater. + */ +__bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu) +{ + if (unlikely(cpu < 0 || cpu >= nr_cpu_ids || !cpu_possible(cpu))) + return; + + __css_rstat_updated(css, cpu); +} + static void __css_process_update_tree(struct cgroup_subsys_state *css, int cpu) { /* put @css and all ancestors on the corresponding updated lists */ @@ -170,7 +179,7 @@ static void css_process_update_tree(struct cgroup_subsys *ss, int cpu) * flusher flush the stats updated by the updater who have * observed that they are already on the list. The * corresponding barrier pair for this one should be before - * css_rstat_updated() by the user. + * __css_rstat_updated() by the user. * * For now, there aren't any such user, so not adding the * barrier here but if such a use-case arise, please add @@ -614,7 +623,7 @@ static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp, unsigned long flags) { u64_stats_update_end_irqrestore(&rstatbc->bsync, flags); - css_rstat_updated(&cgrp->self, smp_processor_id()); + __css_rstat_updated(&cgrp->self, smp_processor_id()); put_cpu_ptr(rstatbc); } diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 21db33118591..fa4aac333917 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -1250,7 +1250,14 @@ void debug_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, entry->direction = direction; entry->map_err_type = MAP_ERR_NOT_CHECKED; - if (!(attrs & DMA_ATTR_MMIO)) { + if (attrs & DMA_ATTR_MMIO) { + unsigned long pfn = PHYS_PFN(phys); + + if (pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) + err_printk(dev, entry, + "dma_map_resource called for RAM address %pa\n", + &phys); + } else { check_for_stack(dev, phys); if (!PhysHighMem(phys)) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index fe7472f13b10..35e4556b9556 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -366,10 +366,6 @@ EXPORT_SYMBOL(dma_unmap_sg_attrs); dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (IS_ENABLED(CONFIG_DMA_API_DEBUG) && - WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr)))) - return DMA_MAPPING_ERROR; - return dma_map_phys(dev, phys_addr, size, dir, attrs | DMA_ATTR_MMIO); } EXPORT_SYMBOL(dma_map_resource); diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 73f7e1fd4ab4..bf411656c316 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -292,6 +292,12 @@ void irq_work_sync(struct irq_work *work) !arch_irq_work_has_interrupt()) { rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work), TASK_UNINTERRUPTIBLE); + /* + * Ensure irq_work_single() does not access @work + * after removing IRQ_WORK_BUSY. It is always + * accessed within a RCU-read section. + */ + synchronize_rcu(); return; } @@ -302,6 +308,7 @@ EXPORT_SYMBOL_GPL(irq_work_sync); static void run_irq_workd(unsigned int cpu) { + guard(rcu)(); irq_work_run_list(this_cpu_ptr(&lazy_list)); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0d93f60fed20..46fc94f2338e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7332,7 +7332,7 @@ void rt_mutex_post_schedule(void) */ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) { - int prio, oldprio, queued, running, queue_flag = + int prio, oldprio, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; const struct sched_class *prev_class, *next_class; struct rq_flags rf; @@ -7397,52 +7397,42 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) if (prev_class != next_class && p->se.sched_delayed) dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK); - queued = task_on_rq_queued(p); - running = task_current_donor(rq, p); - if (queued) - dequeue_task(rq, p, queue_flag); - if (running) - put_prev_task(rq, p); - - /* - * Boosting condition are: - * 1. -rt task is running and holds mutex A - * --> -dl task blocks on mutex A - * - * 2. -dl task is running and holds mutex A - * --> -dl task blocks on mutex A and could preempt the - * running task - */ - if (dl_prio(prio)) { - if (!dl_prio(p->normal_prio) || - (pi_task && dl_prio(pi_task->prio) && - dl_entity_preempt(&pi_task->dl, &p->dl))) { - p->dl.pi_se = pi_task->dl.pi_se; - queue_flag |= ENQUEUE_REPLENISH; + scoped_guard (sched_change, p, queue_flag) { + /* + * Boosting condition are: + * 1. -rt task is running and holds mutex A + * --> -dl task blocks on mutex A + * + * 2. -dl task is running and holds mutex A + * --> -dl task blocks on mutex A and could preempt the + * running task + */ + if (dl_prio(prio)) { + if (!dl_prio(p->normal_prio) || + (pi_task && dl_prio(pi_task->prio) && + dl_entity_preempt(&pi_task->dl, &p->dl))) { + p->dl.pi_se = pi_task->dl.pi_se; + scope->flags |= ENQUEUE_REPLENISH; + } else { + p->dl.pi_se = &p->dl; + } + } else if (rt_prio(prio)) { + if (dl_prio(oldprio)) + p->dl.pi_se = &p->dl; + if (oldprio < prio) + scope->flags |= ENQUEUE_HEAD; } else { - p->dl.pi_se = &p->dl; + if (dl_prio(oldprio)) + p->dl.pi_se = &p->dl; + if (rt_prio(oldprio)) + p->rt.timeout = 0; } - } else if (rt_prio(prio)) { - if (dl_prio(oldprio)) - p->dl.pi_se = &p->dl; - if (oldprio < prio) - queue_flag |= ENQUEUE_HEAD; - } else { - if (dl_prio(oldprio)) - p->dl.pi_se = &p->dl; - if (rt_prio(oldprio)) - p->rt.timeout = 0; - } - p->sched_class = next_class; - p->prio = prio; + p->sched_class = next_class; + p->prio = prio; - check_class_changing(rq, p, prev_class); - - if (queued) - enqueue_task(rq, p, queue_flag); - if (running) - set_next_task(rq, p); + check_class_changing(rq, p, prev_class); + } check_class_changed(rq, p, prev_class, oldprio); out_unlock: @@ -8090,26 +8080,9 @@ int migrate_task_to(struct task_struct *p, int target_cpu) */ void sched_setnuma(struct task_struct *p, int nid) { - bool queued, running; - struct rq_flags rf; - struct rq *rq; - - rq = task_rq_lock(p, &rf); - queued = task_on_rq_queued(p); - running = task_current_donor(rq, p); - - if (queued) - dequeue_task(rq, p, DEQUEUE_SAVE); - if (running) - put_prev_task(rq, p); - - p->numa_preferred_nid = nid; - - if (queued) - enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); - if (running) - set_next_task(rq, p); - task_rq_unlock(rq, p, &rf); + guard(task_rq_lock)(p); + scoped_guard (sched_change, p, DEQUEUE_SAVE) + p->numa_preferred_nid = nid; } #endif /* CONFIG_NUMA_BALANCING */ @@ -9215,8 +9188,9 @@ static void sched_change_group(struct task_struct *tsk) */ void sched_move_task(struct task_struct *tsk, bool for_autogroup) { - int queued, running, queue_flags = + unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; + bool resched = false; struct rq *rq; CLASS(task_rq_lock, rq_guard)(tsk); @@ -9224,29 +9198,16 @@ void sched_move_task(struct task_struct *tsk, bool for_autogroup) update_rq_clock(rq); - running = task_current_donor(rq, tsk); - queued = task_on_rq_queued(tsk); - - if (queued) - dequeue_task(rq, tsk, queue_flags); - if (running) - put_prev_task(rq, tsk); - - sched_change_group(tsk); - if (!for_autogroup) - scx_cgroup_move_task(tsk); + scoped_guard (sched_change, tsk, queue_flags) { + sched_change_group(tsk); + if (!for_autogroup) + scx_cgroup_move_task(tsk); + if (scope->running) + resched = true; + } - if (queued) - enqueue_task(rq, tsk, queue_flags); - if (running) { - set_next_task(rq, tsk); - /* - * After changing group, the running task may have joined a - * throttled one but it's still the running task. Trigger a - * resched to make sure that task can still run. - */ + if (resched) resched_curr(rq); - } } static struct cgroup_subsys_state * @@ -10902,37 +10863,39 @@ void sched_mm_cid_fork(struct task_struct *t) } #endif /* CONFIG_SCHED_MM_CID */ -#ifdef CONFIG_SCHED_CLASS_EXT -void sched_deq_and_put_task(struct task_struct *p, int queue_flags, - struct sched_enq_and_set_ctx *ctx) +static DEFINE_PER_CPU(struct sched_change_ctx, sched_change_ctx); + +struct sched_change_ctx *sched_change_begin(struct task_struct *p, unsigned int flags) { + struct sched_change_ctx *ctx = this_cpu_ptr(&sched_change_ctx); struct rq *rq = task_rq(p); lockdep_assert_rq_held(rq); - *ctx = (struct sched_enq_and_set_ctx){ + *ctx = (struct sched_change_ctx){ .p = p, - .queue_flags = queue_flags, + .flags = flags, .queued = task_on_rq_queued(p), - .running = task_current(rq, p), + .running = task_current_donor(rq, p), }; - update_rq_clock(rq); if (ctx->queued) - dequeue_task(rq, p, queue_flags | DEQUEUE_NOCLOCK); + dequeue_task(rq, p, flags); if (ctx->running) put_prev_task(rq, p); + + return ctx; } -void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx) +void sched_change_end(struct sched_change_ctx *ctx) { - struct rq *rq = task_rq(ctx->p); + struct task_struct *p = ctx->p; + struct rq *rq = task_rq(p); lockdep_assert_rq_held(rq); if (ctx->queued) - enqueue_task(rq, ctx->p, ctx->queue_flags | ENQUEUE_NOCLOCK); + enqueue_task(rq, p, ctx->flags | ENQUEUE_NOCLOCK); if (ctx->running) - set_next_task(rq, ctx->p); + set_next_task(rq, p); } -#endif /* CONFIG_SCHED_CLASS_EXT */ diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 35c0b31924d3..7b750bf42698 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2800,7 +2800,8 @@ static void scx_set_task_state(struct task_struct *p, enum scx_task_state state) warn = prev_state != SCX_TASK_READY; break; default: - warn = true; + WARN_ONCE(1, "sched_ext: Invalid task state %d -> %d for %s[%d]", + prev_state, state, p->comm, p->pid); return; } @@ -3866,11 +3867,10 @@ static void scx_bypass(bool bypass) */ list_for_each_entry_safe_reverse(p, n, &rq->scx.runnable_list, scx.runnable_node) { - struct sched_enq_and_set_ctx ctx; - /* cycling deq/enq is enough, see the function comment */ - sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx); - sched_enq_and_set_task(&ctx); + scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) { + /* nothing */ ; + } } /* resched to restore ticks and idle state */ @@ -4021,17 +4021,16 @@ static void scx_disable_workfn(struct kthread_work *work) while ((p = scx_task_iter_next_locked(&sti))) { const struct sched_class *old_class = p->sched_class; const struct sched_class *new_class = scx_setscheduler_class(p); - struct sched_enq_and_set_ctx ctx; - - if (old_class != new_class && p->se.sched_delayed) - dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED); - sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx); + update_rq_clock(task_rq(p)); - p->sched_class = new_class; - check_class_changing(task_rq(p), p, old_class); + if (old_class != new_class && p->se.sched_delayed) + dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK); - sched_enq_and_set_task(&ctx); + scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK) { + p->sched_class = new_class; + check_class_changing(task_rq(p), p, old_class); + } check_class_changed(task_rq(p), p, old_class, p->prio); scx_exit_task(p); @@ -4813,10 +4812,10 @@ static void scx_enable_workfn(struct kthread_work *work) ret = scx_init_task(p, task_group(p), false); if (ret) { - put_task_struct(p); scx_task_iter_stop(&sti); scx_error(sch, "ops.init_task() failed (%d) for %s[%d]", ret, p->comm, p->pid); + put_task_struct(p); goto err_disable_unlock_all; } @@ -4845,21 +4844,20 @@ static void scx_enable_workfn(struct kthread_work *work) while ((p = scx_task_iter_next_locked(&sti))) { const struct sched_class *old_class = p->sched_class; const struct sched_class *new_class = scx_setscheduler_class(p); - struct sched_enq_and_set_ctx ctx; if (!tryget_task_struct(p)) continue; - if (old_class != new_class && p->se.sched_delayed) - dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED); - - sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx); + update_rq_clock(task_rq(p)); - p->scx.slice = SCX_SLICE_DFL; - p->sched_class = new_class; - check_class_changing(task_rq(p), p, old_class); + if (old_class != new_class && p->se.sched_delayed) + dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK); - sched_enq_and_set_task(&ctx); + scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK) { + p->scx.slice = SCX_SLICE_DFL; + p->sched_class = new_class; + check_class_changing(task_rq(p), p, old_class); + } check_class_changed(task_rq(p), p, old_class, p->prio); put_task_struct(p); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index f750dea7b787..668841022dbf 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3891,23 +3891,38 @@ extern void check_class_changed(struct rq *rq, struct task_struct *p, extern struct balance_callback *splice_balance_callbacks(struct rq *rq); extern void balance_callbacks(struct rq *rq, struct balance_callback *head); -#ifdef CONFIG_SCHED_CLASS_EXT /* - * Used by SCX in the enable/disable paths to move tasks between sched_classes - * and establish invariants. + * The 'sched_change' pattern is the safe, easy and slow way of changing a + * task's scheduling properties. It dequeues a task, such that the scheduler + * is fully unaware of it; at which point its properties can be modified; + * after which it is enqueued again. + * + * Typically this must be called while holding task_rq_lock, since most/all + * properties are serialized under those locks. There is currently one + * exception to this rule in sched/ext which only holds rq->lock. + */ + +/* + * This structure is a temporary, used to preserve/convey the queueing state + * of the task between sched_change_begin() and sched_change_end(). Ensuring + * the task's queueing state is idempotent across the operation. */ -struct sched_enq_and_set_ctx { +struct sched_change_ctx { struct task_struct *p; - int queue_flags; + int flags; bool queued; bool running; }; -void sched_deq_and_put_task(struct task_struct *p, int queue_flags, - struct sched_enq_and_set_ctx *ctx); -void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx); +struct sched_change_ctx *sched_change_begin(struct task_struct *p, unsigned int flags); +void sched_change_end(struct sched_change_ctx *ctx); -#endif /* CONFIG_SCHED_CLASS_EXT */ +DEFINE_CLASS(sched_change, struct sched_change_ctx *, + sched_change_end(_T), + sched_change_begin(p, flags), + struct task_struct *p, unsigned int flags) + +DEFINE_CLASS_IS_UNCONDITIONAL(sched_change) #include "ext.h" diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c index 6805a63d47af..77b663a5dfb2 100644 --- a/kernel/sched/syscalls.c +++ b/kernel/sched/syscalls.c @@ -64,7 +64,6 @@ static int effective_prio(struct task_struct *p) void set_user_nice(struct task_struct *p, long nice) { - bool queued, running; struct rq *rq; int old_prio; @@ -90,22 +89,12 @@ void set_user_nice(struct task_struct *p, long nice) return; } - queued = task_on_rq_queued(p); - running = task_current_donor(rq, p); - if (queued) - dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK); - if (running) - put_prev_task(rq, p); - - p->static_prio = NICE_TO_PRIO(nice); - set_load_weight(p, true); - old_prio = p->prio; - p->prio = effective_prio(p); - - if (queued) - enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); - if (running) - set_next_task(rq, p); + scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK) { + p->static_prio = NICE_TO_PRIO(nice); + set_load_weight(p, true); + old_prio = p->prio; + p->prio = effective_prio(p); + } /* * If the task increased its priority or is running and @@ -333,6 +322,35 @@ static bool check_same_owner(struct task_struct *p) uid_eq(cred->euid, pcred->uid)); } +#ifdef CONFIG_RT_MUTEXES +static inline void __setscheduler_dl_pi(int newprio, int policy, + struct task_struct *p, + struct sched_change_ctx *scope) +{ + /* + * In case a DEADLINE task (either proper or boosted) gets + * setscheduled to a lower priority class, check if it neeeds to + * inherit parameters from a potential pi_task. In that case make + * sure replenishment happens with the next enqueue. + */ + + if (dl_prio(newprio) && !dl_policy(policy)) { + struct task_struct *pi_task = rt_mutex_get_top_task(p); + + if (pi_task) { + p->dl.pi_se = pi_task->dl.pi_se; + scope->flags |= ENQUEUE_REPLENISH; + } + } +} +#else /* !CONFIG_RT_MUTEXES */ +static inline void __setscheduler_dl_pi(int newprio, int policy, + struct task_struct *p, + struct sched_change_ctx *scope) +{ +} +#endif /* !CONFIG_RT_MUTEXES */ + #ifdef CONFIG_UCLAMP_TASK static int uclamp_validate(struct task_struct *p, @@ -515,7 +533,7 @@ int __sched_setscheduler(struct task_struct *p, bool user, bool pi) { int oldpolicy = -1, policy = attr->sched_policy; - int retval, oldprio, newprio, queued, running; + int retval, oldprio, newprio; const struct sched_class *prev_class, *next_class; struct balance_callback *head; struct rq_flags rf; @@ -698,33 +716,26 @@ change: if (prev_class != next_class && p->se.sched_delayed) dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK); - queued = task_on_rq_queued(p); - running = task_current_donor(rq, p); - if (queued) - dequeue_task(rq, p, queue_flags); - if (running) - put_prev_task(rq, p); - - if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { - __setscheduler_params(p, attr); - p->sched_class = next_class; - p->prio = newprio; - } - __setscheduler_uclamp(p, attr); - check_class_changing(rq, p, prev_class); + scoped_guard (sched_change, p, queue_flags) { - if (queued) { - /* - * We enqueue to tail when the priority of a task is - * increased (user space view). - */ - if (oldprio < p->prio) - queue_flags |= ENQUEUE_HEAD; + if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { + __setscheduler_params(p, attr); + p->sched_class = next_class; + p->prio = newprio; + __setscheduler_dl_pi(newprio, policy, p, scope); + } + __setscheduler_uclamp(p, attr); + check_class_changing(rq, p, prev_class); - enqueue_task(rq, p, queue_flags); + if (scope->queued) { + /* + * We enqueue to tail when the priority of a task is + * increased (user space view). + */ + if (oldprio < p->prio) + scope->flags |= ENQUEUE_HEAD; + } } - if (running) - set_next_task(rq, p); check_class_changed(rq, p, prev_class, oldprio); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 70f1292b7ddb..88f470b31375 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2371,7 +2371,8 @@ static void bpf_kprobe_multi_link_release(struct bpf_link *link) struct bpf_kprobe_multi_link *kmulti_link; kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); - unregister_fprobe(&kmulti_link->fp); + /* Don't wait for RCU GP here. */ + unregister_fprobe_async(&kmulti_link->fp); kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt); } diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 8fa5bff2c26f..b9346f4efa6d 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -45,6 +45,7 @@ static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE]; static struct rhltable fprobe_ip_table; static DEFINE_MUTEX(fprobe_mutex); +static struct fgraph_ops fprobe_graph_ops; static u32 fprobe_node_hashfn(const void *data, u32 len, u32 seed) { @@ -91,11 +92,8 @@ static int insert_fprobe_node(struct fprobe_hlist_node *node, struct fprobe *fp) return ret; } -/* Return true if there are synonims */ -static bool delete_fprobe_node(struct fprobe_hlist_node *node) +static void delete_fprobe_node(struct fprobe_hlist_node *node) { - bool ret; - lockdep_assert_held(&fprobe_mutex); /* Avoid double deleting and non-inserted nodes */ @@ -104,13 +102,6 @@ static bool delete_fprobe_node(struct fprobe_hlist_node *node) rhltable_remove(&fprobe_ip_table, &node->hlist, fprobe_rht_params); } - - rcu_read_lock(); - ret = !!rhltable_lookup(&fprobe_ip_table, &node->addr, - fprobe_rht_params); - rcu_read_unlock(); - - return ret; } /* Check existence of the fprobe */ @@ -259,7 +250,7 @@ static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent return ret; } -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#if defined(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) || defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS) /* ftrace_ops callback, this processes fprobes which have only entry_handler. */ static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs) @@ -300,7 +291,7 @@ NOKPROBE_SYMBOL(fprobe_ftrace_entry); static struct ftrace_ops fprobe_ftrace_ops = { .func = fprobe_ftrace_entry, - .flags = FTRACE_OPS_FL_SAVE_REGS, + .flags = FTRACE_OPS_FL_SAVE_ARGS, }; static int fprobe_ftrace_active; @@ -341,6 +332,40 @@ static bool fprobe_is_ftrace(struct fprobe *fp) { return !fp->exit_handler; } + +static bool fprobe_exists_on_hash(unsigned long ip, bool ftrace) +{ + struct rhlist_head *head, *pos; + struct fprobe_hlist_node *node; + struct fprobe *fp; + + guard(rcu)(); + head = rhltable_lookup(&fprobe_ip_table, &ip, + fprobe_rht_params); + if (!head) + return false; + /* We have to check the same type on the list. */ + rhl_for_each_entry_rcu(node, pos, head, hlist) { + if (node->addr != ip) + break; + fp = READ_ONCE(node->fp); + if (likely(fp)) { + if ((!ftrace && fp->exit_handler) || + (ftrace && !fp->exit_handler)) + return true; + } + } + + return false; +} + +#ifdef CONFIG_MODULES +static void fprobe_remove_ips(unsigned long *ips, unsigned int cnt) +{ + ftrace_set_filter_ips(&fprobe_graph_ops.ops, ips, cnt, 1, 0); + ftrace_set_filter_ips(&fprobe_ftrace_ops, ips, cnt, 1, 0); +} +#endif #else static int fprobe_ftrace_add_ips(unsigned long *addrs, int num) { @@ -355,7 +380,37 @@ static bool fprobe_is_ftrace(struct fprobe *fp) { return false; } + +static bool fprobe_exists_on_hash(unsigned long ip, bool ftrace __maybe_unused) +{ + struct rhlist_head *head, *pos; + struct fprobe_hlist_node *node; + struct fprobe *fp; + + guard(rcu)(); + head = rhltable_lookup(&fprobe_ip_table, &ip, + fprobe_rht_params); + if (!head) + return false; + /* We only need to check fp is there. */ + rhl_for_each_entry_rcu(node, pos, head, hlist) { + if (node->addr != ip) + break; + fp = READ_ONCE(node->fp); + if (likely(fp)) + return true; + } + + return false; +} + +#ifdef CONFIG_MODULES +static void fprobe_remove_ips(unsigned long *ips, unsigned int cnt) +{ + ftrace_set_filter_ips(&fprobe_graph_ops.ops, ips, cnt, 1, 0); +} #endif +#endif /* !CONFIG_DYNAMIC_FTRACE_WITH_ARGS && !CONFIG_DYNAMIC_FTRACE_WITH_REGS */ /* fgraph_ops callback, this processes fprobes which have exit_handler. */ static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops, @@ -525,7 +580,7 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num) #ifdef CONFIG_MODULES -#define FPROBE_IPS_BATCH_INIT 8 +#define FPROBE_IPS_BATCH_INIT 128 /* instruction pointer address list */ struct fprobe_addr_list { int index; @@ -533,46 +588,29 @@ struct fprobe_addr_list { unsigned long *addrs; }; -static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long addr) -{ - unsigned long *addrs; - - /* Previously we failed to expand the list. */ - if (alist->index == alist->size) - return -ENOSPC; - - alist->addrs[alist->index++] = addr; - if (alist->index < alist->size) - return 0; - - /* Expand the address list */ - addrs = kcalloc(alist->size * 2, sizeof(*addrs), GFP_KERNEL); - if (!addrs) - return -ENOMEM; - - memcpy(addrs, alist->addrs, alist->size * sizeof(*addrs)); - alist->size *= 2; - kfree(alist->addrs); - alist->addrs = addrs; - - return 0; -} - -static void fprobe_remove_node_in_module(struct module *mod, struct fprobe_hlist_node *node, +static int fprobe_remove_node_in_module(struct module *mod, struct fprobe_hlist_node *node, struct fprobe_addr_list *alist) { - int ret = 0; + lockdep_assert_in_rcu_read_lock(); if (!within_module(node->addr, mod)) - return; - if (delete_fprobe_node(node)) - return; + return 0; + + delete_fprobe_node(node); + /* If no address list is available, we can't track this address. */ + if (!alist->addrs) + return 0; /* - * If failed to update alist, just continue to update hlist. - * Therefore, at list user handler will not hit anymore. + * Don't care the type here, because all fprobes on the same + * address must be removed eventually. */ - if (!ret) - ret = fprobe_addr_list_add(alist, node->addr); + if (!rhltable_lookup(&fprobe_ip_table, &node->addr, fprobe_rht_params)) { + alist->addrs[alist->index++] = node->addr; + if (alist->index == alist->size) + return -ENOSPC; + } + + return 0; } /* Handle module unloading to manage fprobe_ip_table. */ @@ -583,35 +621,50 @@ static int fprobe_module_callback(struct notifier_block *nb, struct fprobe_hlist_node *node; struct rhashtable_iter iter; struct module *mod = data; + bool retry; if (val != MODULE_STATE_GOING) return NOTIFY_DONE; alist.addrs = kcalloc(alist.size, sizeof(*alist.addrs), GFP_KERNEL); - /* If failed to alloc memory, we can not remove ips from hash. */ - if (!alist.addrs) - return NOTIFY_DONE; + /* + * If failed to alloc memory, ftrace_ops will not be able to remove ips from + * hash, but we can still remove nodes from fprobe_ip_table, so we can avoid + * the potential wrong callback. So just print a warning here and try to + * continue without address list. + */ + WARN_ONCE(!alist.addrs, + "Failed to allocate memory for fprobe_addr_list, ftrace_ops will not be updated"); mutex_lock(&fprobe_mutex); +again: + retry = false; + alist.index = 0; rhltable_walk_enter(&fprobe_ip_table, &iter); do { rhashtable_walk_start(&iter); while ((node = rhashtable_walk_next(&iter)) && !IS_ERR(node)) - fprobe_remove_node_in_module(mod, node, &alist); + if (fprobe_remove_node_in_module(mod, node, &alist) < 0) { + retry = true; + break; + } rhashtable_walk_stop(&iter); - } while (node == ERR_PTR(-EAGAIN)); + } while (node == ERR_PTR(-EAGAIN) && !retry); rhashtable_walk_exit(&iter); - + /* Remove any ips from hash table(s) */ if (alist.index > 0) { - ftrace_set_filter_ips(&fprobe_graph_ops.ops, - alist.addrs, alist.index, 1, 0); -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - ftrace_set_filter_ips(&fprobe_ftrace_ops, - alist.addrs, alist.index, 1, 0); -#endif + fprobe_remove_ips(alist.addrs, alist.index); + /* + * If we break rhashtable walk loop except for -EAGAIN, we need + * to restart looping from start for safety. Anyway, this is + * not a hotpath. + */ + if (retry) + goto again; } + mutex_unlock(&fprobe_mutex); kfree(alist.addrs); @@ -927,7 +980,9 @@ static int unregister_fprobe_nolock(struct fprobe *fp) /* Remove non-synonim ips from table and hash */ count = 0; for (i = 0; i < hlist_array->size; i++) { - if (!delete_fprobe_node(&hlist_array->array[i]) && addrs) + delete_fprobe_node(&hlist_array->array[i]); + if (addrs && !fprobe_exists_on_hash(hlist_array->array[i].addr, + fprobe_is_ftrace(fp))) addrs[count++] = hlist_array->array[i].addr; } del_fprobe_hash(fp); @@ -945,14 +1000,15 @@ static int unregister_fprobe_nolock(struct fprobe *fp) } /** - * unregister_fprobe() - Unregister fprobe. + * unregister_fprobe_async() - Unregister fprobe without RCU GP wait * @fp: A fprobe data structure to be unregistered. * * Unregister fprobe (and remove ftrace hooks from the function entries). + * This function will NOT wait until the fprobe is no longer used. * * Return 0 if @fp is unregistered successfully, -errno if not. */ -int unregister_fprobe(struct fprobe *fp) +int unregister_fprobe_async(struct fprobe *fp) { guard(mutex)(&fprobe_mutex); if (!fp || !fprobe_registered(fp)) @@ -960,6 +1016,24 @@ int unregister_fprobe(struct fprobe *fp) return unregister_fprobe_nolock(fp); } + +/** + * unregister_fprobe() - Unregister fprobe with RCU GP wait + * @fp: A fprobe data structure to be unregistered. + * + * Unregister fprobe (and remove ftrace hooks from the function entries). + * This function will block until the fprobe is no longer used. + * + * Return 0 if @fp is unregistered successfully, -errno if not. + */ +int unregister_fprobe(struct fprobe *fp) +{ + int ret = unregister_fprobe_async(fp); + + if (!ret) + synchronize_rcu(); + return ret; +} EXPORT_SYMBOL_GPL(unregister_fprobe); static int __init fprobe_initcall(void) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a31fb4b7a52e..bef1c05b9b71 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -5,6 +5,7 @@ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> */ #include <linux/trace_recursion.h> +#include <linux/panic_notifier.h> #include <linux/trace_events.h> #include <linux/ring_buffer.h> #include <linux/trace_clock.h> @@ -29,6 +30,7 @@ #include <linux/oom.h> #include <linux/mm.h> +#include <asm/ring_buffer.h> #include <asm/local64.h> #include <asm/local.h> #include <asm/setup.h> @@ -553,6 +555,7 @@ struct trace_buffer { unsigned long range_addr_start; unsigned long range_addr_end; + struct notifier_block flush_nb; struct ring_buffer_meta *meta; @@ -2455,6 +2458,16 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) kfree(cpu_buffer); } +/* Stop recording on a persistent buffer and flush cache if needed. */ +static int rb_flush_buffer_cb(struct notifier_block *nb, unsigned long event, void *data) +{ + struct trace_buffer *buffer = container_of(nb, struct trace_buffer, flush_nb); + + ring_buffer_record_off(buffer); + arch_ring_buffer_flush_range(buffer->range_addr_start, buffer->range_addr_end); + return NOTIFY_DONE; +} + static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags, int order, unsigned long start, unsigned long end, @@ -2574,6 +2587,12 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags, mutex_init(&buffer->mutex); + /* Persistent ring buffer needs to flush cache before reboot. */ + if (start && end) { + buffer->flush_nb.notifier_call = rb_flush_buffer_cb; + atomic_notifier_chain_register(&panic_notifier_list, &buffer->flush_nb); + } + return_ptr(buffer); fail_free_buffers: @@ -2661,6 +2680,9 @@ ring_buffer_free(struct trace_buffer *buffer) { int cpu; + if (buffer->range_addr_start && buffer->range_addr_end) + atomic_notifier_chain_unregister(&panic_notifier_list, &buffer->flush_nb); + cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node); irq_work_sync(&buffer->irq_work.work); @@ -5249,6 +5271,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) iter->head_page = cpu_buffer->reader_page; iter->head = cpu_buffer->reader_page->read; iter->next_event = iter->head; + iter->missed_events = 0; iter->cache_reader_page = iter->head_page; iter->cache_read = cpu_buffer->read; @@ -5863,10 +5886,7 @@ ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, */ bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter) { - bool ret = iter->missed_events != 0; - - iter->missed_events = 0; - return ret; + return iter->missed_events != 0; } EXPORT_SYMBOL_GPL(ring_buffer_iter_dropped); @@ -6028,7 +6048,7 @@ void ring_buffer_iter_advance(struct ring_buffer_iter *iter) unsigned long flags; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - + iter->missed_events = 0; rb_advance_iter(iter); raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index cb9e06713868..0089c257b465 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1360,10 +1360,8 @@ static const char *hist_field_name(struct hist_field *field, len = snprintf(full_name, sizeof(full_name), "%s.%s.%s", field->system, field->event_name, field->name); - if (len >= sizeof(full_name)) - return NULL; - - field_name = full_name; + if (len < sizeof(full_name)) + field_name = full_name; } else field_name = field->name; } else if (field->flags & HIST_FIELD_FL_TIMESTAMP) diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 7f8da4dab69d..ba52813ce51b 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -386,13 +386,11 @@ static void tracing_map_elt_init_fields(struct tracing_map_elt *elt) } } -static void tracing_map_elt_free(struct tracing_map_elt *elt) +static void __tracing_map_elt_free(struct tracing_map_elt *elt) { if (!elt) return; - if (elt->map->ops && elt->map->ops->elt_free) - elt->map->ops->elt_free(elt); kfree(elt->fields); kfree(elt->vars); kfree(elt->var_set); @@ -400,6 +398,17 @@ static void tracing_map_elt_free(struct tracing_map_elt *elt) kfree(elt); } +static void tracing_map_elt_free(struct tracing_map_elt *elt) +{ + if (!elt) + return; + + /* Only objects initialized with alloc_elt() should be passed to free_elt().*/ + if (elt->map->ops && elt->map->ops->elt_free) + elt->map->ops->elt_free(elt); + __tracing_map_elt_free(elt); +} + static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map) { struct tracing_map_elt *elt; @@ -444,7 +453,7 @@ static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map) } return elt; free: - tracing_map_elt_free(elt); + __tracing_map_elt_free(elt); return ERR_PTR(err); } diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig index 7a6af361d2fc..889380c2702c 100644 --- a/lib/kunit/Kconfig +++ b/lib/kunit/Kconfig @@ -16,8 +16,9 @@ menuconfig KUNIT if KUNIT config KUNIT_DEBUGFS - bool "KUnit - Enable /sys/kernel/debug/kunit debugfs representation" if !KUNIT_ALL_TESTS - default KUNIT_ALL_TESTS + bool "KUnit - Enable /sys/kernel/debug/kunit debugfs representation" + depends on DEBUG_FS + default y help Enable debugfs representation for kunit. Currently this consists of /sys/kernel/debug/kunit/<test_suite>/results files for each diff --git a/lib/tests/test_kprobes.c b/lib/tests/test_kprobes.c index b7582010125c..06e729e4de05 100644 --- a/lib/tests/test_kprobes.c +++ b/lib/tests/test_kprobes.c @@ -12,6 +12,12 @@ #define div_factor 3 +#define KP_CLEAR(_kp) \ +do { \ + (_kp).addr = NULL; \ + (_kp).flags = 0; \ +} while (0) + static u32 rand1, preh_val, posth_val; static u32 (*target)(u32 value); static u32 (*recursed_target)(u32 value); @@ -125,10 +131,6 @@ static void test_kprobes(struct kunit *test) current_test = test; - /* addr and flags should be cleard for reusing kprobe. */ - kp.addr = NULL; - kp.flags = 0; - KUNIT_EXPECT_EQ(test, 0, register_kprobes(kps, 2)); preh_val = 0; posth_val = 0; @@ -226,9 +228,6 @@ static void test_kretprobes(struct kunit *test) struct kretprobe *rps[2] = {&rp, &rp2}; current_test = test; - /* addr and flags should be cleard for reusing kprobe. */ - rp.kp.addr = NULL; - rp.kp.flags = 0; KUNIT_EXPECT_EQ(test, 0, register_kretprobes(rps, 2)); krph_val = 0; @@ -290,8 +289,6 @@ static void test_stacktrace_on_kretprobe(struct kunit *test) unsigned long myretaddr = (unsigned long)__builtin_return_address(0); current_test = test; - rp3.kp.addr = NULL; - rp3.kp.flags = 0; /* * Run the stacktrace_driver() to record correct return address in @@ -352,8 +349,6 @@ static void test_stacktrace_on_nested_kretprobe(struct kunit *test) struct kretprobe *rps[2] = {&rp3, &rp4}; current_test = test; - rp3.kp.addr = NULL; - rp3.kp.flags = 0; //KUNIT_ASSERT_NE(test, myretaddr, stacktrace_driver()); @@ -367,6 +362,18 @@ static void test_stacktrace_on_nested_kretprobe(struct kunit *test) static int kprobes_test_init(struct kunit *test) { + KP_CLEAR(kp); + KP_CLEAR(kp2); + KP_CLEAR(kp_missed); +#ifdef CONFIG_KRETPROBES + KP_CLEAR(rp.kp); + KP_CLEAR(rp2.kp); +#ifdef CONFIG_ARCH_CORRECT_STACKTRACE_ON_KRETPROBE + KP_CLEAR(rp3.kp); + KP_CLEAR(rp4.kp); +#endif +#endif + target = kprobe_target; target2 = kprobe_target2; recursed_target = kprobe_recursed_target; diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index ca82dc78f0b9..a7be6ea812e4 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -2444,6 +2444,7 @@ static int damon_sysfs_memcg_path_to_id(char *memcg_path, unsigned short *id) if (damon_sysfs_memcg_path_eq(memcg, path, memcg_path)) { *id = mem_cgroup_id(memcg); found = true; + mem_cgroup_iter_break(NULL, memcg); break; } } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 61cf6af26f3c..4df68e5468ad 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -571,7 +571,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val, if (!val) return; - css_rstat_updated(&memcg->css, cpu); + __css_rstat_updated(&memcg->css, cpu); statc_pcpu = memcg->vmstats_percpu; for (; statc_pcpu; statc_pcpu = statc->parent_pcpu) { statc = this_cpu_ptr(statc_pcpu); @@ -2525,7 +2525,7 @@ static inline void account_slab_nmi_safe(struct mem_cgroup *memcg, struct mem_cgroup_per_node *pn = memcg->nodeinfo[pgdat->node_id]; /* preemption is disabled in_nmi(). */ - css_rstat_updated(&memcg->css, smp_processor_id()); + __css_rstat_updated(&memcg->css, smp_processor_id()); if (idx == NR_SLAB_RECLAIMABLE_B) atomic_add(nr, &pn->slab_reclaimable); else @@ -2749,7 +2749,7 @@ static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val) mod_memcg_state(memcg, MEMCG_KMEM, val); } else { /* preemption is disabled in_nmi(). */ - css_rstat_updated(&memcg->css, smp_processor_id()); + __css_rstat_updated(&memcg->css, smp_processor_id()); atomic_add(val, &memcg->kmem_stat); } } diff --git a/mm/memory.c b/mm/memory.c index 94bf107a47ca..42b733b78523 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -612,6 +612,21 @@ static void print_bad_page_map(struct vm_area_struct *vma, dump_stack(); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } + +static inline bool pgtable_level_has_pxx_special(enum pgtable_level level) +{ + switch (level) { + case PGTABLE_LEVEL_PTE: + return IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL); + case PGTABLE_LEVEL_PMD: + return IS_ENABLED(CONFIG_ARCH_SUPPORTS_PMD_PFNMAP); + case PGTABLE_LEVEL_PUD: + return IS_ENABLED(CONFIG_ARCH_SUPPORTS_PUD_PFNMAP); + default: + return false; + } +} + #define print_bad_pte(vma, addr, pte, page) \ print_bad_page_map(vma, addr, pte_val(pte), page, PGTABLE_LEVEL_PTE) @@ -684,7 +699,7 @@ static inline struct page *__vm_normal_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, bool special, unsigned long long entry, enum pgtable_level level) { - if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) { + if (pgtable_level_has_pxx_special(level)) { if (unlikely(special)) { #ifdef CONFIG_FIND_NORMAL_PAGE if (vma->vm_ops && vma->vm_ops->find_normal_page) @@ -699,8 +714,9 @@ static inline struct page *__vm_normal_page(struct vm_area_struct *vma, return NULL; } /* - * With CONFIG_ARCH_HAS_PTE_SPECIAL, any special page table - * mappings (incl. shared zero folios) are marked accordingly. + * With working pte_special()/pmd_special()..., any special page + * table mappings (incl. shared zero folios) are marked + * accordingly. */ } else { if (unlikely(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))) { @@ -1735,7 +1751,7 @@ static inline int zap_nonpresent_ptes(struct mmu_gather *tlb, * consider uffd-wp bit when zap. For more information, * see zap_install_uffd_wp_if_needed(). */ - WARN_ON_ONCE(!vma_is_anonymous(vma)); + WARN_ON_ONCE(!folio_test_anon(folio)); rss[mm_counter(folio)]--; folio_remove_rmap_pte(folio, page, vma); folio_put(folio); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index aa1f74414307..6a7714179c20 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1439,6 +1439,8 @@ static void remove_memory_blocks_and_altmaps(u64 start, u64 size) altmap = mem->altmap; mem->altmap = NULL; + /* drop the ref. we got via find_memory_block() */ + put_device(&mem->dev); remove_memory_block_devices(cur_start, memblock_size); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f67696618003..775e02b797a3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1847,9 +1847,9 @@ static inline bool should_skip_init(gfp_t flags) inline void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags) { + const bool zero_tags = gfp_flags & __GFP_ZEROTAGS; bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) && !should_skip_init(gfp_flags); - bool zero_tags = init && (gfp_flags & __GFP_ZEROTAGS); int i; set_page_private(page, 0); @@ -1871,11 +1871,11 @@ inline void post_alloc_hook(struct page *page, unsigned int order, */ /* - * If memory tags should be zeroed - * (which happens only when memory should be initialized as well). + * Clearing tags can efficiently clear the memory for us as well, if + * required. */ if (zero_tags) - init = !tag_clear_highpages(page, 1 << order); + init = tag_clear_highpages(page, 1 << order, /* clear_pages= */init); if (!should_skip_kasan_unpoison(gfp_flags) && kasan_unpoison_pages(page, order, init)) { diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 74ef7dc2b2f9..b8b1b997960a 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -224,6 +224,8 @@ static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface) hard_iface->bat_iv.ogm_buff = NULL; mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); + + cancel_delayed_work_sync(&hard_iface->bat_iv.reschedule_work); } static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface) @@ -536,8 +538,10 @@ out: * @if_incoming: interface where the packet was received * @if_outgoing: interface for which the retransmission should be considered * @own_packet: true if it is a self-generated ogm + * + * Return: whether forward packet was scheduled */ -static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, +static bool batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, int packet_len, unsigned long send_time, bool direct_link, struct batadv_hard_iface *if_incoming, @@ -561,13 +565,13 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, skb = netdev_alloc_skb_ip_align(NULL, skb_size); if (!skb) - return; + return false; forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing, queue_left, bat_priv, skb); if (!forw_packet_aggr) { kfree_skb(skb); - return; + return false; } forw_packet_aggr->skb->priority = TC_PRIO_CONTROL; @@ -590,6 +594,8 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, batadv_iv_send_outstanding_bat_ogm_packet); batadv_forw_packet_ogmv1_queue(bat_priv, forw_packet_aggr, send_time); + + return true; } /* aggregate a new packet into the existing ogm packet */ @@ -617,8 +623,10 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr, * @if_outgoing: interface for which the retransmission should be considered * @own_packet: true if it is a self-generated ogm * @send_time: timestamp (jiffies) when the packet is to be sent + * + * Return: whether forward packet was scheduled */ -static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, +static bool batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, unsigned char *packet_buff, int packet_len, struct batadv_hard_iface *if_incoming, @@ -670,14 +678,16 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, if (!own_packet && atomic_read(&bat_priv->aggregated_ogms)) send_time += max_aggregation_jiffies; - batadv_iv_ogm_aggregate_new(packet_buff, packet_len, - send_time, direct_link, - if_incoming, if_outgoing, - own_packet); + return batadv_iv_ogm_aggregate_new(packet_buff, packet_len, + send_time, direct_link, + if_incoming, if_outgoing, + own_packet); } else { batadv_iv_ogm_aggregate(forw_packet_aggr, packet_buff, packet_len, direct_link); spin_unlock_bh(&bat_priv->forw_bat_list_lock); + + return true; } } @@ -790,6 +800,9 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) u32 seqno; u16 tvlv_len = 0; unsigned long send_time; + bool reschedule = false; + bool scheduled; + int ret; lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex); @@ -813,9 +826,15 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) * appended as it may alter the tt tvlv container */ batadv_tt_local_commit_changes(bat_priv); - tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff, - ogm_buff_len, - BATADV_OGM_HLEN); + ret = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff, + ogm_buff_len, + BATADV_OGM_HLEN); + if (ret < 0) { + reschedule = true; + goto out; + } + + tvlv_len = ret; } batadv_ogm_packet = (struct batadv_ogm_packet *)(*ogm_buff); @@ -834,8 +853,11 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) /* OGMs from secondary interfaces are only scheduled on their * respective interfaces. */ - batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len, - hard_iface, hard_iface, 1, send_time); + scheduled = batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len, + hard_iface, hard_iface, 1, send_time); + if (!scheduled) + reschedule = true; + goto out; } @@ -847,15 +869,28 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) if (!kref_get_unless_zero(&tmp_hard_iface->refcount)) continue; - batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, - *ogm_buff_len, hard_iface, - tmp_hard_iface, 1, send_time); - + scheduled = batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, + *ogm_buff_len, hard_iface, + tmp_hard_iface, 1, send_time); batadv_hardif_put(tmp_hard_iface); + + if (!scheduled && tmp_hard_iface == hard_iface) + reschedule = true; } rcu_read_unlock(); out: + if (reschedule) { + /* there was a failure scheduling the own forward packet. + * as result, the batadv_iv_send_outstanding_bat_ogm_packet() + * work item is no longer scheduled. it is therefore necessary + * to reschedule it manually + */ + queue_delayed_work(batadv_event_workqueue, + &hard_iface->bat_iv.reschedule_work, + msecs_to_jiffies(atomic_read(&bat_priv->orig_interval))); + } + batadv_hardif_put(primary_if); } @@ -870,6 +905,17 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } +static void batadv_iv_ogm_reschedule(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct batadv_hard_iface *hard_iface; + + hard_iface = container_of(delayed_work, + struct batadv_hard_iface, + bat_iv.reschedule_work); + batadv_iv_ogm_schedule(hard_iface); +} + /** * batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over interface * @orig_node: originator which reproadcasted the OGMs directly @@ -2262,6 +2308,8 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1, static void batadv_iv_iface_enabled(struct batadv_hard_iface *hard_iface) { + INIT_DELAYED_WORK(&hard_iface->bat_iv.reschedule_work, batadv_iv_ogm_reschedule); + /* begin scheduling originator messages on that interface */ batadv_iv_ogm_schedule(hard_iface); } diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index e3870492dab7..d66ca77b1aaa 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -113,14 +113,14 @@ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv) /** * batadv_v_ogm_send_to_if() - send a batman ogm using a given interface + * @bat_priv: the bat priv with all the mesh interface information * @skb: the OGM to send * @hard_iface: the interface to use to send the OGM */ -static void batadv_v_ogm_send_to_if(struct sk_buff *skb, +static void batadv_v_ogm_send_to_if(struct batadv_priv *bat_priv, + struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); - if (hard_iface->if_status != BATADV_IF_ACTIVE) { kfree_skb(skb); return; @@ -187,6 +187,7 @@ static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface) /** * batadv_v_ogm_aggr_send() - flush & send aggregation queue + * @bat_priv: the bat priv with all the mesh interface information * @hard_iface: the interface with the aggregation queue to flush * * Aggregates all OGMv2 packets currently in the aggregation queue into a @@ -196,7 +197,8 @@ static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface) * * Caller needs to hold the hard_iface->bat_v.aggr_list.lock. */ -static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface) +static void batadv_v_ogm_aggr_send(struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface) { unsigned int aggr_len = hard_iface->bat_v.aggr_len; struct sk_buff *skb_aggr; @@ -226,27 +228,32 @@ static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface) consume_skb(skb); } - batadv_v_ogm_send_to_if(skb_aggr, hard_iface); + batadv_v_ogm_send_to_if(bat_priv, skb_aggr, hard_iface); } /** * batadv_v_ogm_queue_on_if() - queue a batman ogm on a given interface + * @bat_priv: the bat priv with all the mesh interface information * @skb: the OGM to queue * @hard_iface: the interface to queue the OGM on */ -static void batadv_v_ogm_queue_on_if(struct sk_buff *skb, +static void batadv_v_ogm_queue_on_if(struct batadv_priv *bat_priv, + struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); + if (hard_iface->mesh_iface != bat_priv->mesh_iface) { + kfree_skb(skb); + return; + } if (!atomic_read(&bat_priv->aggregated_ogms)) { - batadv_v_ogm_send_to_if(skb, hard_iface); + batadv_v_ogm_send_to_if(bat_priv, skb, hard_iface); return; } spin_lock_bh(&hard_iface->bat_v.aggr_list.lock); if (!batadv_v_ogm_queue_left(skb, hard_iface)) - batadv_v_ogm_aggr_send(hard_iface); + batadv_v_ogm_aggr_send(bat_priv, hard_iface); hard_iface->bat_v.aggr_len += batadv_v_ogm_len(skb); __skb_queue_tail(&hard_iface->bat_v.aggr_list, skb); @@ -262,10 +269,10 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv) struct batadv_hard_iface *hard_iface; struct batadv_ogm2_packet *ogm_packet; struct sk_buff *skb, *skb_tmp; - unsigned char *ogm_buff; + unsigned char **ogm_buff; struct list_head *iter; - int ogm_buff_len; - u16 tvlv_len = 0; + int *ogm_buff_len; + u16 tvlv_len; int ret; lockdep_assert_held(&bat_priv->bat_v.ogm_buff_mutex); @@ -273,25 +280,27 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv) if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) goto out; - ogm_buff = bat_priv->bat_v.ogm_buff; - ogm_buff_len = bat_priv->bat_v.ogm_buff_len; + ogm_buff = &bat_priv->bat_v.ogm_buff; + ogm_buff_len = &bat_priv->bat_v.ogm_buff_len; + /* tt changes have to be committed before the tvlv data is * appended as it may alter the tt tvlv container */ batadv_tt_local_commit_changes(bat_priv); - tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, &ogm_buff, - &ogm_buff_len, - BATADV_OGM2_HLEN); + ret = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff, + ogm_buff_len, + BATADV_OGM2_HLEN); + if (ret < 0) + goto reschedule; - bat_priv->bat_v.ogm_buff = ogm_buff; - bat_priv->bat_v.ogm_buff_len = ogm_buff_len; + tvlv_len = ret; - skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + ogm_buff_len); + skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + *ogm_buff_len); if (!skb) goto reschedule; skb_reserve(skb, ETH_HLEN); - skb_put_data(skb, ogm_buff, ogm_buff_len); + skb_put_data(skb, *ogm_buff, *ogm_buff_len); ogm_packet = (struct batadv_ogm2_packet *)skb->data; ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno)); @@ -343,7 +352,7 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv) break; } - batadv_v_ogm_queue_on_if(skb_tmp, hard_iface); + batadv_v_ogm_queue_on_if(bat_priv, skb_tmp, hard_iface); batadv_hardif_put(hard_iface); } rcu_read_unlock(); @@ -383,12 +392,14 @@ void batadv_v_ogm_aggr_work(struct work_struct *work) { struct batadv_hard_iface_bat_v *batv; struct batadv_hard_iface *hard_iface; + struct batadv_priv *bat_priv; batv = container_of(work, struct batadv_hard_iface_bat_v, aggr_wq.work); hard_iface = container_of(batv, struct batadv_hard_iface, bat_v); + bat_priv = netdev_priv(hard_iface->mesh_iface); spin_lock_bh(&hard_iface->bat_v.aggr_list.lock); - batadv_v_ogm_aggr_send(hard_iface); + batadv_v_ogm_aggr_send(bat_priv, hard_iface); spin_unlock_bh(&hard_iface->bat_v.aggr_list.lock); batadv_v_ogm_start_queue_timer(hard_iface); @@ -578,7 +589,7 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv, if_outgoing->net_dev->name, ntohl(ogm_forward->throughput), ogm_forward->ttl, if_incoming->net_dev->name); - batadv_v_ogm_queue_on_if(skb, if_outgoing); + batadv_v_ogm_queue_on_if(bat_priv, skb, if_outgoing); out: batadv_orig_ifinfo_put(orig_ifinfo); diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 2d7971424aa0..3072f94275ac 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -357,12 +357,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac, sizeof(local_claim_dest)); local_claim_dest.type = claimtype; - mesh_iface = primary_if->mesh_iface; + mesh_iface = READ_ONCE(primary_if->mesh_iface); + if (!mesh_iface) + goto out; skb = arp_create(ARPOP_REPLY, ETH_P_ARP, /* IP DST: 0.0.0.0 */ zeroip, - primary_if->mesh_iface, + mesh_iface, /* IP SRC: 0.0.0.0 */ zeroip, /* Ethernet DST: Broadcast */ @@ -515,8 +517,8 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig, entry->crc = BATADV_BLA_CRC_INIT; entry->bat_priv = bat_priv; spin_lock_init(&entry->crc_lock); - atomic_set(&entry->request_sent, 0); - atomic_set(&entry->wait_periods, 0); + entry->state = BATADV_BLA_BACKBONE_GW_SYNCED; + entry->wait_periods = 0; ether_addr_copy(entry->orig, orig); INIT_WORK(&entry->report_work, batadv_bla_loopdetect_report); kref_init(&entry->refcount); @@ -545,9 +547,13 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig, batadv_bla_send_announce(bat_priv, entry); /* this will be decreased in the worker thread */ - atomic_inc(&entry->request_sent); - atomic_set(&entry->wait_periods, BATADV_BLA_WAIT_PERIODS); - atomic_inc(&bat_priv->bla.num_requests); + spin_lock_bh(&bat_priv->bla.num_requests_lock); + if (entry->state == BATADV_BLA_BACKBONE_GW_SYNCED) { + entry->state = BATADV_BLA_BACKBONE_GW_UNSYNCED; + entry->wait_periods = BATADV_BLA_WAIT_PERIODS; + atomic_inc(&bat_priv->bla.num_requests); + } + spin_unlock_bh(&bat_priv->bla.num_requests_lock); } return entry; @@ -650,10 +656,12 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw) backbone_gw->vid, BATADV_CLAIM_TYPE_REQUEST); /* no local broadcasts should be sent or received, for now. */ - if (!atomic_read(&backbone_gw->request_sent)) { + spin_lock_bh(&backbone_gw->bat_priv->bla.num_requests_lock); + if (backbone_gw->state == BATADV_BLA_BACKBONE_GW_SYNCED) { + backbone_gw->state = BATADV_BLA_BACKBONE_GW_UNSYNCED; atomic_inc(&backbone_gw->bat_priv->bla.num_requests); - atomic_set(&backbone_gw->request_sent, 1); } + spin_unlock_bh(&backbone_gw->bat_priv->bla.num_requests_lock); } /** @@ -874,10 +882,12 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, /* if we have sent a request and the crc was OK, * we can allow traffic again. */ - if (atomic_read(&backbone_gw->request_sent)) { + spin_lock_bh(&bat_priv->bla.num_requests_lock); + if (backbone_gw->state == BATADV_BLA_BACKBONE_GW_UNSYNCED) { + backbone_gw->state = BATADV_BLA_BACKBONE_GW_SYNCED; atomic_dec(&backbone_gw->bat_priv->bla.num_requests); - atomic_set(&backbone_gw->request_sent, 0); } + spin_unlock_bh(&bat_priv->bla.num_requests_lock); } batadv_backbone_gw_put(backbone_gw); @@ -1225,6 +1235,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) struct hlist_head *head; struct batadv_hashtable *hash; spinlock_t *list_lock; /* protects write access to the hash lists */ + bool purged; int i; hash = bat_priv->bla.backbone_hash; @@ -1235,30 +1246,49 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) head = &hash->table[i]; list_lock = &hash->list_locks[i]; - spin_lock_bh(list_lock); - hlist_for_each_entry_safe(backbone_gw, node_tmp, - head, hash_entry) { - if (now) - goto purge_now; - if (!batadv_has_timed_out(backbone_gw->lasttime, - BATADV_BLA_BACKBONE_TIMEOUT)) - continue; + do { + purged = false; + + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(backbone_gw, node_tmp, + head, hash_entry) { + if (now) + goto purge_now; + if (!batadv_has_timed_out(backbone_gw->lasttime, + BATADV_BLA_BACKBONE_TIMEOUT)) + continue; - batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, - "%s(): backbone gw %pM timed out\n", - __func__, backbone_gw->orig); + batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, + "%s(): backbone gw %pM timed out\n", + __func__, backbone_gw->orig); purge_now: - /* don't wait for the pending request anymore */ - if (atomic_read(&backbone_gw->request_sent)) - atomic_dec(&bat_priv->bla.num_requests); + purged = true; - batadv_bla_del_backbone_claims(backbone_gw); + /* don't wait for the pending request anymore */ + spin_lock_bh(&bat_priv->bla.num_requests_lock); + if (backbone_gw->state == BATADV_BLA_BACKBONE_GW_UNSYNCED) + atomic_dec(&bat_priv->bla.num_requests); - hlist_del_rcu(&backbone_gw->hash_entry); - batadv_backbone_gw_put(backbone_gw); - } - spin_unlock_bh(list_lock); + backbone_gw->state = BATADV_BLA_BACKBONE_GW_STOPPED; + spin_unlock_bh(&bat_priv->bla.num_requests_lock); + + batadv_bla_del_backbone_claims(backbone_gw); + + hlist_del_rcu(&backbone_gw->hash_entry); + break; + } + spin_unlock_bh(list_lock); + + if (purged) { + /* reference for pending report_work */ + if (cancel_work_sync(&backbone_gw->report_work)) + batadv_backbone_gw_put(backbone_gw); + + /* reference for hash_entry */ + batadv_backbone_gw_put(backbone_gw); + } + } while (purged); } } @@ -1493,7 +1523,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) batadv_bla_send_loopdetect(bat_priv, backbone_gw); - /* request_sent is only set after creation to avoid + /* state is only set to unsynced after creation to avoid * problems when we are not yet known as backbone gw * in the backbone. * @@ -1502,14 +1532,21 @@ static void batadv_bla_periodic_work(struct work_struct *work) * some grace time. */ - if (atomic_read(&backbone_gw->request_sent) == 0) - continue; + spin_lock_bh(&bat_priv->bla.num_requests_lock); + if (backbone_gw->state != BATADV_BLA_BACKBONE_GW_UNSYNCED) + goto unlock_next; - if (!atomic_dec_and_test(&backbone_gw->wait_periods)) - continue; + if (backbone_gw->wait_periods > 0) + backbone_gw->wait_periods--; + if (backbone_gw->wait_periods > 0) + goto unlock_next; + + backbone_gw->state = BATADV_BLA_BACKBONE_GW_SYNCED; atomic_dec(&backbone_gw->bat_priv->bla.num_requests); - atomic_set(&backbone_gw->request_sent, 0); + +unlock_next: + spin_unlock_bh(&bat_priv->bla.num_requests_lock); } rcu_read_unlock(); } diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 8b8132eb0a79..031c295fff1b 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -697,6 +697,9 @@ static bool batadv_dat_forward_data(struct batadv_priv *bat_priv, goto free_orig; tmp_skb = pskb_copy_for_clone(skb, GFP_ATOMIC); + if (!tmp_skb) + goto free_neigh; + if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb, cand[i].orig_node, packet_subtype)) { diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index cc14bc41381e..31395281692c 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -17,6 +17,7 @@ #include <linux/lockdep.h> #include <linux/minmax.h> #include <linux/netdevice.h> +#include <linux/overflow.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -80,9 +81,9 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node, * * Return: the maximum size of payload that can be fragmented. */ -static int batadv_frag_size_limit(void) +static size_t batadv_frag_size_limit(void) { - int limit = BATADV_FRAG_MAX_FRAG_SIZE; + size_t limit = BATADV_FRAG_MAX_FRAG_SIZE; limit -= sizeof(struct batadv_frag_packet); limit *= BATADV_FRAG_MAX_FRAGMENTS; @@ -143,7 +144,9 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, struct batadv_frag_packet *frag_packet; u8 bucket; u16 seqno, hdr_size = sizeof(struct batadv_frag_packet); + bool overflow = false; bool ret = false; + size_t data_len; /* Linearize packet to avoid linearizing 16 packets in a row when doing * the later merge. Non-linear merge should be added to remove this @@ -153,6 +156,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, goto err; frag_packet = (struct batadv_frag_packet *)skb->data; + data_len = skb->len - hdr_size; seqno = ntohs(frag_packet->seqno); bucket = seqno % BATADV_FRAG_BUFFER_COUNT; @@ -171,7 +175,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, spin_lock_bh(&chain->lock); if (batadv_frag_init_chain(chain, seqno)) { hlist_add_head(&frag_entry_new->list, &chain->fragment_list); - chain->size = skb->len - hdr_size; + chain->size = data_len; chain->timestamp = jiffies; chain->total_size = ntohs(frag_packet->total_size); ret = true; @@ -188,7 +192,11 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, if (frag_entry_curr->no < frag_entry_new->no) { hlist_add_before(&frag_entry_new->list, &frag_entry_curr->list); - chain->size += skb->len - hdr_size; + + if (check_add_overflow(chain->size, data_len, + &chain->size)) + overflow = true; + chain->timestamp = jiffies; ret = true; goto out; @@ -201,13 +209,16 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, /* Reached the end of the list, so insert after 'frag_entry_last'. */ if (likely(frag_entry_last)) { hlist_add_behind(&frag_entry_new->list, &frag_entry_last->list); - chain->size += skb->len - hdr_size; + + if (check_add_overflow(chain->size, data_len, &chain->size)) + overflow = true; + chain->timestamp = jiffies; ret = true; } out: - if (chain->size > batadv_frag_size_limit() || + if (overflow || chain->size > batadv_frag_size_limit() || chain->total_size != ntohs(frag_packet->total_size) || chain->total_size > batadv_frag_size_limit()) { /* Clear chain if total size of either the list or the packet @@ -294,6 +305,31 @@ free: } /** + * batadv_skb_is_frag() - check if newly merged skb is gain a unicast packet + * @skb: newly merged skb + * + * Return: if newly skb is of type BATADV_UNICAST_FRAG + */ +static bool batadv_skb_is_frag(struct sk_buff *skb) +{ + struct batadv_ogm_packet *batadv_ogm_packet; + + /* packet should hold at least type and version */ + if (unlikely(!pskb_may_pull(skb, 2))) + return false; + + batadv_ogm_packet = (struct batadv_ogm_packet *)skb->data; + + if (batadv_ogm_packet->version != BATADV_COMPAT_VERSION) + return false; + + if (batadv_ogm_packet->packet_type != BATADV_UNICAST_FRAG) + return false; + + return true; +} + +/** * batadv_frag_skb_buffer() - buffer fragment for later merge * @skb: skb to buffer * @orig_node_src: originator that the skb is received from @@ -326,6 +362,16 @@ bool batadv_frag_skb_buffer(struct sk_buff **skb, if (!skb_out) goto out_err; + /* fragment in fragment is not allowed. otherwise it is possible + * to exhaust the stack when receiving a matryoshka-style + * "fragments in a fragment packet" + */ + if (batadv_skb_is_frag(skb_out)) { + kfree_skb(skb_out); + skb_out = NULL; + goto out_err; + } + out: ret = true; out_err: diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 7a11b245e9f4..ff341c270d91 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -478,10 +478,14 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv, */ void batadv_gw_node_free(struct batadv_priv *bat_priv) { + struct batadv_gw_node *curr_gw; struct batadv_gw_node *gw_node; struct hlist_node *node_tmp; spin_lock_bh(&bat_priv->gw.list_lock); + curr_gw = rcu_replace_pointer(bat_priv->gw.curr_gw, NULL, true); + batadv_gw_node_put(curr_gw); + hlist_for_each_entry_safe(gw_node, node_tmp, &bat_priv->gw.gateway_list, list) { hlist_del_init_rcu(&gw_node->list); diff --git a/net/batman-adv/mesh-interface.c b/net/batman-adv/mesh-interface.c index df7e95811ef5..dcdc82f6af8c 100644 --- a/net/batman-adv/mesh-interface.c +++ b/net/batman-adv/mesh-interface.c @@ -787,6 +787,7 @@ static int batadv_meshif_init_late(struct net_device *dev) atomic_set(&bat_priv->tt.ogm_append_cnt, 0); #ifdef CONFIG_BATMAN_ADV_BLA atomic_set(&bat_priv->bla.num_requests, 0); + spin_lock_init(&bat_priv->bla.num_requests_lock); #endif atomic_set(&bat_priv->tp_num, 0); diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index a662408ad867..ae195053e060 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -835,8 +835,6 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) orig_node = container_of(rcu, struct batadv_orig_node, rcu); - batadv_mcast_purge_orig(orig_node); - batadv_frag_purge_orig(orig_node, NULL); kfree(orig_node->tt_buff); @@ -887,6 +885,8 @@ void batadv_orig_node_release(struct kref *ref) } spin_unlock_bh(&orig_node->vlan_list_lock); + batadv_mcast_purge_orig(orig_node); + call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu); } diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 76ece4014384..b1629e0ac826 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -8,6 +8,7 @@ #include "main.h" #include <linux/atomic.h> +#include <linux/bug.h> #include <linux/build_bug.h> #include <linux/byteorder/generic.h> #include <linux/cache.h> @@ -254,6 +255,7 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason, * batadv_tp_list_find() - find a tp_vars object in the global list * @bat_priv: the bat priv with all the mesh interface information * @dst: the other endpoint MAC address to look for + * @role: role of the session * * Look for a tp_vars object matching dst as end_point and return it after * having increment the refcounter. Return NULL is not found @@ -261,7 +263,8 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason, * Return: matching tp_vars or NULL when no tp_vars with @dst was found */ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, - const u8 *dst) + const u8 *dst, + enum batadv_tp_meter_role role) { struct batadv_tp_vars *pos, *tp_vars = NULL; @@ -270,6 +273,9 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, if (!batadv_compare_eth(pos->other_end, dst)) continue; + if (pos->role != role) + continue; + /* most of the time this function is invoked during the normal * process..it makes sens to pay more when the session is * finished and to speed the process up during the measurement @@ -286,11 +292,32 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, } /** + * batadv_tp_list_active() - check if session from/to destination is ongoing + * @bat_priv: the bat priv with all the mesh interface information + * @dst: the other endpoint MAC address to look for + * + * Return: if matching session with @dst was found + */ +static bool batadv_tp_list_active(struct batadv_priv *bat_priv, const u8 *dst) + __must_hold(&bat_priv->tp_list_lock) +{ + struct batadv_tp_vars *tp_vars; + + hlist_for_each_entry_rcu(tp_vars, &bat_priv->tp_list, list) { + if (batadv_compare_eth(tp_vars->other_end, dst)) + return true; + } + + return false; +} + +/** * batadv_tp_list_find_session() - find tp_vars session object in the global * list * @bat_priv: the bat priv with all the mesh interface information * @dst: the other endpoint MAC address to look for * @session: session identifier + * @role: role of the session * * Look for a tp_vars object matching dst as end_point, session as tp meter * session and return it after having increment the refcounter. Return NULL @@ -300,7 +327,7 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, */ static struct batadv_tp_vars * batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst, - const u8 *session) + const u8 *session, enum batadv_tp_meter_role role) { struct batadv_tp_vars *pos, *tp_vars = NULL; @@ -312,6 +339,9 @@ batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst, if (memcmp(pos->session, session, sizeof(pos->session)) != 0) continue; + if (pos->role != role) + continue; + /* most of the time this function is invoked during the normal * process..it makes sense to pay more when the session is * finished and to speed the process up during the measurement @@ -400,13 +430,7 @@ static void batadv_tp_sender_cleanup(struct batadv_tp_vars *tp_vars) batadv_tp_list_detach(tp_vars); /* kill the timer and remove its reference */ - timer_delete_sync(&tp_vars->timer); - /* the worker might have rearmed itself therefore we kill it again. Note - * that if the worker should run again before invoking the following - * timer_delete(), it would not re-arm itself once again because the status - * is OFF now - */ - timer_delete(&tp_vars->timer); + timer_shutdown_sync(&tp_vars->timer); batadv_tp_vars_put(tp_vars); } @@ -418,11 +442,14 @@ static void batadv_tp_sender_cleanup(struct batadv_tp_vars *tp_vars) static void batadv_tp_sender_end(struct batadv_priv *bat_priv, struct batadv_tp_vars *tp_vars) { + enum batadv_tp_meter_reason reason; u32 session_cookie; + reason = atomic_read(&tp_vars->send_result); + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Test towards %pM finished..shutting down (reason=%d)\n", - tp_vars->other_end, tp_vars->reason); + tp_vars->other_end, reason); batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Last timing stats: SRTT=%ums RTTVAR=%ums RTO=%ums\n", @@ -435,7 +462,7 @@ static void batadv_tp_sender_end(struct batadv_priv *bat_priv, session_cookie = batadv_tp_session_cookie(tp_vars->session, tp_vars->icmp_uid); - batadv_tp_batctl_notify(tp_vars->reason, + batadv_tp_batctl_notify(reason, tp_vars->other_end, bat_priv, tp_vars->start_time, @@ -451,10 +478,18 @@ static void batadv_tp_sender_end(struct batadv_priv *bat_priv, static void batadv_tp_sender_shutdown(struct batadv_tp_vars *tp_vars, enum batadv_tp_meter_reason reason) { - if (!atomic_dec_and_test(&tp_vars->sending)) - return; + atomic_cmpxchg(&tp_vars->send_result, 0, reason); +} - tp_vars->reason = reason; +/** + * batadv_tp_sender_stopped() - check if tp session was stopped with reason + * @tp_vars: the private data of the current TP meter session + * + * Return: whether stop reason was found + */ +static bool batadv_tp_sender_stopped(struct batadv_tp_vars *tp_vars) +{ + return atomic_read(&tp_vars->send_result) != 0; } /** @@ -484,7 +519,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars) /* most of the time this function is invoked while normal packet * reception... */ - if (unlikely(atomic_read(&tp_vars->sending) == 0)) + if (unlikely(batadv_tp_sender_stopped(tp_vars))) /* timer ref will be dropped in batadv_tp_sender_cleanup */ return; @@ -504,7 +539,7 @@ static void batadv_tp_sender_timeout(struct timer_list *t) struct batadv_tp_vars *tp_vars = timer_container_of(tp_vars, t, timer); struct batadv_priv *bat_priv = tp_vars->bat_priv; - if (atomic_read(&tp_vars->sending) == 0) + if (batadv_tp_sender_stopped(tp_vars)) return; /* if the user waited long enough...shutdown the test */ @@ -659,11 +694,11 @@ static void batadv_tp_recv_ack(struct batadv_priv *bat_priv, /* find the tp_vars */ tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, - icmp->session); + icmp->session, BATADV_TP_SENDER); if (unlikely(!tp_vars)) return; - if (unlikely(atomic_read(&tp_vars->sending) == 0)) + if (unlikely(batadv_tp_sender_stopped(tp_vars))) goto out; /* old ACK? silently drop it.. */ @@ -829,21 +864,21 @@ static int batadv_tp_send(void *arg) if (unlikely(tp_vars->role != BATADV_TP_SENDER)) { err = BATADV_TP_REASON_DST_UNREACHABLE; - tp_vars->reason = err; + batadv_tp_sender_shutdown(tp_vars, err); goto out; } orig_node = batadv_orig_hash_find(bat_priv, tp_vars->other_end); if (unlikely(!orig_node)) { err = BATADV_TP_REASON_DST_UNREACHABLE; - tp_vars->reason = err; + batadv_tp_sender_shutdown(tp_vars, err); goto out; } primary_if = batadv_primary_if_get_selected(bat_priv); if (unlikely(!primary_if)) { err = BATADV_TP_REASON_DST_UNREACHABLE; - tp_vars->reason = err; + batadv_tp_sender_shutdown(tp_vars, err); goto out; } @@ -862,7 +897,7 @@ static int batadv_tp_send(void *arg) queue_delayed_work(batadv_event_workqueue, &tp_vars->finish_work, msecs_to_jiffies(tp_vars->test_length)); - while (atomic_read(&tp_vars->sending) != 0) { + while (!batadv_tp_sender_stopped(tp_vars)) { if (unlikely(!batadv_tp_avail(tp_vars, payload_len))) { batadv_tp_wait_available(tp_vars, payload_len); continue; @@ -885,8 +920,7 @@ static int batadv_tp_send(void *arg) "Meter: %s() cannot send packets (%d)\n", __func__, err); /* ensure nobody else tries to stop the thread now */ - if (atomic_dec_and_test(&tp_vars->sending)) - tp_vars->reason = err; + batadv_tp_sender_shutdown(tp_vars, err); break; } @@ -972,10 +1006,8 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, return; } - tp_vars = batadv_tp_list_find(bat_priv, dst); - if (tp_vars) { + if (batadv_tp_list_active(bat_priv, dst)) { spin_unlock_bh(&bat_priv->tp_list_lock); - batadv_tp_vars_put(tp_vars); batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Meter: test to or from the same node already ongoing, aborting\n"); batadv_tp_batctl_error_notify(BATADV_TP_REASON_ALREADY_ONGOING, @@ -1008,7 +1040,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, ether_addr_copy(tp_vars->other_end, dst); kref_init(&tp_vars->refcount); tp_vars->role = BATADV_TP_SENDER; - atomic_set(&tp_vars->sending, 1); + atomic_set(&tp_vars->send_result, 0); memcpy(tp_vars->session, session_id, sizeof(session_id)); tp_vars->icmp_uid = icmp_uid; @@ -1096,16 +1128,16 @@ void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst, if (!orig_node) return; - tp_vars = batadv_tp_list_find(bat_priv, orig_node->orig); + tp_vars = batadv_tp_list_find(bat_priv, orig_node->orig, BATADV_TP_SENDER); if (!tp_vars) { batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Meter: trying to interrupt an already over connection\n"); - goto out; + goto out_put_orig_node; } batadv_tp_sender_shutdown(tp_vars, return_value); batadv_tp_vars_put(tp_vars); -out: +out_put_orig_node: batadv_orig_node_put(orig_node); } @@ -1156,6 +1188,9 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t) spin_unlock_bh(&tp_vars->unacked_lock); /* drop reference of timer */ + if (WARN_ON(atomic_xchg(&tp_vars->receiving, 0) != 1)) + return; + batadv_tp_vars_put(tp_vars); } @@ -1356,7 +1391,7 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv, goto out_unlock; tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, - icmp->session); + icmp->session, BATADV_TP_RECEIVER); if (tp_vars) goto out_unlock; @@ -1374,6 +1409,7 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv, ether_addr_copy(tp_vars->other_end, icmp->orig); tp_vars->role = BATADV_TP_RECEIVER; + atomic_set(&tp_vars->receiving, 1); memcpy(tp_vars->session, icmp->session, sizeof(tp_vars->session)); tp_vars->last_recv = BATADV_TP_FIRST_SEQ; tp_vars->bat_priv = bat_priv; @@ -1426,7 +1462,7 @@ static void batadv_tp_recv_msg(struct batadv_priv *bat_priv, } } else { tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, - icmp->session); + icmp->session, BATADV_TP_RECEIVER); if (!tp_vars) { batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Unexpected packet from %pM!\n", @@ -1435,13 +1471,6 @@ static void batadv_tp_recv_msg(struct batadv_priv *bat_priv, } } - if (unlikely(tp_vars->role != BATADV_TP_RECEIVER)) { - batadv_dbg(BATADV_DBG_TP_METER, bat_priv, - "Meter: dropping packet: not expected (role=%u)\n", - tp_vars->role); - goto out; - } - tp_vars->last_recv_time = jiffies; /* if the packet is a duplicate, it may be the case that an ACK has been @@ -1546,8 +1575,12 @@ void batadv_tp_stop_all(struct batadv_priv *bat_priv) break; case BATADV_TP_RECEIVER: batadv_tp_list_detach(tp_var); - if (timer_shutdown_sync(&tp_var->timer)) - batadv_tp_vars_put(tp_var); + timer_shutdown_sync(&tp_var->timer); + + if (atomic_xchg(&tp_var->receiving, 0) != 1) + break; + + batadv_tp_vars_put(tp_var); break; } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 05cddcf994f6..9f6e67771ffa 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -797,24 +797,33 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, s32 *tt_len) { u16 num_vlan = 0; - u16 num_entries = 0; u16 tvlv_len = 0; unsigned int change_offset; struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_orig_node_vlan *vlan; + u16 total_entries = 0; u8 *tt_change_ptr; + int vlan_entries; + u16 sum_entries; spin_lock_bh(&orig_node->vlan_list_lock); hlist_for_each_entry(vlan, &orig_node->vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + + if (check_add_overflow(vlan_entries, total_entries, &sum_entries)) { + *tt_len = 0; + goto out; + } + + total_entries = sum_entries; num_vlan++; - num_entries += atomic_read(&vlan->tt.num_entries); } change_offset = struct_size(*tt_data, vlan_data, num_vlan); /* if tt_len is negative, allocate the space needed by the full table */ if (*tt_len < 0) - *tt_len = batadv_tt_len(num_entries); + *tt_len = batadv_tt_len(total_entries); if (change_offset > U16_MAX || *tt_len > U16_MAX - change_offset) { *tt_len = 0; @@ -835,14 +844,26 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, (*tt_data)->num_vlan = htons(num_vlan); tt_vlan = (*tt_data)->vlan_data; + num_vlan = 0; hlist_for_each_entry(vlan, &orig_node->vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + if (vlan_entries < 1) + continue; + tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); tt_vlan->reserved = 0; tt_vlan++; + num_vlan++; } + /* recalculate in case number of VLANs reduced */ + change_offset = struct_size(*tt_data, vlan_data, num_vlan); + tvlv_len = *tt_len + change_offset; + + (*tt_data)->num_vlan = htons(num_vlan); + tt_change_ptr = (u8 *)*tt_data + change_offset; *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; @@ -877,21 +898,25 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, { struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_meshif_vlan *vlan; + size_t change_offset; u16 num_vlan = 0; - u16 vlan_entries = 0; u16 total_entries = 0; u16 tvlv_len; u8 *tt_change_ptr; - int change_offset; + int vlan_entries; + u16 sum_entries; spin_lock_bh(&bat_priv->meshif_vlan_list_lock); hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); - if (vlan_entries < 1) - continue; + if (check_add_overflow(vlan_entries, total_entries, &sum_entries)) { + tvlv_len = 0; + goto out; + } + + total_entries = sum_entries; num_vlan++; - total_entries += vlan_entries; } change_offset = struct_size(*tt_data, vlan_data, num_vlan); @@ -900,8 +925,10 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, if (*tt_len < 0) *tt_len = batadv_tt_len(total_entries); - tvlv_len = *tt_len; - tvlv_len += change_offset; + if (check_add_overflow(*tt_len, change_offset, &tvlv_len)) { + tvlv_len = 0; + goto out; + } *tt_data = kmalloc(tvlv_len, GFP_ATOMIC); if (!*tt_data) { @@ -914,6 +941,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, (*tt_data)->num_vlan = htons(num_vlan); tt_vlan = (*tt_data)->vlan_data; + num_vlan = 0; hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); if (vlan_entries < 1) @@ -924,8 +952,15 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, tt_vlan->reserved = 0; tt_vlan++; + num_vlan++; } + /* recalculate in case number of VLANs reduced */ + change_offset = struct_size(*tt_data, vlan_data, num_vlan); + tvlv_len = *tt_len + change_offset; + + (*tt_data)->num_vlan = htons(num_vlan); + tt_change_ptr = (u8 *)*tt_data + change_offset; *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 76dff1f9c559..cde798c82dcf 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -8,10 +8,12 @@ #include <linux/byteorder/generic.h> #include <linux/container_of.h> +#include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/kref.h> +#include <linux/limits.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> @@ -159,10 +161,10 @@ batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) * * Return: size of all currently registered tvlv containers in bytes. */ -static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) +static size_t batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) { struct batadv_tvlv_container *tvlv; - u16 tvlv_len = 0; + size_t tvlv_len = 0; lockdep_assert_held(&bat_priv->tvlv.container_list_lock); @@ -306,26 +308,35 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff, * The ogm packet might be enlarged or shrunk depending on the current size * and the size of the to-be-appended tvlv containers. * - * Return: size of all appended tvlv containers in bytes. + * Return: size of all appended tvlv containers in bytes (max U16_MAX), negative + * if operation failed */ -u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, +int batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, unsigned char **packet_buff, int *packet_buff_len, int packet_min_len) { struct batadv_tvlv_container *tvlv; struct batadv_tvlv_hdr *tvlv_hdr; - u16 tvlv_value_len; + size_t tvlv_value_len; void *tvlv_value; + int tvlv_len_ret; bool ret; spin_lock_bh(&bat_priv->tvlv.container_list_lock); tvlv_value_len = batadv_tvlv_container_list_size(bat_priv); + if (tvlv_value_len > U16_MAX) { + tvlv_len_ret = -E2BIG; + goto end; + } ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len, packet_min_len, tvlv_value_len); - - if (!ret) + if (!ret) { + tvlv_len_ret = -ENOMEM; goto end; + } + + tvlv_len_ret = tvlv_value_len; if (!tvlv_value_len) goto end; @@ -344,7 +355,8 @@ u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, end: spin_unlock_bh(&bat_priv->tvlv.container_list_lock); - return tvlv_value_len; + + return tvlv_len_ret; } /** diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h index e5697230d991..f96f6b3f44a0 100644 --- a/net/batman-adv/tvlv.h +++ b/net/batman-adv/tvlv.h @@ -16,7 +16,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, u8 type, u8 version, void *tvlv_value, u16 tvlv_value_len); -u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, +int batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, unsigned char **packet_buff, int *packet_buff_len, int packet_min_len); void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 41c1d19f786b..c9bd49d23547 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -83,6 +83,9 @@ struct batadv_hard_iface_bat_iv { /** @ogm_seqno: OGM sequence number - used to identify each OGM */ atomic_t ogm_seqno; + /** @reschedule_work: recover OGM schedule after schedule error */ + struct delayed_work reschedule_work; + /** @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */ struct mutex ogm_buff_mutex; }; @@ -301,7 +304,7 @@ struct batadv_frag_table_entry { u16 seqno; /** @size: accumulated size of packets in list */ - u16 size; + size_t size; /** @total_size: expected size of the assembled packet */ u16 total_size; @@ -452,7 +455,7 @@ struct batadv_orig_node { * @tt_buff_len: length of the last tt changeset this node received * from the orig node */ - s16 tt_buff_len; + u16 tt_buff_len; /** @tt_buff_lock: lock that protects tt_buff and tt_buff_len */ spinlock_t tt_buff_lock; @@ -993,7 +996,7 @@ struct batadv_priv_tt { * @last_changeset_len: length of last tt changeset this host has * generated */ - s16 last_changeset_len; + u16 last_changeset_len; /** * @last_changeset_lock: lock protecting last_changeset & @@ -1024,6 +1027,12 @@ struct batadv_priv_bla { atomic_t num_requests; /** + * @num_requests_lock: locks update num_requests + + * batadv_backbone_gw::state + batadv_backbone_gw::wait_periods update + */ + spinlock_t num_requests_lock; + + /** * @claim_hash: hash table containing mesh nodes this host has claimed */ struct batadv_hashtable *claim_hash; @@ -1320,11 +1329,14 @@ struct batadv_tp_vars { /** @role: receiver/sender modi */ enum batadv_tp_meter_role role; - /** @sending: sending binary semaphore: 1 if sending, 0 is not */ - atomic_t sending; + /** + * @send_result: 0 when sending is ongoing and otherwise + * enum batadv_tp_meter_reason + */ + atomic_t send_result; - /** @reason: reason for a stopped session */ - enum batadv_tp_meter_reason reason; + /** @receiving: receiving binary semaphore: 1 if receiving, 0 is not */ + atomic_t receiving; /** @finish_work: work item for the finishing procedure */ struct delayed_work finish_work; @@ -1666,6 +1678,27 @@ struct batadv_priv { #ifdef CONFIG_BATMAN_ADV_BLA +enum batadv_bla_backbone_gw_state { + /** + * @BATADV_BLA_BACKBONE_GW_STOPPED: backbone gw is being removed + * and it must not longer work on requests + */ + BATADV_BLA_BACKBONE_GW_STOPPED, + + /** + * @BATADV_BLA_BACKBONE_GW_UNSYNCED: backbone was detected out + * of sync and a request was send. No traffic is forwarded until the + * situation is resolved + */ + BATADV_BLA_BACKBONE_GW_UNSYNCED, + + /** + * @BATADV_BLA_BACKBONE_GW_SYNCED: backbone is consider to be in + * sync. traffic can be forwarded + */ + BATADV_BLA_BACKBONE_GW_SYNCED, +}; + /** * struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN */ @@ -1691,16 +1724,12 @@ struct batadv_bla_backbone_gw { /** * @wait_periods: grace time for bridge forward delays and bla group * forming at bootup phase - no bcast traffic is formwared until it has - * elapsed + * elapsed. Must only be access with num_requests_lock. */ - atomic_t wait_periods; + u8 wait_periods; - /** - * @request_sent: if this bool is set to true we are out of sync with - * this backbone gateway - no bcast traffic is formwared until the - * situation was resolved - */ - atomic_t request_sent; + /** @state: sync state. Must only be access with num_requests_lock. */ + enum batadv_bla_backbone_gw_state state; /** @crc: crc16 checksum over all claims */ u16 crc; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 2b94e2077203..70e35e198075 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -154,6 +154,7 @@ struct sock *bt_sock_alloc(struct net *net, struct socket *sock, sock_init_data(sock, sk); INIT_LIST_HEAD(&bt_sk(sk)->accept_q); + spin_lock_init(&bt_sk(sk)->accept_q_lock); sock_reset_flag(sk, SOCK_ZAPPED); @@ -214,6 +215,7 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) { const struct cred *old_cred; struct pid *old_pid; + struct bt_sock *par = bt_sk(parent); BT_DBG("parent %p, sk %p", parent, sk); @@ -224,9 +226,13 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) else lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q); bt_sk(sk)->parent = parent; + spin_lock_bh(&par->accept_q_lock); + list_add_tail(&bt_sk(sk)->accept_q, &par->accept_q); + sk_acceptq_added(parent); + spin_unlock_bh(&par->accept_q_lock); + /* Copy credentials from parent since for incoming connections the * socket is allocated by the kernel. */ @@ -244,8 +250,6 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) bh_unlock_sock(sk); else release_sock(sk); - - sk_acceptq_added(parent); } EXPORT_SYMBOL(bt_accept_enqueue); @@ -254,45 +258,72 @@ EXPORT_SYMBOL(bt_accept_enqueue); */ void bt_accept_unlink(struct sock *sk) { + struct sock *parent = bt_sk(sk)->parent; + BT_DBG("sk %p state %d", sk, sk->sk_state); + spin_lock_bh(&bt_sk(parent)->accept_q_lock); list_del_init(&bt_sk(sk)->accept_q); - sk_acceptq_removed(bt_sk(sk)->parent); + sk_acceptq_removed(parent); + spin_unlock_bh(&bt_sk(parent)->accept_q_lock); bt_sk(sk)->parent = NULL; sock_put(sk); } EXPORT_SYMBOL(bt_accept_unlink); +static struct sock *bt_accept_get(struct sock *parent, struct sock *sk) +{ + struct bt_sock *bt = bt_sk(parent); + struct sock *next = NULL; + + /* accept_q is modified from child teardown paths too, so take a + * temporary reference before dropping the queue lock. + */ + spin_lock_bh(&bt->accept_q_lock); + + if (sk) { + if (bt_sk(sk)->parent != parent) + goto out; + + if (!list_is_last(&bt_sk(sk)->accept_q, &bt->accept_q)) { + next = &list_next_entry(bt_sk(sk), accept_q)->sk; + sock_hold(next); + } + } else if (!list_empty(&bt->accept_q)) { + next = &list_first_entry(&bt->accept_q, + struct bt_sock, accept_q)->sk; + sock_hold(next); + } + +out: + spin_unlock_bh(&bt->accept_q_lock); + return next; +} + struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) { - struct bt_sock *s, *n; - struct sock *sk; + struct sock *sk, *next; BT_DBG("parent %p", parent); restart: - list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { - sk = (struct sock *)s; - + for (sk = bt_accept_get(parent, NULL); sk; sk = next) { /* Prevent early freeing of sk due to unlink and sock_kill */ - sock_hold(sk); lock_sock(sk); /* Check sk has not already been unlinked via * bt_accept_unlink() due to serialisation caused by sk locking */ - if (!bt_sk(sk)->parent) { + if (bt_sk(sk)->parent != parent) { BT_DBG("sk %p, already unlinked", sk); release_sock(sk); sock_put(sk); - /* Restart the loop as sk is no longer in the list - * and also avoid a potential infinite loop because - * list_for_each_entry_safe() is not thread safe. - */ goto restart; } + next = bt_accept_get(parent, sk); + /* sk is safely in the parent list so reduce reference count */ sock_put(sk); @@ -309,7 +340,19 @@ restart: if (newsock) sock_graft(sk, newsock); + /* Hand the caller a reference taken while sk is + * still locked. bt_accept_unlink() just dropped + * the accept-queue reference; without this hold a + * concurrent teardown (e.g. l2cap_conn_del() -> + * l2cap_sock_kill()) could free sk between + * release_sock() and the caller using it. Every + * caller drops this with sock_put() when done. + */ + sock_hold(sk); + release_sock(sk); + if (next) + sock_put(next); return sk; } @@ -518,18 +561,28 @@ EXPORT_SYMBOL(bt_sock_stream_recvmsg); static inline __poll_t bt_accept_poll(struct sock *parent) { - struct bt_sock *s, *n; + struct bt_sock *bt = bt_sk(parent); + struct bt_sock *s; struct sock *sk; + __poll_t mask = 0; + + spin_lock_bh(&bt->accept_q_lock); + list_for_each_entry(s, &bt->accept_q, accept_q) { + int state; - list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { sk = (struct sock *)s; - if (sk->sk_state == BT_CONNECTED || - (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) && - sk->sk_state == BT_CONNECT2)) - return EPOLLIN | EPOLLRDNORM; + state = READ_ONCE(sk->sk_state); + + if (state == BT_CONNECTED || + (test_bit(BT_SK_DEFER_SETUP, &bt->flags) && + state == BT_CONNECT2)) { + mask = EPOLLIN | EPOLLRDNORM; + break; + } } + spin_unlock_bh(&bt->accept_q_lock); - return 0; + return mask; } __poll_t bt_sock_poll(struct file *file, struct socket *sock, diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index d44987d4515c..b3cef7a4db54 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -638,8 +638,8 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) goto failed; } - up_write(&bnep_session_sem); strcpy(req->device, dev->name); + up_write(&bnep_session_sem); return 0; failed: diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 46ebd69026fe..038e292fb194 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -741,6 +741,8 @@ static void iso_sock_cleanup_listen(struct sock *parent) while ((sk = bt_accept_dequeue(parent, NULL))) { iso_sock_close(sk); iso_sock_kill(sk); + /* Drop the reference handed back by bt_accept_dequeue(). */ + sock_put(sk); } /* If listening socket has a hcon, properly disconnect it */ @@ -1282,8 +1284,13 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock, } ch = bt_accept_dequeue(sk, newsock); - if (ch) + if (ch) { + /* Drop the bridging ref from bt_accept_dequeue(); + * the grafted socket keeps ch alive from here. + */ + sock_put(ch); break; + } if (!timeo) { err = -EAGAIN; @@ -2454,6 +2461,11 @@ int iso_recv(struct hci_dev *hdev, u16 handle, struct sk_buff *skb, u16 flags) break; case ISO_END: + if (!conn->rx_len) { + BT_ERR("Unexpected end frame (len %d)", skb->len); + goto drop; + } + skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), skb->len); conn->rx_len -= skb->len; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index bcb13ce53109..87ebe81277c5 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7275,7 +7275,7 @@ static void l2cap_ecred_reconfigure(struct l2cap_chan *chan) chan->ident = l2cap_get_ident(conn); l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_RECONF_REQ, - sizeof(pdu), &pdu); + struct_size(pdu, scid, 1), pdu); } int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 15637402a39d..898ee21d7e4f 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -349,8 +349,13 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, } nsk = bt_accept_dequeue(sk, newsock); - if (nsk) + if (nsk) { + /* Drop the bridging ref from bt_accept_dequeue(); + * the grafted socket keeps nsk alive from here. + */ + sock_put(nsk); break; + } if (!timeo) { err = -EAGAIN; @@ -1457,22 +1462,54 @@ static void l2cap_sock_cleanup_listen(struct sock *parent) BT_DBG("parent %p state %s", parent, state_to_string(parent->sk_state)); - /* Close not yet accepted channels */ + /* Close not yet accepted channels. + * + * bt_accept_dequeue() now returns sk with an extra reference held + * (taken while sk was still locked) so a concurrent l2cap_conn_del() + * -> l2cap_sock_kill() cannot free sk under us. + * + * cleanup_listen() runs under the parent sk lock, so unlike + * l2cap_sock_shutdown() we must NOT take conn->lock here: that would + * establish sk_lock -> conn->lock and invert the established + * conn->lock -> chan->lock -> sk_lock order (lockdep deadlock). + * + * Instead, briefly take the child sk lock to fetch and pin its chan. + * l2cap_conn_del() reaches the chan free only via + * l2cap_chan_del() -> l2cap_sock_teardown_cb(), which itself takes + * the child sk lock; holding it across l2cap_chan_hold_unless_zero() + * therefore guarantees the chan cannot be freed while we read and + * pin it (hold_unless_zero() additionally skips a chan already past + * its last reference). We then drop the sk lock before taking + * chan->lock, so sk and chan locks are never held together. + */ while ((sk = bt_accept_dequeue(parent, NULL))) { - struct l2cap_chan *chan = l2cap_pi(sk)->chan; + struct l2cap_chan *chan; + + lock_sock_nested(sk, L2CAP_NESTING_NORMAL); + chan = l2cap_chan_hold_unless_zero(l2cap_pi(sk)->chan); + release_sock(sk); + if (!chan) { + /* l2cap_conn_del() already tearing this child down */ + sock_put(sk); + continue; + } BT_DBG("child chan %p state %s", chan, state_to_string(chan->state)); - l2cap_chan_hold(chan); l2cap_chan_lock(chan); - __clear_chan_timer(chan); l2cap_chan_close(chan, ECONNRESET); - l2cap_sock_kill(sk); - + /* l2cap_conn_del() may already have killed this socket + * (it sets SOCK_DEAD); skip the duplicate to avoid a + * double sock_put()/l2cap_chan_put(). + */ + if (!sock_flag(sk, SOCK_DEAD)) + l2cap_sock_kill(sk); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); + sock_put(sk); } } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9065a864bc65..91d1c0d132f9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9094,9 +9094,15 @@ static int add_ext_adv_data(struct sock *sk, struct hci_dev *hdev, void *data, struct adv_info *adv_instance; int err = 0; struct mgmt_pending_cmd *cmd; + u16 expected_len; BT_DBG("%s", hdev->name); + expected_len = struct_size(cp, data, cp->adv_data_len + cp->scan_rsp_len); + if (expected_len != data_len) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); adv_instance = hci_find_adv_instance(hdev, cp->instance); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 913402806fa0..3052436e9c6d 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -180,6 +180,8 @@ static void rfcomm_sock_cleanup_listen(struct sock *parent) while ((sk = bt_accept_dequeue(parent, NULL))) { rfcomm_sock_close(sk); rfcomm_sock_kill(sk); + /* Drop the reference handed back by bt_accept_dequeue(). */ + sock_put(sk); } parent->sk_state = BT_CLOSED; @@ -496,8 +498,13 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, } nsk = bt_accept_dequeue(sk, newsock); - if (nsk) + if (nsk) { + /* Drop the bridging ref from bt_accept_dequeue(); + * the grafted socket keeps nsk alive from here. + */ + sock_put(nsk); break; + } if (!timeo) { err = -EAGAIN; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 9404fdb10ea6..a536c2edd14f 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -498,6 +498,8 @@ static void sco_sock_cleanup_listen(struct sock *parent) while ((sk = bt_accept_dequeue(parent, NULL))) { sco_sock_close(sk); sco_sock_kill(sk); + /* Drop the reference handed back by bt_accept_dequeue(). */ + sock_put(sk); } parent->sk_state = BT_CLOSED; @@ -759,8 +761,13 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, } ch = bt_accept_dequeue(sk, newsock); - if (ch) + if (ch) { + /* Drop the bridging ref from bt_accept_dequeue(); + * the grafted socket keeps ch alive from here. + */ + sock_put(ch); break; + } if (!timeo) { err = -EAGAIN; diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c index ce6f63c77cc0..86f0e75d6e34 100644 --- a/net/bridge/br_mrp_netlink.c +++ b/net/bridge/br_mrp_netlink.c @@ -196,7 +196,7 @@ static const struct nla_policy br_mrp_start_test_policy[IFLA_BRIDGE_MRP_START_TEST_MAX + 1] = { [IFLA_BRIDGE_MRP_START_TEST_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_MRP_START_TEST_RING_ID] = { .type = NLA_U32 }, - [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = NLA_POLICY_MIN(NLA_U32, 1), [IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_TEST_PERIOD] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_TEST_MONITOR] = { .type = NLA_U32 }, @@ -316,7 +316,7 @@ static const struct nla_policy br_mrp_start_in_test_policy[IFLA_BRIDGE_MRP_START_IN_TEST_MAX + 1] = { [IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID] = { .type = NLA_U32 }, - [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = NLA_POLICY_MIN(NLA_U32, 1), [IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD] = { .type = NLA_U32 }, }; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 5855eb050208..b77b3250afbd 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4640,10 +4640,31 @@ static void br_multicast_start_querier(struct net_bridge_mcast *brmctx, rcu_read_unlock(); } +static void br_multicast_enable_all_ports(struct net_bridge *br) +{ + struct net_bridge_port *port; + + if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) + return; + + list_for_each_entry(port, &br->port_list, list) + __br_multicast_enable_port_ctx(&port->multicast_ctx); +} + +static void br_multicast_disable_all_ports(struct net_bridge *br) +{ + struct net_bridge_port *port; + + if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) + return; + + list_for_each_entry(port, &br->port_list, list) + __br_multicast_disable_port_ctx(&port->multicast_ctx); +} + int br_multicast_toggle(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - struct net_bridge_port *port; bool change_snoopers = false; int err = 0; @@ -4660,6 +4681,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val, br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val); if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) { change_snoopers = true; + br_multicast_disable_all_ports(br); goto unlock; } @@ -4667,8 +4689,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val, goto unlock; br_multicast_open(br); - list_for_each_entry(port, &br->port_list, list) - __br_multicast_enable_port_ctx(&port->multicast_ctx); + br_multicast_enable_all_ports(br); change_snoopers = true; diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 741360219552..f05c79f215ea 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -112,24 +112,22 @@ static struct pernet_operations broute_net_ops = { static int __init ebtable_broute_init(void) { - int ret = ebt_register_template(&broute_table, broute_table_init); + int ret = register_pernet_subsys(&broute_net_ops); if (ret) return ret; - ret = register_pernet_subsys(&broute_net_ops); - if (ret) { - ebt_unregister_template(&broute_table); - return ret; - } + ret = ebt_register_template(&broute_table, broute_table_init); + if (ret) + unregister_pernet_subsys(&broute_net_ops); - return 0; + return ret; } static void __exit ebtable_broute_fini(void) { - unregister_pernet_subsys(&broute_net_ops); ebt_unregister_template(&broute_table); + unregister_pernet_subsys(&broute_net_ops); } module_init(ebtable_broute_init); diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index dacd81b12e62..0fc03b07e62a 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -93,24 +93,22 @@ static struct pernet_operations frame_filter_net_ops = { static int __init ebtable_filter_init(void) { - int ret = ebt_register_template(&frame_filter, frame_filter_table_init); + int ret = register_pernet_subsys(&frame_filter_net_ops); if (ret) return ret; - ret = register_pernet_subsys(&frame_filter_net_ops); - if (ret) { - ebt_unregister_template(&frame_filter); - return ret; - } + ret = ebt_register_template(&frame_filter, frame_filter_table_init); + if (ret) + unregister_pernet_subsys(&frame_filter_net_ops); - return 0; + return ret; } static void __exit ebtable_filter_fini(void) { - unregister_pernet_subsys(&frame_filter_net_ops); ebt_unregister_template(&frame_filter); + unregister_pernet_subsys(&frame_filter_net_ops); } module_init(ebtable_filter_init); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 0f2a8c6118d4..8a10375d8909 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -93,24 +93,22 @@ static struct pernet_operations frame_nat_net_ops = { static int __init ebtable_nat_init(void) { - int ret = ebt_register_template(&frame_nat, frame_nat_table_init); + int ret = register_pernet_subsys(&frame_nat_net_ops); if (ret) return ret; - ret = register_pernet_subsys(&frame_nat_net_ops); - if (ret) { - ebt_unregister_template(&frame_nat); - return ret; - } + ret = ebt_register_template(&frame_nat, frame_nat_table_init); + if (ret) + unregister_pernet_subsys(&frame_nat_net_ops); return ret; } static void __exit ebtable_nat_fini(void) { - unregister_pernet_subsys(&frame_nat_net_ops); ebt_unregister_template(&frame_nat); + unregister_pernet_subsys(&frame_nat_net_ops); } module_init(ebtable_nat_init); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index a04fc1757528..77df9e856c2e 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -42,6 +42,7 @@ struct ebt_pernet { struct list_head tables; + struct list_head dead_tables; }; struct ebt_template { @@ -1162,11 +1163,6 @@ free_newinfo: static void __ebt_unregister_table(struct net *net, struct ebt_table *table) { - mutex_lock(&ebt_mutex); - list_del(&table->list); - mutex_unlock(&ebt_mutex); - audit_log_nfcfg(table->name, AF_BRIDGE, table->private->nentries, - AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, ebt_cleanup_entry, net, NULL); if (table->private->nentries) @@ -1267,13 +1263,15 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, for (i = 0; i < num_ops; i++) ops[i].priv = table; - list_add(&table->list, &ebt_net->tables); - mutex_unlock(&ebt_mutex); - table->ops = ops; ret = nf_register_net_hooks(net, ops, num_ops); - if (ret) + if (ret) { + synchronize_rcu(); __ebt_unregister_table(net, table); + } else { + list_add(&table->list, &ebt_net->tables); + } + mutex_unlock(&ebt_mutex); audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries, AUDIT_XT_OP_REGISTER, GFP_KERNEL); @@ -1339,7 +1337,7 @@ void ebt_unregister_template(const struct ebt_table *t) } EXPORT_SYMBOL(ebt_unregister_template); -static struct ebt_table *__ebt_find_table(struct net *net, const char *name) +void ebt_unregister_table_pre_exit(struct net *net, const char *name) { struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); struct ebt_table *t; @@ -1348,30 +1346,36 @@ static struct ebt_table *__ebt_find_table(struct net *net, const char *name) list_for_each_entry(t, &ebt_net->tables, list) { if (strcmp(t->name, name) == 0) { + list_move(&t->list, &ebt_net->dead_tables); mutex_unlock(&ebt_mutex); - return t; + nf_unregister_net_hooks(net, t->ops, hweight32(t->valid_hooks)); + return; } } mutex_unlock(&ebt_mutex); - return NULL; -} - -void ebt_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct ebt_table *table = __ebt_find_table(net, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } EXPORT_SYMBOL(ebt_unregister_table_pre_exit); void ebt_unregister_table(struct net *net, const char *name) { - struct ebt_table *table = __ebt_find_table(net, name); + struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); + struct ebt_table *t; - if (table) - __ebt_unregister_table(net, table); + mutex_lock(&ebt_mutex); + + list_for_each_entry(t, &ebt_net->dead_tables, list) { + if (strcmp(t->name, name) == 0) { + list_del(&t->list); + audit_log_nfcfg(t->name, AF_BRIDGE, t->private->nentries, + AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); + __ebt_unregister_table(net, t); + mutex_unlock(&ebt_mutex); + return; + } + } + + mutex_unlock(&ebt_mutex); } /* userspace just supplied us with counters */ @@ -2556,11 +2560,21 @@ static int __net_init ebt_pernet_init(struct net *net) struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); INIT_LIST_HEAD(&ebt_net->tables); + INIT_LIST_HEAD(&ebt_net->dead_tables); return 0; } +static void __net_exit ebt_pernet_exit(struct net *net) +{ + struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); + + WARN_ON_ONCE(!list_empty(&ebt_net->tables)); + WARN_ON_ONCE(!list_empty(&ebt_net->dead_tables)); +} + static struct pernet_operations ebt_net_ops = { .init = ebt_pernet_init, + .exit = ebt_pernet_exit, .id = &ebt_pernet_id, .size = sizeof(struct ebt_pernet), }; @@ -2569,19 +2583,20 @@ static int __init ebtables_init(void) { int ret; - ret = xt_register_target(&ebt_standard_target); + ret = register_pernet_subsys(&ebt_net_ops); if (ret < 0) return ret; - ret = nf_register_sockopt(&ebt_sockopts); + + ret = xt_register_target(&ebt_standard_target); if (ret < 0) { - xt_unregister_target(&ebt_standard_target); + unregister_pernet_subsys(&ebt_net_ops); return ret; } - ret = register_pernet_subsys(&ebt_net_ops); + ret = nf_register_sockopt(&ebt_sockopts); if (ret < 0) { - nf_unregister_sockopt(&ebt_sockopts); xt_unregister_target(&ebt_standard_target); + unregister_pernet_subsys(&ebt_net_ops); return ret; } diff --git a/net/core/dev.c b/net/core/dev.c index 29f2f35ae5eb..681d7de89c50 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6777,9 +6777,9 @@ static void skb_defer_free_flush(void) #if defined(CONFIG_NET_RX_BUSY_POLL) -static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule) +static void __busy_poll_stop(struct napi_struct *napi, unsigned long timeout) { - if (!skip_schedule) { + if (!timeout) { gro_normal_list(&napi->gro); __napi_schedule(napi); return; @@ -6789,6 +6789,8 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule) gro_flush_normal(&napi->gro, HZ >= 1000); clear_bit(NAPI_STATE_SCHED, &napi->state); + hrtimer_start(&napi->timer, ns_to_ktime(timeout), + HRTIMER_MODE_REL_PINNED); } enum { @@ -6800,8 +6802,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, unsigned flags, u16 budget) { struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; - bool skip_schedule = false; - unsigned long timeout; + unsigned long timeout = 0; int rc; /* Busy polling means there is a high chance device driver hard irq @@ -6821,10 +6822,12 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, if (flags & NAPI_F_PREFER_BUSY_POLL) { napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi); - timeout = napi_get_gro_flush_timeout(napi); - if (napi->defer_hard_irqs_count && timeout) { - hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); - skip_schedule = true; + if (napi->defer_hard_irqs_count) { + /* A short enough gro flush timeout and long enough + * poll can result in timer firing too early. + * Timer will be armed later if necessary. + */ + timeout = napi_get_gro_flush_timeout(napi); } } @@ -6839,7 +6842,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, trace_napi_poll(napi, rc, budget); netpoll_poll_unlock(have_poll_lock); if (rc == budget) - __busy_poll_stop(napi, skip_schedule); + __busy_poll_stop(napi, timeout); bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); } diff --git a/net/core/gro.c b/net/core/gro.c index 867611d171db..b5f790a643d4 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -109,6 +109,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) if (p->pp_recycle != skb->pp_recycle) return -ETOOMANYREFS; + if (skb_zcopy(p) || skb_zcopy(skb)) + return -ETOOMANYREFS; + if (unlikely(p->len + len >= netif_get_gro_max_size(p->dev, p) || NAPI_GRO_CB(skb)->flush)) return -E2BIG; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 35a6acbf9a57..75ea4fdb2764 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1268,12 +1268,19 @@ out: static void sk_psock_verdict_data_ready(struct sock *sk) { const struct proto_ops *ops = NULL; + struct sk_psock *psock; struct socket *sock; int copied; trace_sk_data_ready(sk); rcu_read_lock(); + psock = sk_psock(sk); + if (psock && tls_sw_has_ctx_rx(sk)) { + psock->saved_data_ready(sk); + rcu_read_unlock(); + return; + } sock = READ_ONCE(sk->sk_socket); if (likely(sock)) ops = READ_ONCE(sock->ops); @@ -1283,8 +1290,6 @@ static void sk_psock_verdict_data_ready(struct sock *sk) copied = ops->read_skb(sk, sk_psock_verdict_recv); if (copied >= 0) { - struct sk_psock *psock; - rcu_read_lock(); psock = sk_psock(sk); if (psock) diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index f0883357d12e..4691d6d0f2b7 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -91,7 +91,7 @@ static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start, u32 mask; if (end <= start) - return true; + return false; if (start % 32) { mask = ethnl_upper_bits(start); @@ -104,11 +104,11 @@ static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start, start_word++; } - if (!memchr_inv(map + start_word, '\0', - (end_word - start_word) * sizeof(u32))) + if (memchr_inv(map + start_word, '\0', + (end_word - start_word) * sizeof(u32))) return true; if (end % 32 == 0) - return true; + return false; return map[end_word] & ethnl_lower_bits(end); } diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c index 68372bef4b2f..98392a3c34b5 100644 --- a/net/ethtool/phy.c +++ b/net/ethtool/phy.c @@ -76,6 +76,7 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info, struct nlattr **tb = info->attrs; struct phy_device_node *pdn; struct phy_device *phydev; + int ret; /* RTNL is held by the caller */ phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PHY_HEADER, @@ -88,8 +89,19 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info, return -EOPNOTSUPP; rep_data->phyindex = phydev->phyindex; + rep_data->name = kstrdup(dev_name(&phydev->mdio.dev), GFP_KERNEL); - rep_data->drvname = kstrdup(phydev->drv->name, GFP_KERNEL); + if (!rep_data->name) + return -ENOMEM; + + if (phydev->drv) { + rep_data->drvname = kstrdup(phydev->drv->name, GFP_KERNEL); + if (!rep_data->drvname) { + ret = -ENOMEM; + goto err_free_name; + } + } + rep_data->upstream_type = pdn->upstream_type; if (pdn->upstream_type == PHY_UPSTREAM_PHY) { @@ -97,15 +109,33 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info, rep_data->upstream_index = upstream->phyindex; } - if (pdn->parent_sfp_bus) + if (pdn->parent_sfp_bus) { rep_data->upstream_sfp_name = kstrdup(sfp_get_name(pdn->parent_sfp_bus), GFP_KERNEL); + if (!rep_data->upstream_sfp_name) { + ret = -ENOMEM; + goto err_free_drvname; + } + } - if (phydev->sfp_bus) + if (phydev->sfp_bus) { rep_data->downstream_sfp_name = kstrdup(sfp_get_name(phydev->sfp_bus), GFP_KERNEL); + if (!rep_data->downstream_sfp_name) { + ret = -ENOMEM; + goto err_free_upstream_sfp; + } + } return 0; + +err_free_upstream_sfp: + kfree(rep_data->upstream_sfp_name); +err_free_drvname: + kfree(rep_data->drvname); +err_free_name: + kfree(rep_data->name); + return ret; } static int phy_fill_reply(struct sk_buff *skb, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index ed773cd48876..c777895a720e 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1149,7 +1149,7 @@ no_ownership: } drop: - __inet_csk_reqsk_queue_drop(sk_listener, oreq, true); + __inet_csk_reqsk_queue_drop(oreq->rsk_listener, oreq, true); reqsk_put(oreq); } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 97ead883e4a1..f3dadbc416a3 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1501,13 +1501,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len static void __arpt_unregister_table(struct net *net, struct xt_table *table) { - struct xt_table_info *private; - void *loc_cpu_entry; + struct xt_table_info *private = table->private; struct module *table_owner = table->me; + void *loc_cpu_entry; struct arpt_entry *iter; - private = xt_unregister_table(table); - /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) @@ -1515,6 +1513,7 @@ static void __arpt_unregister_table(struct net *net, struct xt_table *table) if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); + kfree(table); } int arpt_register_table(struct net *net, @@ -1581,18 +1580,9 @@ out_free: return ret; } -void arpt_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); -} -EXPORT_SYMBOL(arpt_unregister_table_pre_exit); - void arpt_unregister_table(struct net *net, const char *name) { - struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name); + struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_ARP, name); if (table) __arpt_unregister_table(net, table); diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 78cd5ee24448..370b635e3523 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -43,7 +43,7 @@ static int arptable_filter_table_init(struct net *net) static void __net_exit arptable_filter_net_pre_exit(struct net *net) { - arpt_unregister_table_pre_exit(net, "filter"); + xt_unregister_table_pre_exit(net, NFPROTO_ARP, "filter"); } static void __net_exit arptable_filter_net_exit(struct net *net) @@ -58,32 +58,33 @@ static struct pernet_operations arptable_filter_net_ops = { static int __init arptable_filter_init(void) { - int ret = xt_register_template(&packet_filter, - arptable_filter_table_init); - - if (ret < 0) - return ret; + int ret; arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arpt_do_table); - if (IS_ERR(arpfilter_ops)) { - xt_unregister_template(&packet_filter); + if (IS_ERR(arpfilter_ops)) return PTR_ERR(arpfilter_ops); - } ret = register_pernet_subsys(&arptable_filter_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_filter, + arptable_filter_table_init); if (ret < 0) { - xt_unregister_template(&packet_filter); - kfree(arpfilter_ops); - return ret; + unregister_pernet_subsys(&arptable_filter_net_ops); + goto err_free; } + return 0; +err_free: + kfree(arpfilter_ops); return ret; } static void __exit arptable_filter_fini(void) { - unregister_pernet_subsys(&arptable_filter_net_ops); xt_unregister_template(&packet_filter); + unregister_pernet_subsys(&arptable_filter_net_ops); kfree(arpfilter_ops); } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 23c8deff8095..f4079f0718de 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1704,12 +1704,10 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) static void __ipt_unregister_table(struct net *net, struct xt_table *table) { - struct xt_table_info *private; - void *loc_cpu_entry; + struct xt_table_info *private = table->private; struct module *table_owner = table->me; struct ipt_entry *iter; - - private = xt_unregister_table(table); + void *loc_cpu_entry; /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; @@ -1718,6 +1716,7 @@ static void __ipt_unregister_table(struct net *net, struct xt_table *table) if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); + kfree(table); } int ipt_register_table(struct net *net, const struct xt_table *table, @@ -1789,17 +1788,9 @@ out_free: return ret; } -void ipt_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); -} - void ipt_unregister_table_exit(struct net *net, const char *name) { - struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name); + struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_IPV4, name); if (table) __ipt_unregister_table(net, table); @@ -1887,7 +1878,6 @@ static void __exit ip_tables_fini(void) } EXPORT_SYMBOL(ipt_register_table); -EXPORT_SYMBOL(ipt_unregister_table_pre_exit); EXPORT_SYMBOL(ipt_unregister_table_exit); EXPORT_SYMBOL(ipt_do_table); module_init(ip_tables_init); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 3ab908b74795..672d7da1071d 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -61,7 +61,7 @@ static int __net_init iptable_filter_net_init(struct net *net) static void __net_exit iptable_filter_net_pre_exit(struct net *net) { - ipt_unregister_table_pre_exit(net, "filter"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "filter"); } static void __net_exit iptable_filter_net_exit(struct net *net) @@ -77,32 +77,33 @@ static struct pernet_operations iptable_filter_net_ops = { static int __init iptable_filter_init(void) { - int ret = xt_register_template(&packet_filter, - iptable_filter_table_init); - - if (ret < 0) - return ret; + int ret; filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table); - if (IS_ERR(filter_ops)) { - xt_unregister_template(&packet_filter); + if (IS_ERR(filter_ops)) return PTR_ERR(filter_ops); - } ret = register_pernet_subsys(&iptable_filter_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_filter, + iptable_filter_table_init); if (ret < 0) { - xt_unregister_template(&packet_filter); - kfree(filter_ops); - return ret; + unregister_pernet_subsys(&iptable_filter_net_ops); + goto err_free; } return 0; +err_free: + kfree(filter_ops); + return ret; } static void __exit iptable_filter_fini(void) { - unregister_pernet_subsys(&iptable_filter_net_ops); xt_unregister_template(&packet_filter); + unregister_pernet_subsys(&iptable_filter_net_ops); kfree(filter_ops); } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 385d945d8ebe..13d25d9a4610 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -96,7 +96,7 @@ static int iptable_mangle_table_init(struct net *net) static void __net_exit iptable_mangle_net_pre_exit(struct net *net) { - ipt_unregister_table_pre_exit(net, "mangle"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "mangle"); } static void __net_exit iptable_mangle_net_exit(struct net *net) @@ -111,32 +111,33 @@ static struct pernet_operations iptable_mangle_net_ops = { static int __init iptable_mangle_init(void) { - int ret = xt_register_template(&packet_mangler, - iptable_mangle_table_init); - if (ret < 0) - return ret; + int ret; mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook); - if (IS_ERR(mangle_ops)) { - xt_unregister_template(&packet_mangler); - ret = PTR_ERR(mangle_ops); - return ret; - } + if (IS_ERR(mangle_ops)) + return PTR_ERR(mangle_ops); ret = register_pernet_subsys(&iptable_mangle_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_mangler, + iptable_mangle_table_init); if (ret < 0) { - xt_unregister_template(&packet_mangler); - kfree(mangle_ops); - return ret; + unregister_pernet_subsys(&iptable_mangle_net_ops); + goto err_free; } + return 0; +err_free: + kfree(mangle_ops); return ret; } static void __exit iptable_mangle_fini(void) { - unregister_pernet_subsys(&iptable_mangle_net_ops); xt_unregister_template(&packet_mangler); + unregister_pernet_subsys(&iptable_mangle_net_ops); kfree(mangle_ops); } diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 625a1ca13b1b..a0df72554025 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -119,8 +119,11 @@ static int iptable_nat_table_init(struct net *net) } ret = ipt_nat_register_lookups(net); - if (ret < 0) + if (ret < 0) { + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "nat"); + synchronize_rcu(); ipt_unregister_table_exit(net, "nat"); + } kfree(repl); return ret; @@ -129,6 +132,7 @@ static int iptable_nat_table_init(struct net *net) static void __net_exit iptable_nat_net_pre_exit(struct net *net) { ipt_nat_unregister_lookups(net); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "nat"); } static void __net_exit iptable_nat_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 0e7f53964d0a..2745c22f4034 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -53,7 +53,7 @@ static int iptable_raw_table_init(struct net *net) static void __net_exit iptable_raw_net_pre_exit(struct net *net) { - ipt_unregister_table_pre_exit(net, "raw"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "raw"); } static void __net_exit iptable_raw_net_exit(struct net *net) @@ -77,32 +77,32 @@ static int __init iptable_raw_init(void) pr_info("Enabling raw table before defrag\n"); } - ret = xt_register_template(table, - iptable_raw_table_init); - if (ret < 0) - return ret; - rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table); - if (IS_ERR(rawtable_ops)) { - xt_unregister_template(table); + if (IS_ERR(rawtable_ops)) return PTR_ERR(rawtable_ops); - } ret = register_pernet_subsys(&iptable_raw_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(table, + iptable_raw_table_init); if (ret < 0) { - xt_unregister_template(table); - kfree(rawtable_ops); - return ret; + unregister_pernet_subsys(&iptable_raw_net_ops); + goto err_free; } + return 0; +err_free: + kfree(rawtable_ops); return ret; } static void __exit iptable_raw_fini(void) { + xt_unregister_template(&packet_raw); unregister_pernet_subsys(&iptable_raw_net_ops); kfree(rawtable_ops); - xt_unregister_template(&packet_raw); } module_init(iptable_raw_init); diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index d885443cb267..491894511c54 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -50,7 +50,7 @@ static int iptable_security_table_init(struct net *net) static void __net_exit iptable_security_net_pre_exit(struct net *net) { - ipt_unregister_table_pre_exit(net, "security"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "security"); } static void __net_exit iptable_security_net_exit(struct net *net) @@ -65,33 +65,34 @@ static struct pernet_operations iptable_security_net_ops = { static int __init iptable_security_init(void) { - int ret = xt_register_template(&security_table, - iptable_security_table_init); - - if (ret < 0) - return ret; + int ret; sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table); - if (IS_ERR(sectbl_ops)) { - xt_unregister_template(&security_table); + if (IS_ERR(sectbl_ops)) return PTR_ERR(sectbl_ops); - } ret = register_pernet_subsys(&iptable_security_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&security_table, + iptable_security_table_init); if (ret < 0) { - xt_unregister_template(&security_table); - kfree(sectbl_ops); - return ret; + unregister_pernet_subsys(&iptable_security_net_ops); + goto err_free; } + return 0; +err_free: + kfree(sectbl_ops); return ret; } static void __exit iptable_security_fini(void) { + xt_unregister_template(&security_table); unregister_pernet_subsys(&iptable_security_net_ops); kfree(sectbl_ops); - xt_unregister_template(&security_table); } module_init(iptable_security_init); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index d54ebb7df966..f79f4c29a043 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -390,7 +390,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, * in, reject the frame as invalid */ err = -EINVAL; - if (iphlen > length) + if (iphlen > length || iphlen < sizeof(*iph)) goto error_free; if (iphlen >= sizeof(*iph)) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2de4748269ca..6fc00b38695b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -300,9 +300,6 @@ enum { DEFINE_PER_CPU(unsigned int, tcp_orphan_count); EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count); -DEFINE_PER_CPU(u32, tcp_tw_isn); -EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn); - long sysctl_tcp_mem[3] __read_mostly; EXPORT_IPV6_MOD(sysctl_tcp_mem); diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 849a69c1f497..aa624434b555 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -116,7 +116,8 @@ struct tcp_ao_key *tcp_ao_established_key(const struct sock *sk, { struct tcp_ao_key *key; - hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) { + hlist_for_each_entry_rcu(key, &ao->head, node, + sk_fullsock(sk) && lockdep_sock_is_held(sk)) { if ((sndid >= 0 && key->sndid != sndid) || (rcvid >= 0 && key->rcvid != rcvid)) continue; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b5cf32a56c04..c650398e9199 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7387,6 +7387,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct sock *sk, struct sk_buff *skb) { struct tcp_fastopen_cookie foc = { .len = -1 }; + u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; struct tcp_options_received tmp_opt; const struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); @@ -7397,20 +7398,16 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct dst_entry *dst; struct flowi fl; u8 syncookies; - u32 isn; #ifdef CONFIG_TCP_AO const struct tcp_ao_hdr *aoh; #endif - isn = __this_cpu_read(tcp_tw_isn); - if (isn) { - /* TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. - */ - __this_cpu_write(tcp_tw_isn, 0); - } else { + /* If isn is non-zero, this SYN originally matched a TIME_WAIT socket. + * TW sockets are converted to open requests without limitations, + * we skip the queue limits and syncookie checks in the block below. + */ + if (!isn) { syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 702fdff58f7a..36206fc6aed2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2333,6 +2333,7 @@ lookup: } } + isn = 0; process: if (static_branch_unlikely(&ip4_min_ttl)) { /* min_ttl can be changed concurrently from do_ip_setsockopt() */ @@ -2361,6 +2362,7 @@ process: th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); + TCP_SKB_CB(skb)->tcp_tw_isn = isn; skb->dev = NULL; @@ -2446,7 +2448,6 @@ do_time_wait: sk = sk2; tcp_v4_restore_cb(skb); refcounted = false; - __this_cpu_write(tcp_tw_isn, isn); goto process; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index ec03bcff6b65..d15e60943820 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -910,16 +910,27 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) { + enum skb_drop_reason drop_reason; struct ioam6_trace_hdr *trace; struct ioam6_namespace *ns; + struct inet6_dev *idev; struct ioam6_hdr *hdr; + drop_reason = SKB_DROP_REASON_IP_INHDR; + /* Bad alignment (must be 4n-aligned) */ if (optoff & 3) goto drop; + /* Does the device still have IPv6 configuration? */ + idev = __in6_dev_get(skb->dev); + if (!idev) { + drop_reason = SKB_DROP_REASON_IPV6DISABLED; + goto drop; + } + /* Ignore if IOAM is not enabled on ingress */ - if (!READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_enabled)) + if (!READ_ONCE(idev->cnf.ioam6_enabled)) goto ignore; /* Truncated Option header */ @@ -955,9 +966,9 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) goto drop; - /* Trace pointer may have changed */ - trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) - + optoff + sizeof(*hdr)); + /* Trace and hdr pointers may have changed */ + hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff); + trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr)); ioam6_fill_trace_data(skb, ns, trace, true); @@ -972,7 +983,7 @@ ignore: return true; drop: - kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); + kfree_skb_reason(skb, drop_reason); return false; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d585ac3c1113..dfaea4f6727e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1713,12 +1713,10 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) static void __ip6t_unregister_table(struct net *net, struct xt_table *table) { - struct xt_table_info *private; - void *loc_cpu_entry; + struct xt_table_info *private = table->private; struct module *table_owner = table->me; struct ip6t_entry *iter; - - private = xt_unregister_table(table); + void *loc_cpu_entry; /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; @@ -1727,6 +1725,7 @@ static void __ip6t_unregister_table(struct net *net, struct xt_table *table) if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); + kfree(table); } int ip6t_register_table(struct net *net, const struct xt_table *table, @@ -1795,17 +1794,9 @@ out_free: return ret; } -void ip6t_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); -} - void ip6t_unregister_table_exit(struct net *net, const char *name) { - struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name); + struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_IPV6, name); if (table) __ip6t_unregister_table(net, table); @@ -1894,7 +1885,6 @@ static void __exit ip6_tables_fini(void) } EXPORT_SYMBOL(ip6t_register_table); -EXPORT_SYMBOL(ip6t_unregister_table_pre_exit); EXPORT_SYMBOL(ip6t_unregister_table_exit); EXPORT_SYMBOL(ip6t_do_table); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index e7a3fb9355ee..450dd53846a2 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -168,6 +168,10 @@ static int hbh_mt6_check(const struct xt_mtchk_param *par) pr_debug("unknown flags %X\n", optsinfo->invflags); return -EINVAL; } + if (optsinfo->optsnr > IP6T_OPTS_OPTSNR) { + pr_debug("too many supported opts specified\n"); + return -EINVAL; + } if (optsinfo->flags & IP6T_OPTS_NSTRICT) { pr_debug("Not strict - not implemented"); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index e8992693e14a..b074fc477676 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -60,7 +60,7 @@ static int __net_init ip6table_filter_net_init(struct net *net) static void __net_exit ip6table_filter_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "filter"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "filter"); } static void __net_exit ip6table_filter_net_exit(struct net *net) @@ -76,32 +76,32 @@ static struct pernet_operations ip6table_filter_net_ops = { static int __init ip6table_filter_init(void) { - int ret = xt_register_template(&packet_filter, - ip6table_filter_table_init); - - if (ret < 0) - return ret; + int ret; filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table); - if (IS_ERR(filter_ops)) { - xt_unregister_template(&packet_filter); + if (IS_ERR(filter_ops)) return PTR_ERR(filter_ops); - } ret = register_pernet_subsys(&ip6table_filter_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_filter, ip6table_filter_table_init); if (ret < 0) { - xt_unregister_template(&packet_filter); - kfree(filter_ops); - return ret; + unregister_pernet_subsys(&ip6table_filter_net_ops); + goto err_free; } + return 0; +err_free: + kfree(filter_ops); return ret; } static void __exit ip6table_filter_fini(void) { - unregister_pernet_subsys(&ip6table_filter_net_ops); xt_unregister_template(&packet_filter); + unregister_pernet_subsys(&ip6table_filter_net_ops); kfree(filter_ops); } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 8dd4cd0c47bd..e6ee036a9b2c 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -89,7 +89,7 @@ static int ip6table_mangle_table_init(struct net *net) static void __net_exit ip6table_mangle_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "mangle"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "mangle"); } static void __net_exit ip6table_mangle_net_exit(struct net *net) @@ -104,32 +104,33 @@ static struct pernet_operations ip6table_mangle_net_ops = { static int __init ip6table_mangle_init(void) { - int ret = xt_register_template(&packet_mangler, - ip6table_mangle_table_init); - - if (ret < 0) - return ret; + int ret; mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook); - if (IS_ERR(mangle_ops)) { - xt_unregister_template(&packet_mangler); + if (IS_ERR(mangle_ops)) return PTR_ERR(mangle_ops); - } ret = register_pernet_subsys(&ip6table_mangle_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_mangler, + ip6table_mangle_table_init); if (ret < 0) { - xt_unregister_template(&packet_mangler); - kfree(mangle_ops); - return ret; + unregister_pernet_subsys(&ip6table_mangle_net_ops); + goto err_free; } + return 0; +err_free: + kfree(mangle_ops); return ret; } static void __exit ip6table_mangle_fini(void) { - unregister_pernet_subsys(&ip6table_mangle_net_ops); xt_unregister_template(&packet_mangler); + unregister_pernet_subsys(&ip6table_mangle_net_ops); kfree(mangle_ops); } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 5be723232df8..c2394e2c94b5 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -121,8 +121,11 @@ static int ip6table_nat_table_init(struct net *net) } ret = ip6t_nat_register_lookups(net); - if (ret < 0) + if (ret < 0) { + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "nat"); + synchronize_rcu(); ip6t_unregister_table_exit(net, "nat"); + } kfree(repl); return ret; @@ -131,6 +134,7 @@ static int ip6table_nat_table_init(struct net *net) static void __net_exit ip6table_nat_net_pre_exit(struct net *net) { ip6t_nat_unregister_lookups(net); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "nat"); } static void __net_exit ip6table_nat_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index fc9f6754028f..3b161ee875bc 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -52,7 +52,7 @@ static int ip6table_raw_table_init(struct net *net) static void __net_exit ip6table_raw_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "raw"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "raw"); } static void __net_exit ip6table_raw_net_exit(struct net *net) @@ -75,31 +75,31 @@ static int __init ip6table_raw_init(void) pr_info("Enabling raw table before defrag\n"); } - ret = xt_register_template(table, ip6table_raw_table_init); - if (ret < 0) - return ret; - /* Register hooks */ rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table); - if (IS_ERR(rawtable_ops)) { - xt_unregister_template(table); + if (IS_ERR(rawtable_ops)) return PTR_ERR(rawtable_ops); - } ret = register_pernet_subsys(&ip6table_raw_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(table, ip6table_raw_table_init); if (ret < 0) { - kfree(rawtable_ops); - xt_unregister_template(table); - return ret; + unregister_pernet_subsys(&ip6table_raw_net_ops); + goto err_free; } + return 0; +err_free: + kfree(rawtable_ops); return ret; } static void __exit ip6table_raw_fini(void) { - unregister_pernet_subsys(&ip6table_raw_net_ops); xt_unregister_template(&packet_raw); + unregister_pernet_subsys(&ip6table_raw_net_ops); kfree(rawtable_ops); } diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 4df14a9bae78..4bd5d97b8ab6 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -49,7 +49,7 @@ static int ip6table_security_table_init(struct net *net) static void __net_exit ip6table_security_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "security"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "security"); } static void __net_exit ip6table_security_net_exit(struct net *net) @@ -64,32 +64,33 @@ static struct pernet_operations ip6table_security_net_ops = { static int __init ip6table_security_init(void) { - int ret = xt_register_template(&security_table, - ip6table_security_table_init); - - if (ret < 0) - return ret; + int ret; sectbl_ops = xt_hook_ops_alloc(&security_table, ip6t_do_table); - if (IS_ERR(sectbl_ops)) { - xt_unregister_template(&security_table); + if (IS_ERR(sectbl_ops)) return PTR_ERR(sectbl_ops); - } ret = register_pernet_subsys(&ip6table_security_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&security_table, + ip6table_security_table_init); if (ret < 0) { - kfree(sectbl_ops); - xt_unregister_template(&security_table); - return ret; + unregister_pernet_subsys(&ip6table_security_net_ops); + goto err_free; } + return 0; +err_free: + kfree(sectbl_ops); return ret; } static void __exit ip6table_security_fini(void) { - unregister_pernet_subsys(&ip6table_security_net_ops); xt_unregister_template(&security_table); + unregister_pernet_subsys(&ip6table_security_net_ops); kfree(sectbl_ops); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7f20db11e8ce..59b5900dd42b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1863,6 +1863,7 @@ lookup: } } + isn = 0; process: if (static_branch_unlikely(&ip6_min_hopcount)) { /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ @@ -1891,6 +1892,7 @@ process: th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); + TCP_SKB_CB(skb)->tcp_tw_isn = isn; skb->dev = NULL; @@ -1978,7 +1980,6 @@ do_time_wait: sk = sk2; tcp_v6_restore_cb(skb); refcounted = false; - __this_cpu_write(tcp_tw_isn, isn); goto process; } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 9156a937334a..be32bf45d583 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1360,7 +1360,7 @@ static void l2tp_session_unhash(struct l2tp_session *session) spin_lock_bh(&pn->l2tp_session_idr_lock); /* Remove from the per-tunnel list */ - list_del_init(&session->list); + list_del_rcu(&session->list); /* Remove from per-net IDR */ if (tunnel->version == L2TP_HDR_VER_3) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index bcc4090ddc1a..5d1da779cd6f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7959,6 +7959,7 @@ ieee80211_parse_neg_ttlm(struct ieee80211_sub_if_data *sdata, "No active links for TID %d", tid); return -EINVAL; } + pos += map_size; } else { map = 0; } @@ -7977,7 +7978,6 @@ ieee80211_parse_neg_ttlm(struct ieee80211_sub_if_data *sdata, default: return -EINVAL; } - pos += map_size; } return 0; } @@ -10997,6 +10997,9 @@ static void ieee80211_ml_epcs(struct ieee80211_sub_if_data *sdata, control = get_unaligned_le16(pos); link_id = control & IEEE80211_MLE_STA_EPCS_CONTROL_LINK_ID; + if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + continue; + link = sdata_dereference(sdata->link[link_id], sdata); if (!link) continue; diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index c5e0f7f46004..b9ec99f51851 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -34,6 +34,13 @@ #include "led.h" #include "wep.h" +struct ieee80211_elem_defrag { + const struct element *elem; + /* container start/len */ + const u8 *start; + size_t len; +}; + struct ieee80211_elems_parse { /* must be first for kfree to work */ struct ieee802_11_elems elems; @@ -41,11 +48,7 @@ struct ieee80211_elems_parse { /* The basic Multi-Link element in the original elements */ const struct element *ml_basic_elem; - /* The reconfiguration Multi-Link element in the original elements */ - const struct element *ml_reconf_elem; - - /* The EPCS Multi-Link element in the original elements */ - const struct element *ml_epcs_elem; + struct ieee80211_elem_defrag ml_reconf, ml_epcs; bool multi_link_inner; bool skip_vendor; @@ -162,10 +165,14 @@ ieee80211_parse_extension_element(u32 *crc, } break; case IEEE80211_ML_CONTROL_TYPE_RECONF: - elems_parse->ml_reconf_elem = elem; + elems_parse->ml_reconf.elem = elem; + elems_parse->ml_reconf.start = params->start; + elems_parse->ml_reconf.len = params->len; break; case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS: - elems_parse->ml_epcs_elem = elem; + elems_parse->ml_epcs.elem = elem; + elems_parse->ml_epcs.start = params->start; + elems_parse->ml_epcs.len = params->len; break; default: break; @@ -950,46 +957,27 @@ ieee80211_prep_mle_link_parse(struct ieee80211_elems_parse *elems_parse, sub->start, sub->len); } -static void -ieee80211_mle_defrag_reconf(struct ieee80211_elems_parse *elems_parse) -{ - struct ieee802_11_elems *elems = &elems_parse->elems; - ssize_t ml_len; - - ml_len = cfg80211_defragment_element(elems_parse->ml_reconf_elem, - elems->ie_start, - elems->total_len, - elems_parse->scratch_pos, - elems_parse->scratch + - elems_parse->scratch_len - - elems_parse->scratch_pos, - WLAN_EID_FRAGMENT); - if (ml_len < 0) - return; - elems->ml_reconf = (void *)elems_parse->scratch_pos; - elems->ml_reconf_len = ml_len; - elems_parse->scratch_pos += ml_len; -} - -static void -ieee80211_mle_defrag_epcs(struct ieee80211_elems_parse *elems_parse) +static const void * +ieee80211_mle_defrag(struct ieee80211_elems_parse *elems_parse, + struct ieee80211_elem_defrag *defrag, + size_t *out_len) { - struct ieee802_11_elems *elems = &elems_parse->elems; + const void *ret; ssize_t ml_len; - ml_len = cfg80211_defragment_element(elems_parse->ml_epcs_elem, - elems->ie_start, - elems->total_len, + ml_len = cfg80211_defragment_element(defrag->elem, + defrag->start, defrag->len, elems_parse->scratch_pos, elems_parse->scratch + elems_parse->scratch_len - elems_parse->scratch_pos, WLAN_EID_FRAGMENT); if (ml_len < 0) - return; - elems->ml_epcs = (void *)elems_parse->scratch_pos; - elems->ml_epcs_len = ml_len; + return NULL; + ret = elems_parse->scratch_pos; + *out_len = ml_len; elems_parse->scratch_pos += ml_len; + return ret; } struct ieee802_11_elems * @@ -1069,9 +1057,12 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) _ieee802_11_parse_elems_full(&sub, elems_parse, NULL); } - ieee80211_mle_defrag_reconf(elems_parse); - - ieee80211_mle_defrag_epcs(elems_parse); + elems->ml_reconf = ieee80211_mle_defrag(elems_parse, + &elems_parse->ml_reconf, + &elems->ml_reconf_len); + elems->ml_epcs = ieee80211_mle_defrag(elems_parse, + &elems_parse->ml_epcs, + &elems->ml_epcs_len); if (elems->tim && !elems->parse_error) { const struct ieee80211_tim_ie *tim_ie = elems->tim; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 8523d3af3ca7..6c995cc38a00 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -364,7 +364,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer) spin_lock_bh(&msk->pm.lock); - if (!mptcp_pm_should_add_signal_addr(msk)) { + /* The cancel path (mptcp_pm_del_add_timer()) can race with this + * callback. Once cancel updates retrans_times to MAX, suppress further + * retransmissions here. If this callback acquires pm.lock first, one + * final transmit attempt is still possible. + */ + if (entry->retrans_times < ADD_ADDR_RETRANS_MAX && + !mptcp_pm_should_add_signal_addr(msk)) { pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id); mptcp_pm_announce_addr(msk, &entry->addr, false); mptcp_pm_add_addr_send_ack(msk); @@ -414,8 +420,12 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, /* Note: entry might have been removed by another thread. * We hold rcu_read_lock() to ensure it is not freed under us. */ - if (stop_timer) - sk_stop_timer_sync(sk, &entry->add_timer); + if (stop_timer) { + if (check_id) + sk_stop_timer(sk, &entry->add_timer); + else + sk_stop_timer_sync(sk, &entry->add_timer); + } rcu_read_unlock(); return entry; @@ -880,6 +890,7 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, struct mptcp_addr_info *addr, bool *echo, bool *drop_other_suboptions) { + bool skip_add_addr = false; int ret = false; u8 add_addr; u8 family; @@ -901,24 +912,49 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, } *echo = mptcp_pm_should_add_signal_echo(msk); - port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port); - - family = *echo ? msk->pm.remote.family : msk->pm.local.family; - if (remaining < mptcp_add_addr_len(family, *echo, port)) - goto out_unlock; - if (*echo) { *addr = msk->pm.remote; add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO); + port = !!msk->pm.remote.port; + family = msk->pm.remote.family; } else { *addr = msk->pm.local; add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL); + port = !!msk->pm.local.port; + family = msk->pm.local.family; } - WRITE_ONCE(msk->pm.addr_signal, add_addr); + + if (remaining < mptcp_add_addr_len(family, *echo, port)) { + struct net *net = sock_net((struct sock *)msk); + + if (!*drop_other_suboptions) + goto out_unlock; + + if (*echo) { + MPTCP_INC_STATS(net, MPTCP_MIB_ECHOADDTXDROP); + } else { + skip_add_addr = true; + MPTCP_INC_STATS(net, MPTCP_MIB_ADDADDRTXDROP); + } + goto drop_signal_mark; + } + ret = true; +drop_signal_mark: + WRITE_ONCE(msk->pm.addr_signal, add_addr); + out_unlock: spin_unlock_bh(&msk->pm.lock); + + /* On pure-ACK option-space exhaustion, stop retrying this ADD_ADDR: + * clear the signal bit, cancel the matching retransmission timer, and + * let the PM state machine progress. + */ + if (skip_add_addr) { + mptcp_pm_del_add_timer(msk, addr, true); + mptcp_pm_subflow_established(msk); + } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index a22ec1a6f6ec..e26ca2a370e3 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -150,7 +150,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to; ip++, i++) { + for (; ip <= ip_to; i++) { e.ip = htonl(ip); if (i > IPSET_MAX_RANGE) { hash_ipmark4_data_next(&h->next, &e); @@ -162,6 +162,10 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; ret = 0; + + if (ip == ip_to) + break; + ip++; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index e977b5a9c48d..41ca24a22a02 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -186,7 +186,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to; ip++) { + for (; ip <= ip_to;) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++, i++) { @@ -203,6 +203,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], ret = 0; } + if (ip == ip_to) + break; + ip++; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 39a01934b153..b9ac2efaa15c 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -182,7 +182,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to; ip++) { + for (; ip <= ip_to;) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++, i++) { @@ -199,6 +199,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], ret = 0; } + if (ip == ip_to) + break; + ip++; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 5c6de605a9fb..2d6652d43199 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -274,7 +274,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], p = port; ip2 = ip2_from; } - for (; ip <= ip_to; ip++) { + for (; ip <= ip_to;) { e.ip = htonl(ip); for (; p <= port_to; p++) { e.port = htons(p); @@ -298,6 +298,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip2 = ip2_from; } p = port; + if (ip == ip_to) + break; + ip++; } return ret; } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 7f12e56e6e52..dd416c8532c5 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -60,6 +60,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) struct nf_hook_state *state = &entry->state; /* Release those devices we held, or Alexey will kill me. */ + dev_put(entry->skb_dev); dev_put(state->in); dev_put(state->out); if (state->sk) @@ -101,6 +102,7 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt)) return false; + dev_hold(entry->skb_dev); dev_hold(state->in); dev_hold(state->out); @@ -201,11 +203,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, *entry = (struct nf_queue_entry) { .skb = skb, + .skb_dev = skb->dev, .state = *state, .hook_index = index, .size = sizeof(*entry) + route_key_size, }; - __nf_queue_entry_init_physdevs(entry); if (!nf_queue_entry_get_refs(entry)) { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index fe5942535245..d42e8ac3062f 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1198,6 +1198,8 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) if (physinif == ifindex || physoutif == ifindex) return 1; #endif + if (entry->skb_dev && entry->skb_dev->ifindex == ifindex) + return 1; if (entry->state.in) if (entry->state.in->ifindex == ifindex) return 1; diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c index c4569d4b9228..ad08a43535b5 100644 --- a/net/netfilter/nft_inner.c +++ b/net/netfilter/nft_inner.c @@ -163,7 +163,6 @@ static int nft_inner_parse_l2l3(const struct nft_inner *priv, return -1; if (fragoff == 0) { - thoff = nhoff + sizeof(_ip6h); ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; ctx->inner_thoff = thoff; ctx->l4proto = l4proto; @@ -247,8 +246,8 @@ static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt, local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx); if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) { - local_bh_enable(); local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); + local_bh_enable(); return false; } *tun_ctx = *this_cpu_tun_ctx; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 1ca4fa9d249b..76fd0999db4a 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -55,6 +55,9 @@ static struct list_head xt_templates[NFPROTO_NUMPROTO]; struct xt_pernet { struct list_head tables[NFPROTO_NUMPROTO]; + + /* stash area used during netns exit */ + struct list_head dead_tables[NFPROTO_NUMPROTO]; }; struct compat_delta { @@ -1522,22 +1525,87 @@ out: } EXPORT_SYMBOL_GPL(xt_register_table); -void *xt_unregister_table(struct xt_table *table) +/** + * xt_unregister_table_pre_exit - pre-shutdown unregister of a table + * @net: network namespace + * @af: address family (e.g., NFPROTO_IPV4, NFPROTO_IPV6) + * @name: name of the table to unregister + * + * Unregisters the specified netfilter table from the given network namespace + * and also unregisters the hooks from netfilter core: no new packets will be + * processed. + * + * This must be called prior to xt_unregister_table_exit() from the pernet + * .pre_exit callback. After this call, the table is no longer visible to + * the get/setsockopt path. In case of rmmod, module exit path must have + * called xt_unregister_template() prior to unregistering pernet ops to + * prevent re-instantiation of the table. + * + * See also: xt_unregister_table_exit() + */ +void xt_unregister_table_pre_exit(struct net *net, u8 af, const char *name) { - struct xt_table_info *private; + struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); + struct xt_table *t; - mutex_lock(&xt[table->af].mutex); - private = table->private; - list_del(&table->list); - mutex_unlock(&xt[table->af].mutex); - audit_log_nfcfg(table->name, table->af, private->number, - AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); - kfree(table->ops); - kfree(table); + mutex_lock(&xt[af].mutex); + list_for_each_entry(t, &xt_net->tables[af], list) { + if (strcmp(t->name, name) == 0) { + list_move(&t->list, &xt_net->dead_tables[af]); + mutex_unlock(&xt[af].mutex); - return private; + if (t->ops) /* nat table registers with nat core, t->ops is NULL. */ + nf_unregister_net_hooks(net, t->ops, hweight32(t->valid_hooks)); + return; + } + } + mutex_unlock(&xt[af].mutex); } -EXPORT_SYMBOL_GPL(xt_unregister_table); +EXPORT_SYMBOL(xt_unregister_table_pre_exit); + +/** + * xt_unregister_table_exit - remove a table during namespace teardown + * @net: the network namespace from which to unregister the table + * @af: address family (e.g., NFPROTO_IPV4, NFPROTO_IPV6) + * @name: name of the table to unregister + * + * Completes the unregister process for a table. This must be called from + * the pernet ops .exit callback. This is the second stage after + * xt_unregister_table_pre_exit(). + * + * pair with xt_unregister_table_pre_exit() during namespace shutdown. + * + * Return: the unregistered table or NULL if the table was never + * instantiated. The caller needs to kfree() the table after it + * has removed the family specific matches/targets. + */ +struct xt_table *xt_unregister_table_exit(struct net *net, u8 af, const char *name) +{ + struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); + struct xt_table *table; + + mutex_lock(&xt[af].mutex); + list_for_each_entry(table, &xt_net->dead_tables[af], list) { + struct nf_hook_ops *ops = NULL; + + if (strcmp(table->name, name) != 0) + continue; + + list_del(&table->list); + + audit_log_nfcfg(table->name, table->af, table->private->number, + AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); + swap(table->ops, ops); + mutex_unlock(&xt[af].mutex); + + kfree(ops); + return table; + } + mutex_unlock(&xt[af].mutex); + + return NULL; +} +EXPORT_SYMBOL_GPL(xt_unregister_table_exit); #endif #ifdef CONFIG_PROC_FS @@ -1984,8 +2052,10 @@ static int __net_init xt_net_init(struct net *net) struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); int i; - for (i = 0; i < NFPROTO_NUMPROTO; i++) + for (i = 0; i < NFPROTO_NUMPROTO; i++) { INIT_LIST_HEAD(&xt_net->tables[i]); + INIT_LIST_HEAD(&xt_net->dead_tables[i]); + } return 0; } @@ -1994,8 +2064,10 @@ static void __net_exit xt_net_exit(struct net *net) struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); int i; - for (i = 0; i < NFPROTO_NUMPROTO; i++) + for (i = 0; i < NFPROTO_NUMPROTO; i++) { WARN_ON_ONCE(!list_empty(&xt_net->tables[i])); + WARN_ON_ONCE(!list_empty(&xt_net->dead_tables[i])); + } } static struct pernet_operations xt_net_ops = { diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 4db564d9d522..058a16c423ae 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -671,8 +671,23 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) /* Look for an existing pipe handle */ sknode = pep_find_pipe(&pn->hlist, &dst, pipe_handle); - if (sknode) - return sk_receive_skb(sknode, skb, 1); + if (sknode) { + int rc; + + /* pep_do_rcv() runs from two contexts: from softirq via + * phonet_rcv() -> __sk_receive_skb() with BH disabled, + * and from process context via + * release_sock() -> __release_sock(), which drops + * the listener slock with spin_unlock_bh() before draining + * the backlog. The child pipe slock is taken below via + * bh_lock_sock_nested(), which does not itself disable BH, so + * disable BH here to keep both acquire contexts consistent. + */ + local_bh_disable(); + rc = sk_receive_skb(sknode, skb, 1); + local_bh_enable(); + return rc; + } switch (hdr->message_id) { case PNS_PEP_CONNECT_REQ: diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c index c39f5066d8e8..04a761c79548 100644 --- a/net/rxrpc/rxgk.c +++ b/net/rxrpc/rxgk.c @@ -480,8 +480,12 @@ static int rxgk_verify_packet_integrity(struct rxrpc_call *call, _enter(""); - crypto_krb5_where_is_the_data(gk->krb5, KRB5_CHECKSUM_MODE, - &data_offset, &data_len); + if (crypto_krb5_where_is_the_data(gk->krb5, KRB5_CHECKSUM_MODE, + &data_offset, &data_len) < 0) { + ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, + rxgk_abort_1_short_header); + goto put_gk; + } hdr = kzalloc(sizeof(*hdr), GFP_NOFS); if (!hdr) @@ -529,6 +533,13 @@ static int rxgk_verify_packet_encrypted(struct rxrpc_call *call, _enter(""); + if (crypto_krb5_check_data_len(gk->krb5, KRB5_ENCRYPT_MODE, + len, sizeof(hdr)) < 0) { + ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, + rxgk_abort_2_short_header); + goto error; + } + ret = rxgk_decrypt_skb(gk->krb5, gk->rx_enc, skb, &offset, &len, &ac); if (ret < 0) { if (ret != -ENOMEM) diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index be9999ab62e3..ce353042cfa1 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -90,6 +90,12 @@ static int net_shaper_handle_size(void) nla_total_size(sizeof(u32))); } +static int net_shaper_group_reply_size(void) +{ + return nla_total_size(sizeof(u32)) + /* NET_SHAPER_A_IFINDEX */ + net_shaper_handle_size(); /* NET_SHAPER_A_HANDLE */ +} + static int net_shaper_fill_binding(struct sk_buff *msg, const struct net_shaper_binding *binding, u32 type) @@ -130,35 +136,58 @@ handle_nest_cancel: return -EMSGSIZE; } +static void net_shaper_copy(struct net_shaper *dst, + const struct net_shaper *src) +{ + WRITE_ONCE(dst->parent.scope, READ_ONCE(src->parent.scope)); + WRITE_ONCE(dst->parent.id, READ_ONCE(src->parent.id)); + WRITE_ONCE(dst->handle.scope, READ_ONCE(src->handle.scope)); + WRITE_ONCE(dst->handle.id, READ_ONCE(src->handle.id)); + + WRITE_ONCE(dst->metric, READ_ONCE(src->metric)); + WRITE_ONCE(dst->bw_min, READ_ONCE(src->bw_min)); + WRITE_ONCE(dst->bw_max, READ_ONCE(src->bw_max)); + WRITE_ONCE(dst->burst, READ_ONCE(src->burst)); + WRITE_ONCE(dst->priority, READ_ONCE(src->priority)); + WRITE_ONCE(dst->weight, READ_ONCE(src->weight)); + + /* private fields are only used on the write path under the lock */ + data_race(dst->leaves = src->leaves); +} + static int net_shaper_fill_one(struct sk_buff *msg, const struct net_shaper_binding *binding, const struct net_shaper *shaper, const struct genl_info *info) { + struct net_shaper cur; void *hdr; hdr = genlmsg_iput(msg, info); if (!hdr) return -EMSGSIZE; + /* Make a copy to avoid data races */ + net_shaper_copy(&cur, shaper); + if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_IFINDEX) || - net_shaper_fill_handle(msg, &shaper->parent, + net_shaper_fill_handle(msg, &cur.parent, NET_SHAPER_A_PARENT) || - net_shaper_fill_handle(msg, &shaper->handle, + net_shaper_fill_handle(msg, &cur.handle, NET_SHAPER_A_HANDLE) || - ((shaper->bw_min || shaper->bw_max || shaper->burst) && - nla_put_u32(msg, NET_SHAPER_A_METRIC, shaper->metric)) || - (shaper->bw_min && - nla_put_uint(msg, NET_SHAPER_A_BW_MIN, shaper->bw_min)) || - (shaper->bw_max && - nla_put_uint(msg, NET_SHAPER_A_BW_MAX, shaper->bw_max)) || - (shaper->burst && - nla_put_uint(msg, NET_SHAPER_A_BURST, shaper->burst)) || - (shaper->priority && - nla_put_u32(msg, NET_SHAPER_A_PRIORITY, shaper->priority)) || - (shaper->weight && - nla_put_u32(msg, NET_SHAPER_A_WEIGHT, shaper->weight))) + ((cur.bw_min || cur.bw_max || cur.burst) && + nla_put_u32(msg, NET_SHAPER_A_METRIC, cur.metric)) || + (cur.bw_min && + nla_put_uint(msg, NET_SHAPER_A_BW_MIN, cur.bw_min)) || + (cur.bw_max && + nla_put_uint(msg, NET_SHAPER_A_BW_MAX, cur.bw_max)) || + (cur.burst && + nla_put_uint(msg, NET_SHAPER_A_BURST, cur.burst)) || + (cur.priority && + nla_put_u32(msg, NET_SHAPER_A_PRIORITY, cur.priority)) || + (cur.weight && + nla_put_u32(msg, NET_SHAPER_A_WEIGHT, cur.weight))) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -275,25 +304,24 @@ static void net_shaper_default_parent(const struct net_shaper_handle *handle, parent->id = 0; } -/* - * MARK_0 is already in use due to XA_FLAGS_ALLOC, can't reuse such flag as - * it's cleared by xa_store(). - */ -#define NET_SHAPER_NOT_VALID XA_MARK_1 - static struct net_shaper * net_shaper_lookup(struct net_shaper_binding *binding, const struct net_shaper_handle *handle) { u32 index = net_shaper_handle_to_index(handle); struct net_shaper_hierarchy *hierarchy; + struct net_shaper *cur; hierarchy = net_shaper_hierarchy_rcu(binding); - if (!hierarchy || xa_get_mark(&hierarchy->shapers, index, - NET_SHAPER_NOT_VALID)) + if (!hierarchy) return NULL; - return xa_load(&hierarchy->shapers, index); + cur = xa_load(&hierarchy->shapers, index); + /* Check valid before reading fields */ + if (!cur || !smp_load_acquire(&cur->valid)) + return NULL; + + return cur; } /* Allocate on demand the per device shaper's hierarchy container. @@ -370,13 +398,10 @@ static int net_shaper_pre_insert(struct net_shaper_binding *binding, goto free_id; } - /* Mark 'tentative' shaper inside the hierarchy container. - * xa_set_mark is a no-op if the previous store fails. + /* Insert as 'tentative' (no VALID mark). The mark will be set by + * net_shaper_commit() once the driver-side configuration succeeds. */ - xa_lock(&hierarchy->shapers); - prev = __xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL); - __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_NOT_VALID); - xa_unlock(&hierarchy->shapers); + prev = xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL); if (xa_err(prev)) { NL_SET_ERR_MSG(extack, "Can't insert shaper into device store"); kfree_rcu(cur, rcu); @@ -410,12 +435,10 @@ static void net_shaper_commit(struct net_shaper_binding *binding, if (WARN_ON_ONCE(!cur)) continue; - /* Successful update: drop the tentative mark - * and update the hierarchy container. - */ - __xa_clear_mark(&hierarchy->shapers, index, - NET_SHAPER_NOT_VALID); - *cur = shapers[i]; + /* Successful update: update the hierarchy container... */ + net_shaper_copy(cur, &shapers[i]); + /* ... publish to lockless readers. */ + smp_store_release(&cur->valid, true); } xa_unlock(&hierarchy->shapers); } @@ -431,10 +454,11 @@ static void net_shaper_rollback(struct net_shaper_binding *binding) return; xa_lock(&hierarchy->shapers); - xa_for_each_marked(&hierarchy->shapers, index, cur, - NET_SHAPER_NOT_VALID) { + xa_for_each(&hierarchy->shapers, index, cur) { + if (cur->valid) + continue; __xa_erase(&hierarchy->shapers, index); - kfree(cur); + kfree_rcu(cur, rcu); } xa_unlock(&hierarchy->shapers); } @@ -465,10 +489,21 @@ static int net_shaper_parse_handle(const struct nlattr *attr, * shaper (any other value). */ id_attr = tb[NET_SHAPER_A_HANDLE_ID]; - if (id_attr) + if (id_attr) { id = nla_get_u32(id_attr); - else if (handle->scope == NET_SHAPER_SCOPE_NODE) + } else if (handle->scope == NET_SHAPER_SCOPE_NODE) { id = NET_SHAPER_ID_UNSPEC; + } else if (handle->scope == NET_SHAPER_SCOPE_QUEUE) { + NL_SET_ERR_ATTR_MISS(info->extack, attr, + NET_SHAPER_A_HANDLE_ID); + return -EINVAL; + } + + if (id && handle->scope == NET_SHAPER_SCOPE_NETDEV) { + NL_SET_ERR_MSG_ATTR(info->extack, id_attr, + "Netdev scope is a singleton, must use ID 0"); + return -EINVAL; + } handle->id = id; return 0; @@ -836,7 +871,12 @@ int net_shaper_nl_get_dumpit(struct sk_buff *skb, goto out_unlock; for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index, - U32_MAX, XA_PRESENT)); ctx->start_index++) { + U32_MAX, XA_PRESENT)); + ctx->start_index++) { + /* Check valid before reading fields */ + if (!smp_load_acquire(&shaper->valid)) + continue; + ret = net_shaper_fill_one(skb, binding, shaper, info); if (ret) break; @@ -932,6 +972,46 @@ static int net_shaper_handle_cmp(const struct net_shaper_handle *a, return memcmp(a, b, sizeof(*a)); } +static int net_shaper_parse_leaves(struct net_shaper_binding *binding, + struct genl_info *info, + const struct net_shaper *node, + struct net_shaper *leaves, + int leaves_count) +{ + struct nlattr *attr; + int i, j, ret, rem; + + i = 0; + nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES, + genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) { + if (WARN_ON_ONCE(i >= leaves_count)) + return -EINVAL; + + ret = net_shaper_parse_leaf(binding, attr, info, + node, &leaves[i]); + if (ret) + return ret; + + /* Reject duplicates */ + for (j = 0; j < i; j++) { + if (net_shaper_handle_cmp(&leaves[i].handle, + &leaves[j].handle)) + continue; + + NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr, + "Duplicate leaf shaper %d:%d", + leaves[i].handle.scope, + leaves[i].handle.id); + return -EINVAL; + } + + i++; + } + + return 0; +} + static int net_shaper_parent_from_leaves(int leaves_count, const struct net_shaper *leaves, struct net_shaper *node, @@ -964,15 +1044,22 @@ static int __net_shaper_group(struct net_shaper_binding *binding, int i, ret; if (node->handle.scope == NET_SHAPER_SCOPE_NODE) { + struct net_shaper *cur = NULL; + new_node = node->handle.id == NET_SHAPER_ID_UNSPEC; - if (!new_node && !net_shaper_lookup(binding, &node->handle)) { - /* The related attribute is not available when - * reaching here from the delete() op. - */ - NL_SET_ERR_MSG_FMT(extack, "Node shaper %d:%d does not exists", - node->handle.scope, node->handle.id); - return -ENOENT; + if (!new_node) { + cur = net_shaper_lookup(binding, &node->handle); + if (!cur) { + /* The related attribute is not available + * when reaching here from the delete() op. + */ + NL_SET_ERR_MSG_FMT(extack, + "Node shaper %d:%d does not exist", + node->handle.scope, + node->handle.id); + return -ENOENT; + } } /* When unspecified, the node parent scope is inherited from @@ -986,6 +1073,15 @@ static int __net_shaper_group(struct net_shaper_binding *binding, return ret; } + if (cur && net_shaper_handle_cmp(&cur->parent, + &node->parent)) { + NL_SET_ERR_MSG_FMT(extack, + "Cannot reparent node shaper %d:%d", + node->handle.scope, + node->handle.id); + return -EOPNOTSUPP; + } + } else { net_shaper_default_parent(&node->handle, &node->parent); } @@ -1163,7 +1259,7 @@ static int net_shaper_group_send_reply(struct net_shaper_binding *binding, free_msg: /* Should never happen as msg is pre-allocated with enough space. */ WARN_ONCE(true, "calculated message payload length (%d)", - net_shaper_handle_size()); + net_shaper_group_reply_size()); nlmsg_free(msg); return -EMSGSIZE; } @@ -1173,10 +1269,9 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) struct net_shaper **old_nodes, *leaves, node = {}; struct net_shaper_hierarchy *hierarchy; struct net_shaper_binding *binding; - int i, ret, rem, leaves_count; + int i, ret, leaves_count; int old_nodes_count = 0; struct sk_buff *msg; - struct nlattr *attr; if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_LEAVES)) return -EINVAL; @@ -1204,26 +1299,19 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) if (ret) goto free_leaves; - i = 0; - nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES, - genlmsg_data(info->genlhdr), - genlmsg_len(info->genlhdr), rem) { - if (WARN_ON_ONCE(i >= leaves_count)) - goto free_leaves; - - ret = net_shaper_parse_leaf(binding, attr, info, - &node, &leaves[i]); - if (ret) - goto free_leaves; - i++; - } + ret = net_shaper_parse_leaves(binding, info, &node, + leaves, leaves_count); + if (ret) + goto free_leaves; /* Prepare the msg reply in advance, to avoid device operation * rollback on allocation failure. */ - msg = genlmsg_new(net_shaper_handle_size(), GFP_KERNEL); - if (!msg) + msg = genlmsg_new(net_shaper_group_reply_size(), GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; goto free_leaves; + } hierarchy = net_shaper_hierarchy_setup(binding); if (!hierarchy) { diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 6421c2e1c84d..5915fcdef743 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1400,7 +1400,8 @@ smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm *aclc, int i; for (i = 0; i < ini->ism_offered_cnt + 1; i++) { - if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) { + if (ini->ism_dev[i] && + ini->ism_chid[i] == ntohs(aclc->d1.chid)) { ini->ism_selected = i; return 0; } diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h index a9a6e3c1113a..53da84f57fd6 100644 --- a/net/smc/smc_tracepoint.h +++ b/net/smc/smc_tracepoint.h @@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(smc_msg_event, __field(const void *, smc) __field(u64, net_cookie) __field(size_t, len) - __string(name, smc->conn.lnk->ibname) + __string(name, smc->conn.lnk ? smc->conn.lnk->ibname : "") ), TP_fast_assign( diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index f2ea190777f0..034f322054e5 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -789,23 +789,33 @@ static int tls_push_record(struct sock *sk, int flags, i = msg_pl->sg.end; sk_msg_iter_var_prev(i); + /* msg_pl->sg.data is a ring; data[MAX+1] is reserved for the wrap + * link (frags won't use it). 'i' is now the last filled entry: + * + * i end start + * v v v [ rsv ] + * [ d ][ d ][ ][ ]...[ ][ d ][ d ][ d ][chain] + * ^ END v + * `-----------------------------------------' + * + * Note that SGL does not allow chain-after-chain, so for TLS 1.3, + * we must make sure we don't create the wrap entry and then chain + * link to content_type immediately at index 0. + */ + if (i < msg_pl->sg.start) + sg_chain(msg_pl->sg.data, ARRAY_SIZE(msg_pl->sg.data), + msg_pl->sg.data); + rec->content_type = record_type; if (prot->version == TLS_1_3_VERSION) { /* Add content type to end of message. No padding added */ sg_set_buf(&rec->sg_content_type, &rec->content_type, 1); sg_mark_end(&rec->sg_content_type); - sg_chain(msg_pl->sg.data, msg_pl->sg.end + 1, - &rec->sg_content_type); + sg_chain(msg_pl->sg.data, i + 2, &rec->sg_content_type); } else { sg_mark_end(sk_msg_elem(msg_pl, i)); } - if (msg_pl->sg.end < msg_pl->sg.start) { - sg_chain(&msg_pl->sg.data[msg_pl->sg.start], - MAX_SKB_FRAGS - msg_pl->sg.start + 1, - msg_pl->sg.data); - } - i = msg_pl->sg.start; sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]); @@ -1356,9 +1366,14 @@ unlock: mutex_unlock(&tls_ctx->tx_lock); } +/* When has_copied is true the caller has already moved bytes to + * userspace. Report sk_err but leave it set so the next read + * surfaces it instead of a spurious EOF, otherwise sk_err is + * consumed via sock_error(). + */ static int tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, - bool released) + bool released, bool has_copied) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); @@ -1376,8 +1391,11 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, if (!sk_psock_queue_empty(psock)) return 0; - if (sk->sk_err) + if (sk->sk_err) { + if (has_copied) + return -READ_ONCE(sk->sk_err); return sock_error(sk); + } if (ret < 0) return ret; @@ -1413,7 +1431,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, } if (unlikely(!tls_strp_msg_load(&ctx->strp, released))) - return tls_rx_rec_wait(sk, psock, nonblock, false); + return tls_rx_rec_wait(sk, psock, nonblock, false, has_copied); return 1; } @@ -2101,7 +2119,7 @@ int tls_sw_recvmsg(struct sock *sk, int to_decrypt, chunk; err = tls_rx_rec_wait(sk, psock, flags & MSG_DONTWAIT, - released); + released, !!(decrypted + copied)); if (err <= 0) { if (psock) { chunk = sk_msg_recvmsg(sk, psock, msg, len, @@ -2288,7 +2306,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct tls_decrypt_arg darg; err = tls_rx_rec_wait(sk, NULL, flags & SPLICE_F_NONBLOCK, - true); + true, false); if (err <= 0) goto splice_read_end; @@ -2374,7 +2392,7 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, } else { struct tls_decrypt_arg darg; - err = tls_rx_rec_wait(sk, NULL, true, released); + err = tls_rx_rec_wait(sk, NULL, true, released, !!copied); if (err <= 0) goto read_sock_end; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 33ed8ecb556d..faf04d1b6c01 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2716,8 +2716,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) * Sleep until more data has arrived. But check for races.. */ static long unix_stream_data_wait(struct sock *sk, long timeo, - struct sk_buff *last, unsigned int last_len, - bool freezable) + struct sk_buff *last, bool freezable) { unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE; struct sk_buff *tail; @@ -2730,7 +2729,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, tail = skb_peek_tail(&sk->sk_receive_queue); if (tail != last || - (tail && tail->len != last_len) || sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current) || @@ -2923,7 +2921,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, int flags = state->flags; bool check_creds = false; struct scm_cookie scm; - unsigned int last_len; struct unix_sock *u; int copied = 0; int err = 0; @@ -2969,7 +2966,6 @@ redo: goto unlock; } last = skb = skb_peek(&sk->sk_receive_queue); - last_len = last ? last->len : 0; again: #if IS_ENABLED(CONFIG_AF_UNIX_OOB) @@ -3003,8 +2999,7 @@ again: mutex_unlock(&u->iolock); - timeo = unix_stream_data_wait(sk, timeo, last, - last_len, freezable); + timeo = unix_stream_data_wait(sk, timeo, last, freezable); if (signal_pending(current)) { err = sock_intr_errno(timeo); @@ -3021,7 +3016,6 @@ unlock: while (skip >= unix_skb_len(skb)) { skip -= unix_skb_len(skb); last = skb; - last_len = skb->len; skb = skb_peek_next(skb, &sk->sk_receive_queue); if (!skb) goto again; @@ -3096,7 +3090,6 @@ unlock: skip = 0; last = skb; - last_len = skb->len; unix_state_lock(sk); skb = skb_peek_next(skb, &sk->sk_receive_queue); if (skb) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 495c93cddcdc..ed42e08798a9 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -72,34 +72,6 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, return true; } -static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk, - struct sk_buff *skb, - struct msghdr *msg, - size_t pkt_len, - bool zerocopy) -{ - struct ubuf_info *uarg; - - if (msg->msg_ubuf) { - uarg = msg->msg_ubuf; - net_zcopy_get(uarg); - } else { - struct ubuf_info_msgzc *uarg_zc; - - uarg = msg_zerocopy_realloc(sk_vsock(vsk), - pkt_len, NULL, false); - if (!uarg) - return -1; - - uarg_zc = uarg_to_msgzc(uarg); - uarg_zc->zerocopy = zerocopy ? 1 : 0; - } - - skb_zcopy_init(skb, uarg); - - return 0; -} - static int virtio_transport_fill_skb(struct sk_buff *skb, struct virtio_vsock_pkt_info *info, size_t len, @@ -319,8 +291,10 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, u32 src_cid, src_port, dst_cid, dst_port; const struct virtio_transport *t_ops; struct virtio_vsock_sock *vvs; + struct ubuf_info *uarg = NULL; u32 pkt_len = info->pkt_len; bool can_zcopy = false; + bool have_uref = false; u32 rest_len; int ret; @@ -362,6 +336,25 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, if (can_zcopy) max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, (MAX_SKB_FRAGS * PAGE_SIZE)); + + if (info->msg->msg_flags & MSG_ZEROCOPY && + info->op == VIRTIO_VSOCK_OP_RW) { + uarg = info->msg->msg_ubuf; + + if (!uarg) { + uarg = msg_zerocopy_realloc(sk_vsock(vsk), + pkt_len, NULL, false); + if (!uarg) { + virtio_transport_put_credit(vvs, pkt_len); + return -ENOMEM; + } + + if (!can_zcopy) + uarg_to_msgzc(uarg)->zerocopy = 0; + + have_uref = true; + } + } } rest_len = pkt_len; @@ -380,27 +373,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, break; } - /* We process buffer part by part, allocating skb on - * each iteration. If this is last skb for this buffer - * and MSG_ZEROCOPY mode is in use - we must allocate - * completion for the current syscall. - * - * Pass pkt_len because msg iter is already consumed - * by virtio_transport_fill_skb(), so iter->count - * can not be used for RLIMIT_MEMLOCK pinned-pages - * accounting done by msg_zerocopy_realloc(). - */ - if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY && - skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) { - if (virtio_transport_init_zcopy_skb(vsk, skb, - info->msg, - pkt_len, - can_zcopy)) { - kfree_skb(skb); - ret = -ENOMEM; - break; - } - } + skb_zcopy_set(skb, uarg, NULL); virtio_transport_inc_tx_pkt(vvs, skb); @@ -424,6 +397,18 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, virtio_transport_put_credit(vvs, rest_len); + /* msg_zerocopy_realloc() initializes the ubuf_info refcnt to 1. + * skb_zcopy_set() increases it for each skb, so we can drop that + * initial reference to keep it balanced. + */ + if (have_uref) { + if (rest_len == pkt_len) + /* No data sent, abort the notification. */ + net_zcopy_put_abort(uarg, true); + else + net_zcopy_put(uarg); + } + /* Return number of bytes, if any data has been sent. */ if (rest_len != pkt_len) ret = pkt_len - rest_len; @@ -1335,7 +1320,7 @@ destroy: return err; } -static void +static bool virtio_transport_recv_enqueue(struct vsock_sock *vsk, struct sk_buff *skb) { @@ -1350,10 +1335,8 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, spin_lock_bh(&vvs->rx_lock); can_enqueue = virtio_transport_inc_rx_pkt(vvs, len); - if (!can_enqueue) { - free_pkt = true; + if (!can_enqueue) goto out; - } if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) vvs->msg_count++; @@ -1393,6 +1376,8 @@ out: spin_unlock_bh(&vvs->rx_lock); if (free_pkt) kfree_skb(skb); + + return can_enqueue; } static int @@ -1405,7 +1390,17 @@ virtio_transport_recv_connected(struct sock *sk, switch (le16_to_cpu(hdr->op)) { case VIRTIO_VSOCK_OP_RW: - virtio_transport_recv_enqueue(vsk, skb); + if (!virtio_transport_recv_enqueue(vsk, skb)) { + /* There is no more space to queue the packet, so let's + * close the connection; otherwise, we'll lose data. + */ + (void)virtio_transport_reset(vsk, skb); + virtio_transport_do_close(vsk, true); + sk->sk_err = ENOBUFS; + sk_error_report(sk); + vsock_remove_sock(vsk); + break; + } vsock_data_ready(sk); return err; case VIRTIO_VSOCK_OP_CREDIT_REQUEST: diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index aca3132689cf..4cd11f355e9d 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1156,7 +1156,7 @@ vmci_transport_recv_connecting_server(struct sock *listener, /* Close and cleanup the connection. */ vmci_transport_send_reset(pending, pkt); skerr = EPROTO; - err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL; + err = -EINVAL; goto destroy; } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 9a0c02c23dc5..4a1cdfc3221c 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2475,6 +2475,9 @@ size_t cfg80211_merge_profile(const u8 *ie, size_t ielen, memcpy(merged_ie + copied_len, next_sub->data, next_sub->datalen); copied_len += next_sub->datalen; + + mbssid_elem = next_mbssid; + sub_elem = next_sub; } return copied_len; diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h index 8f1b3500f8e2..abb1964c44d4 100644 --- a/scripts/gcc-plugins/gcc-common.h +++ b/scripts/gcc-plugins/gcc-common.h @@ -309,7 +309,9 @@ typedef const gimple *const_gimple_ptr; #define gimple gimple_ptr #define const_gimple const_gimple_ptr #undef CONST_CAST_GIMPLE -#define CONST_CAST_GIMPLE(X) CONST_CAST(gimple, (X)) +#define CONST_CAST_GIMPLE(X) const_cast<gimple>((X)) +#undef CONST_CAST_TREE +#define CONST_CAST_TREE(X) const_cast<tree>((X)) /* gimple related */ static inline gimple gimple_build_assign_with_ops(enum tree_code subcode, tree lhs, tree op1, tree op2 MEM_STAT_DECL) diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD index 452374d63c24..1213c8e04671 100644 --- a/scripts/package/PKGBUILD +++ b/scripts/package/PKGBUILD @@ -10,7 +10,7 @@ for pkg in $_extrapackages; do pkgname+=("${pkgbase}-${pkg}") done -pkgver="${KERNELRELEASE//-/_}" +pkgver="$(echo "${KERNELRELEASE}" | sed 's/-\(rc[0-9]\+\)/\1/;s/-/_/g')" # The PKGBUILD is evaluated multiple times. # Running scripts/build-version from here would introduce inconsistencies. pkgrel="${KBUILD_REVISION}" diff --git a/security/keys/keyring.c b/security/keys/keyring.c index f331725d5a37..df2580072cfe 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -1109,6 +1109,7 @@ key_ref_t find_key_to_update(key_ref_t keyring_ref, kenter("{%d},{%s,%s}", keyring->serial, index_key->type->name, index_key->description); + guard(rcu)(); object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops, index_key); diff --git a/security/lsm_syscalls.c b/security/lsm_syscalls.c index 8440948a690c..b3887c85a2ba 100644 --- a/security/lsm_syscalls.c +++ b/security/lsm_syscalls.c @@ -55,7 +55,14 @@ u64 lsm_name_to_attr(const char *name) SYSCALL_DEFINE4(lsm_set_self_attr, unsigned int, attr, struct lsm_ctx __user *, ctx, u32, size, u32, flags) { - return security_setselfattr(attr, ctx, size, flags); + int rc; + + rc = mutex_lock_interruptible(¤t->signal->cred_guard_mutex); + if (rc < 0) + return rc; + rc = security_setselfattr(attr, ctx, size, flags); + mutex_unlock(¤t->signal->cred_guard_mutex); + return rc; } /** diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 6eaa950504cf..932b9337c93e 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -2138,6 +2138,9 @@ static int interleaved_copy(struct snd_pcm_substream *substream, off = frames_to_bytes(runtime, off); frames = frames_to_bytes(runtime, frames); + if (!data) + return fill_silence(substream, 0, hwoff, NULL, frames); + return do_transfer(substream, 0, hwoff, data + off, frames, transfer, in_kernel); } diff --git a/sound/core/seq/seq_ump_client.c b/sound/core/seq/seq_ump_client.c index ebbe1cbe0b9b..9a6b4b61bd74 100644 --- a/sound/core/seq/seq_ump_client.c +++ b/sound/core/seq/seq_ump_client.c @@ -37,6 +37,7 @@ struct seq_ump_client { struct snd_ump_endpoint *ump; /* assigned endpoint */ int seq_client; /* sequencer client id */ int opened[2]; /* current opens for each direction */ + rwlock_t output_lock; /* protects out_rfile output access */ struct snd_rawmidi_file out_rfile; /* rawmidi for output */ struct seq_ump_input_buffer input; /* input parser context */ void *ump_info[SNDRV_UMP_MAX_BLOCKS + 1]; /* shadow of seq client ump_info */ @@ -88,6 +89,7 @@ static int seq_ump_process_event(struct snd_seq_event *ev, int direct, unsigned char type; int len; + guard(read_lock_irqsave)(&client->output_lock); substream = client->out_rfile.output; if (!substream) return -ENODEV; @@ -106,6 +108,7 @@ static int seq_ump_process_event(struct snd_seq_event *ev, int direct, static int seq_ump_client_open(struct seq_ump_client *client, int dir) { struct snd_ump_endpoint *ump = client->ump; + struct snd_rawmidi_file rfile = {}; int err; guard(mutex)(&ump->open_mutex); @@ -113,9 +116,11 @@ static int seq_ump_client_open(struct seq_ump_client *client, int dir) err = snd_rawmidi_kernel_open(&ump->core, 0, SNDRV_RAWMIDI_LFLG_OUTPUT | SNDRV_RAWMIDI_LFLG_APPEND, - &client->out_rfile); + &rfile); if (err < 0) return err; + scoped_guard(write_lock_irqsave, &client->output_lock) + client->out_rfile = rfile; } client->opened[dir]++; return 0; @@ -125,11 +130,19 @@ static int seq_ump_client_open(struct seq_ump_client *client, int dir) static int seq_ump_client_close(struct seq_ump_client *client, int dir) { struct snd_ump_endpoint *ump = client->ump; + struct snd_rawmidi_file rfile = {}; guard(mutex)(&ump->open_mutex); - if (!--client->opened[dir]) - if (dir == STR_OUT) - snd_rawmidi_kernel_release(&client->out_rfile); + if (!--client->opened[dir]) { + if (dir == STR_OUT) { + scoped_guard(write_lock_irqsave, &client->output_lock) { + rfile = client->out_rfile; + client->out_rfile = (struct snd_rawmidi_file){}; + } + if (rfile.rmidi) + snd_rawmidi_kernel_release(&rfile); + } + } return 0; } @@ -468,6 +481,7 @@ static int snd_seq_ump_probe(struct device *_dev) INIT_WORK(&client->group_notify_work, handle_group_notify); client->ump = ump; + rwlock_init(&client->output_lock); client->seq_client = snd_seq_create_kernel_client(card, ump->core.device, diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index ea98cbc4310d..4cd5b719556e 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7182,12 +7182,12 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x3e00, "ASUS G814FH/FM/FP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3e20, "ASUS G814PH/PM/PP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3e30, "ASUS TP3607SA", ALC287_FIXUP_TAS2781_I2C), - SND_PCI_QUIRK(0x1043, 0x3ee0, "ASUS Strix G815_JHR_JMR_JPR", ALC287_FIXUP_TAS2781_I2C), - SND_PCI_QUIRK(0x1043, 0x3ef0, "ASUS Strix G635LR_LW_LX", ALC287_FIXUP_TAS2781_I2C), - SND_PCI_QUIRK(0x1043, 0x3f00, "ASUS Strix G815LH_LM_LP", ALC287_FIXUP_TAS2781_I2C), - SND_PCI_QUIRK(0x1043, 0x3f10, "ASUS Strix G835LR_LW_LX", ALC287_FIXUP_TAS2781_I2C), - SND_PCI_QUIRK(0x1043, 0x3f20, "ASUS Strix G615LR_LW", ALC287_FIXUP_TAS2781_I2C), - SND_PCI_QUIRK(0x1043, 0x3f30, "ASUS Strix G815LR_LW", ALC287_FIXUP_TAS2781_I2C), + SND_PCI_QUIRK(0x1043, 0x3ee0, "ASUS Strix G815_JHR_JMR_JPR", ALC287_FIXUP_TXNW2781_I2C), + SND_PCI_QUIRK(0x1043, 0x3ef0, "ASUS Strix G635LR_LW_LX", ALC287_FIXUP_TXNW2781_I2C), + SND_PCI_QUIRK(0x1043, 0x3f00, "ASUS Strix G815LH_LM_LP", ALC287_FIXUP_TXNW2781_I2C), + SND_PCI_QUIRK(0x1043, 0x3f10, "ASUS Strix G835LR_LW_LX", ALC287_FIXUP_TXNW2781_I2C), + SND_PCI_QUIRK(0x1043, 0x3f20, "ASUS Strix G615LR_LW", ALC287_FIXUP_TXNW2781_I2C), + SND_PCI_QUIRK(0x1043, 0x3f30, "ASUS Strix G815LR_LW", ALC287_FIXUP_TXNW2781_I2C), SND_PCI_QUIRK(0x1043, 0x3fd0, "ASUS B3605CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3ff0, "ASUS B5405CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), diff --git a/sound/hda/codecs/side-codecs/cs35l41_hda.c b/sound/hda/codecs/side-codecs/cs35l41_hda.c index 21e00055c0c4..47263c5a021c 100644 --- a/sound/hda/codecs/side-codecs/cs35l41_hda.c +++ b/sound/hda/codecs/side-codecs/cs35l41_hda.c @@ -1901,8 +1901,10 @@ static int cs35l41_hda_read_acpi(struct cs35l41_hda *cs35l41, const char *hid, i cs35l41->dacpi = adev; physdev = get_device(acpi_get_first_physical_node(adev)); - if (!physdev) + if (!physdev) { + acpi_dev_put(adev); return -ENODEV; + } sub = acpi_get_subsystem_id(ACPI_HANDLE(physdev)); if (IS_ERR(sub)) diff --git a/sound/hda/codecs/side-codecs/cs35l56_hda.c b/sound/hda/codecs/side-codecs/cs35l56_hda.c index 79c15e21d4bc..1d25fe01066e 100644 --- a/sound/hda/codecs/side-codecs/cs35l56_hda.c +++ b/sound/hda/codecs/side-codecs/cs35l56_hda.c @@ -949,6 +949,7 @@ static int cs35l56_hda_read_acpi(struct cs35l56_hda *cs35l56, int hid, int id) return -ENODEV; } ACPI_COMPANION_SET(cs35l56->base.dev, adev); + acpi_dev_put(adev); } /* Initialize things that could be overwritten by a fixup */ diff --git a/sound/pci/asihpi/hpicmn.c b/sound/pci/asihpi/hpicmn.c index 7d1abaedb46a..f06f44b13d3d 100644 --- a/sound/pci/asihpi/hpicmn.c +++ b/sound/pci/asihpi/hpicmn.c @@ -276,6 +276,12 @@ static short find_control(u16 control_index, return 0; } + if (control_index >= p_cache->control_count) { + HPI_DEBUG_LOG(VERBOSE, "control_index out of bounce %d\n", + control_index); + return 0; + } + *pI = p_cache->p_info[control_index]; if (!*pI) { HPI_DEBUG_LOG(VERBOSE, "Uncached Control %d\n", diff --git a/sound/soc/amd/acp/acp-sdw-legacy-mach.c b/sound/soc/amd/acp/acp-sdw-legacy-mach.c index 798c73d7e26d..e87d9e9991e1 100644 --- a/sound/soc/amd/acp/acp-sdw-legacy-mach.c +++ b/sound/soc/amd/acp/acp-sdw-legacy-mach.c @@ -244,9 +244,9 @@ static int create_sdw_dailink(struct snd_soc_card *card, cpus->dai_name = devm_kasprintf(dev, GFP_KERNEL, "SDW%d Pin%d", link_num, cpu_pin_id); - dev_dbg(dev, "cpu->dai_name:%s\n", cpus->dai_name); if (!cpus->dai_name) return -ENOMEM; + dev_dbg(dev, "cpu->dai_name:%s\n", cpus->dai_name); codec_maps[j].cpu = 0; codec_maps[j].codec = j; diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c index 42d24ac2977f..a513a15d7e5a 100644 --- a/sound/soc/codecs/cs35l56-sdw.c +++ b/sound/soc/codecs/cs35l56-sdw.c @@ -560,10 +560,11 @@ static int cs35l56_sdw_remove(struct sdw_slave *peripheral) /* Disable SoundWire interrupts */ cs35l56->sdw_irq_no_unmask = true; - cancel_work_sync(&cs35l56->sdw_irq_work); + flush_work(&cs35l56->sdw_irq_work); sdw_write_no_pm(peripheral, CS35L56_SDW_GEN_INT_MASK_1, 0); sdw_read_no_pm(peripheral, CS35L56_SDW_GEN_INT_STAT_1); sdw_write_no_pm(peripheral, CS35L56_SDW_GEN_INT_STAT_1, 0xFF); + flush_work(&cs35l56->sdw_irq_work); cs35l56_remove(cs35l56); diff --git a/sound/soc/codecs/fs210x.c b/sound/soc/codecs/fs210x.c index e2f85714972d..e2207c53c50d 100644 --- a/sound/soc/codecs/fs210x.c +++ b/sound/soc/codecs/fs210x.c @@ -968,7 +968,7 @@ static int fs210x_effect_scene_info(struct snd_kcontrol *kcontrol, if (scene->name) name = scene->name; - strscpy(uinfo->value.enumerated.name, name, strlen(name) + 1); + strscpy(uinfo->value.enumerated.name, name); return 0; } diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index 3848c7df1916..3facb78748ac 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -170,6 +170,8 @@ struct asoc_sdw_codec_info codec_info_list[] = { .dai_type = SOC_SDW_DAI_TYPE_MIC, .dailink = {SOC_SDW_UNUSED_DAI_ID, SOC_SDW_DMIC_DAI_ID}, .rtd_init = asoc_sdw_rt_dmic_rtd_init, + .quirk = SOC_SDW_CODEC_MIC, + .quirk_exclude = true, }, }, .dai_num = 3, @@ -429,6 +431,8 @@ struct asoc_sdw_codec_info codec_info_list[] = { .dai_type = SOC_SDW_DAI_TYPE_MIC, .dailink = {SOC_SDW_UNUSED_DAI_ID, SOC_SDW_DMIC_DAI_ID}, .rtd_init = asoc_sdw_rt_dmic_rtd_init, + .quirk = SOC_SDW_CODEC_MIC, + .quirk_exclude = true, }, }, .dai_num = 3, diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c index c8adfff826bd..9cb7567e263e 100644 --- a/sound/soc/soc-utils.c +++ b/sound/soc/soc-utils.c @@ -36,6 +36,7 @@ int snd_soc_ret(const struct device *dev, int ret, const char *fmt, ...) vaf.va = &args; dev_err(dev, "ASoC error (%d): %pV", ret, &vaf); + va_end(args); } return ret; diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c index 71a18f156de2..f615b8d1c802 100644 --- a/sound/soc/sof/amd/acp.c +++ b/sound/soc/sof/amd/acp.c @@ -223,7 +223,7 @@ static int psp_send_cmd(struct acp_dev_data *adata, int cmd) { struct snd_sof_dev *sdev = adata->dev; int ret; - u32 data; + int data; if (!cmd) return -EINVAL; diff --git a/sound/usb/misc/ua101.c b/sound/usb/misc/ua101.c index 99e98b5e3241..586ef13097a3 100644 --- a/sound/usb/misc/ua101.c +++ b/sound/usb/misc/ua101.c @@ -894,8 +894,9 @@ find_format_descriptor(struct usb_interface *interface) struct uac_format_type_i_discrete_descriptor *desc; desc = (struct uac_format_type_i_discrete_descriptor *)extra; - if (desc->bLength > extralen) { - dev_err(&interface->dev, "descriptor overflow\n"); + if (desc->bLength < sizeof(struct usb_descriptor_header) || + desc->bLength > extralen) { + dev_err(&interface->dev, "invalid descriptor length\n"); return NULL; } if (desc->bLength == UAC_FORMAT_TYPE_I_DISCRETE_DESC_SIZE(1) && diff --git a/sound/usb/mixer_scarlett2.c b/sound/usb/mixer_scarlett2.c index a75a4610663e..da3c2741ef57 100644 --- a/sound/usb/mixer_scarlett2.c +++ b/sound/usb/mixer_scarlett2.c @@ -6707,6 +6707,8 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer) err = scarlett2_add_new_ctl( mixer, &scarlett2_autogain_status_ctl, i, 1, s, &private->autogain_status_ctls[i]); + if (err < 0) + return err; } /* Add autogain target controls */ @@ -9186,12 +9188,15 @@ static long scarlett2_hwdep_write(struct snd_hwdep *hw, flash_size = private->flash_segment_blocks[segment_id] * SCARLETT2_FLASH_BLOCK_SIZE; - if (count < 0 || *offset < 0 || *offset + count >= flash_size) - return -ENOSPC; + if (count < 0 || *offset < 0) + return -EINVAL; if (!count) return 0; + if (*offset >= flash_size || count > flash_size - *offset) + return -ENOSPC; + /* Limit the *req size to SCARLETT2_FLASH_RW_MAX */ if (count > max_data_size) count = max_data_size; diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index 15aadaf24a66..c13b779bf118 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -999,6 +999,56 @@ TEST_F(hmm, migrate) } /* + * Migrate private file memory to device private memory. + */ +TEST_F(hmm, migrate_file_private) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + int fd; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + fd = hmm_create_file(size); + ASSERT_GE(fd, 0); + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = fd; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* * Migrate anonymous memory to device private memory and fault some of it back * to system memory, then try migrating the resulting mix of system and device * private memory to the device. diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index d9173f2312b7..1edb2d785d93 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -97,7 +97,7 @@ RUN_ALL=false RUN_DESTRUCTIVE=false TAP_PREFIX="# " -while getopts "aht:n" OPT; do +while getopts "aht:nd" OPT; do case ${OPT} in "a") RUN_ALL=true ;; "h") usage ;; diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c index c368fc045f4b..d5134f93f7d7 100644 --- a/tools/testing/selftests/net/lib/xdp_native.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -268,6 +268,17 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) return XDP_PASS; } +static __always_inline __u16 csum_fold_helper(__u32 csum) +{ + csum = (csum & 0xffff) + (csum >> 16); + return ~((csum & 0xffff) + (csum >> 16)); +} + +static __always_inline __u16 csum_fold_udp_helper(__u32 csum) +{ + return csum_fold_helper(csum) ? : 0xffff; +} + static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum) { void *data_end = (void *)(long)ctx->data_end; @@ -281,21 +292,22 @@ static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum) if (eth->h_proto == bpf_htons(ETH_P_IP)) { struct iphdr *iph = data + sizeof(*eth); - __u16 total_len; if (iph + 1 > (struct iphdr *)data_end) return NULL; - iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset); - udph = (void *)eth + sizeof(*iph) + sizeof(*eth); if (!udph || udph + 1 > (struct udphdr *)data_end) return NULL; - len_new = bpf_htons(bpf_ntohs(udph->len) + offset); + len = iph->tot_len; + len_new = bpf_htons(bpf_ntohs(len) + offset); + iph->tot_len = len_new; + iph->check = csum_fold_helper( + bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), ~((__u32)iph->check))); } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { struct ipv6hdr *ipv6h = data + sizeof(*eth); - __u16 payload_len; if (ipv6h + 1 > (struct ipv6hdr *)data_end) return NULL; @@ -304,33 +316,27 @@ static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum) if (!udph || udph + 1 > (struct udphdr *)data_end) return NULL; - *udp_csum = ~((__u32)udph->check); - len = ipv6h->payload_len; len_new = bpf_htons(bpf_ntohs(len) + offset); ipv6h->payload_len = len_new; - - *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, - sizeof(len_new), *udp_csum); - - len = udph->len; - len_new = bpf_htons(bpf_ntohs(udph->len) + offset); - *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, - sizeof(len_new), *udp_csum); } else { return NULL; } + len = udph->len; + len_new = bpf_htons(bpf_ntohs(len) + offset); + + *udp_csum = ~((__u32)udph->check); + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + udph->len = len_new; return udph; } -static __u16 csum_fold_helper(__u32 csum) -{ - return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff; -} - static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset, __u32 hdr_len) { @@ -359,7 +365,7 @@ static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset, return -1; udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); - udph->check = (__u16)csum_fold_helper(udp_csum); + udph->check = (__u16)csum_fold_udp_helper(udp_csum); if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0) return -1; @@ -403,7 +409,7 @@ static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset) return -1; udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum); - udph->check = (__u16)csum_fold_helper(udp_csum); + udph->check = (__u16)csum_fold_udp_helper(udp_csum); buff_len = bpf_xdp_get_buff_len(ctx); @@ -483,8 +489,7 @@ static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len, return -1; udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); - - udph->check = (__u16)csum_fold_helper(udp_csum); + udph->check = (__u16)csum_fold_udp_helper(udp_csum); if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0) return -1; @@ -541,7 +546,7 @@ static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len, return -1; udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum); - udph->check = (__u16)csum_fold_helper(udp_csum); + udph->check = (__u16)csum_fold_udp_helper(udp_csum); if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0) return -1; diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index cbd23444c8a9..ac47979349a4 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1220,6 +1220,17 @@ static int __cmd_dev_add(const struct dev_ctx *ctx) goto fail; } + /* + * The kernel may reduce nr_hw_queues (e.g. capped to nr_cpu_ids). + * Cap nthreads to the actual queue count to avoid creating extra + * handler threads that will hang during device removal. + * + * per_io_tasks mode is excluded: threads interleave across all + * queues so nthreads > nr_hw_queues is valid and intentional. + */ + if (!ctx->per_io_tasks && dev->nthreads > info->nr_hw_queues) + dev->nthreads = info->nr_hw_queues; + ret = ublk_start_daemon(ctx, dev); ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret); if (ret < 0) |
