summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2026-03-27 07:16:47 +0100
committerAndrew Morton <akpm@linux-foundation.org>2026-04-02 23:36:18 -0700
commit3f276cece4dd9e8bf199d9bf3901eef8ca904c2d (patch)
tree0417f9fb501587b9a23f06c30d797b50c8b7301b /lib
parent033bee3e49631bd0c7e081aeafeadc7623495107 (diff)
powerpc: move the XOR code to lib/raid/
Move the optimized XOR into lib/raid and include it it in xor.ko instead of always building it into the main kernel image. Link: https://lkml.kernel.org/r/20260327061704.3707577-16-hch@lst.de Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Eric Biggers <ebiggers@kernel.org> Tested-by: Eric Biggers <ebiggers@kernel.org> Cc: Albert Ou <aou@eecs.berkeley.edu> Cc: Alexander Gordeev <agordeev@linux.ibm.com> Cc: Alexandre Ghiti <alex@ghiti.fr> Cc: Andreas Larsson <andreas@gaisler.com> Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com> Cc: Ard Biesheuvel <ardb@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: "Borislav Petkov (AMD)" <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Mason <clm@fb.com> Cc: Christian Borntraeger <borntraeger@linux.ibm.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: David S. Miller <davem@davemloft.net> Cc: David Sterba <dsterba@suse.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Herbert Xu <herbert@gondor.apana.org.au> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jason A. Donenfeld <jason@zx2c4.com> Cc: Johannes Berg <johannes@sipsolutions.net> Cc: Li Nan <linan122@huawei.com> Cc: Madhavan Srinivasan <maddy@linux.ibm.com> Cc: Magnus Lindholm <linmag7@gmail.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Palmer Dabbelt <palmer@dabbelt.com> Cc: Richard Henderson <richard.henderson@linaro.org> Cc: Richard Weinberger <richard@nod.at> Cc: Russell King <linux@armlinux.org.uk> Cc: Song Liu <song@kernel.org> Cc: Sven Schnelle <svens@linux.ibm.com> Cc: Ted Ts'o <tytso@mit.edu> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: WANG Xuerui <kernel@xen0n.name> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'lib')
-rw-r--r--lib/raid/xor/Makefile5
-rw-r--r--lib/raid/xor/powerpc/xor_vmx.c156
-rw-r--r--lib/raid/xor/powerpc/xor_vmx.h22
-rw-r--r--lib/raid/xor/powerpc/xor_vmx_glue.c67
4 files changed, 250 insertions, 0 deletions
diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile
index e8868f5fc396..006b44ce46bf 100644
--- a/lib/raid/xor/Makefile
+++ b/lib/raid/xor/Makefile
@@ -16,6 +16,7 @@ endif
xor-$(CONFIG_ARM64) += arm64/xor-neon.o arm64/xor-neon-glue.o
xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd.o
xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd_glue.o
+xor-$(CONFIG_ALTIVEC) += powerpc/xor_vmx.o powerpc/xor_vmx_glue.o
CFLAGS_arm/xor-neon.o += $(CC_FLAGS_FPU)
@@ -23,3 +24,7 @@ CFLAGS_REMOVE_arm/xor-neon.o += $(CC_FLAGS_NO_FPU)
CFLAGS_arm64/xor-neon.o += $(CC_FLAGS_FPU)
CFLAGS_REMOVE_arm64/xor-neon.o += $(CC_FLAGS_NO_FPU)
+
+CFLAGS_powerpc/xor_vmx.o += -mhard-float -maltivec \
+ $(call cc-option,-mabi=altivec) \
+ -isystem $(shell $(CC) -print-file-name=include)
diff --git a/lib/raid/xor/powerpc/xor_vmx.c b/lib/raid/xor/powerpc/xor_vmx.c
new file mode 100644
index 000000000000..aab49d056d18
--- /dev/null
+++ b/lib/raid/xor/powerpc/xor_vmx.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+
+/*
+ * Sparse (as at v0.5.0) gets very, very confused by this file.
+ * Make it a bit simpler for it.
+ */
+#if !defined(__CHECKER__)
+#include <altivec.h>
+#else
+#define vec_xor(a, b) a ^ b
+#define vector __attribute__((vector_size(16)))
+#endif
+
+#include "xor_vmx.h"
+
+typedef vector signed char unative_t;
+
+#define DEFINE(V) \
+ unative_t *V = (unative_t *)V##_in; \
+ unative_t V##_0, V##_1, V##_2, V##_3
+
+#define LOAD(V) \
+ do { \
+ V##_0 = V[0]; \
+ V##_1 = V[1]; \
+ V##_2 = V[2]; \
+ V##_3 = V[3]; \
+ } while (0)
+
+#define STORE(V) \
+ do { \
+ V[0] = V##_0; \
+ V[1] = V##_1; \
+ V[2] = V##_2; \
+ V[3] = V##_3; \
+ } while (0)
+
+#define XOR(V1, V2) \
+ do { \
+ V1##_0 = vec_xor(V1##_0, V2##_0); \
+ V1##_1 = vec_xor(V1##_1, V2##_1); \
+ V1##_2 = vec_xor(V1##_2, V2##_2); \
+ V1##_3 = vec_xor(V1##_3, V2##_3); \
+ } while (0)
+
+void __xor_altivec_2(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ XOR(v1, v2);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ } while (--lines > 0);
+}
+
+void __xor_altivec_3(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ DEFINE(v3);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ LOAD(v3);
+ XOR(v1, v2);
+ XOR(v1, v3);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ v3 += 4;
+ } while (--lines > 0);
+}
+
+void __xor_altivec_4(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in,
+ const unsigned long * __restrict v4_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ DEFINE(v3);
+ DEFINE(v4);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ LOAD(v3);
+ LOAD(v4);
+ XOR(v1, v2);
+ XOR(v3, v4);
+ XOR(v1, v3);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ v3 += 4;
+ v4 += 4;
+ } while (--lines > 0);
+}
+
+void __xor_altivec_5(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in,
+ const unsigned long * __restrict v4_in,
+ const unsigned long * __restrict v5_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ DEFINE(v3);
+ DEFINE(v4);
+ DEFINE(v5);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ LOAD(v3);
+ LOAD(v4);
+ LOAD(v5);
+ XOR(v1, v2);
+ XOR(v3, v4);
+ XOR(v1, v5);
+ XOR(v1, v3);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ v3 += 4;
+ v4 += 4;
+ v5 += 4;
+ } while (--lines > 0);
+}
diff --git a/lib/raid/xor/powerpc/xor_vmx.h b/lib/raid/xor/powerpc/xor_vmx.h
new file mode 100644
index 000000000000..573c41d90dac
--- /dev/null
+++ b/lib/raid/xor/powerpc/xor_vmx.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Simple interface to link xor_vmx.c and xor_vmx_glue.c
+ *
+ * Separating these file ensures that no altivec instructions are run
+ * outside of the enable/disable altivec block.
+ */
+
+void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3);
+void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5);
diff --git a/lib/raid/xor/powerpc/xor_vmx_glue.c b/lib/raid/xor/powerpc/xor_vmx_glue.c
new file mode 100644
index 000000000000..c41e38340700
--- /dev/null
+++ b/lib/raid/xor/powerpc/xor_vmx_glue.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Altivec XOR operations
+ *
+ * Copyright 2017 IBM Corp.
+ */
+
+#include <linux/preempt.h>
+#include <linux/sched.h>
+#include <linux/raid/xor_impl.h>
+#include <asm/switch_to.h>
+#include <asm/xor.h>
+#include "xor_vmx.h"
+
+static void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_2(bytes, p1, p2);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+
+static void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_3(bytes, p1, p2, p3);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+
+static void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_4(bytes, p1, p2, p3, p4);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+
+static void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_5(bytes, p1, p2, p3, p4, p5);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+
+struct xor_block_template xor_block_altivec = {
+ .name = "altivec",
+ .do_2 = xor_altivec_2,
+ .do_3 = xor_altivec_3,
+ .do_4 = xor_altivec_4,
+ .do_5 = xor_altivec_5,
+};