summaryrefslogtreecommitdiff
path: root/lib/raid/raid6/x86/sse1.c
blob: f4b260df522a38637b2e72a4fadd64ede712f02e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * SSE-1/MMXEXT implementation of RAID-6 syndrome functions.
 *
 * This is really an MMX implementation, but it requires SSE-1 or AMD MMXEXT for
 * prefetch support and a few other features.  The support for nontemporal
 * memory accesses is enough to make this worthwhile as a separate
 * implementation.
 */

#include <asm/cpufeature.h>
#include <asm/fpu/api.h>
#include "algos.h"

/* Defined in raid6/mmx.c */
extern const struct raid6_mmx_constants {
	u64 x1d;
} raid6_mmx_constants;

/*
 * Plain SSE1 implementation
 */
static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 8 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("movq %mm2,%mm4");	/* Q[0] */
		asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
		for ( z = z0-2 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm5,%mm5");
			asm volatile("pxor %mm6,%mm2");
			asm volatile("pxor %mm6,%mm4");
			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
		}
		asm volatile("pcmpgtb %mm4,%mm5");
		asm volatile("paddb %mm4,%mm4");
		asm volatile("pand %mm0,%mm5");
		asm volatile("pxor %mm5,%mm4");
		asm volatile("pxor %mm5,%mm5");
		asm volatile("pxor %mm6,%mm2");
		asm volatile("pxor %mm6,%mm4");

		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_sse1x1 = {
	.gen_syndrome	= raid6_sse11_gen_syndrome,
	.name		= "sse1x1",
};

/*
 * Unrolled-by-2 SSE1 implementation
 */
static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
	asm volatile("pxor %mm7,%mm7"); /* Zero temp */

	/* We uniformly assume a single prefetch covers at least 16 bytes */
	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
		asm volatile("movq %mm2,%mm4");	/* Q[0] */
		asm volatile("movq %mm3,%mm6"); /* Q[1] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("pcmpgtb %mm6,%mm7");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("paddb %mm6,%mm6");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pand %mm0,%mm7");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
			asm volatile("pxor %mm5,%mm2");
			asm volatile("pxor %mm7,%mm3");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			asm volatile("pxor %mm5,%mm5");
			asm volatile("pxor %mm7,%mm7");
		}
		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
		asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
	}

	asm volatile("sfence" : :: "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_sse1x2 = {
	.gen_syndrome	= raid6_sse12_gen_syndrome,
	.name		= "sse1x2",
};