summaryrefslogtreecommitdiff
path: root/lib/libc/riscv/string/strlen.S
blob: 3beb160f2e6fc8d4622caac7fe773c7d84f3190a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org>
 */

#include <machine/asm.h>

/*
 * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
 * uses haszero(v) (((v) - 0x01010101UL) & ~(v) & 0x80808080UL)
 * which evaluates to non-zero when there is a zero byte in v
 *
 * register a0 - char *s
 */
ENTRY(strlen)
	/*
	 * size_t strlen(const char *s)
	 *
	 * Scans the string one aligned 8-byte word at a time with haszero.
	 * Only aligned loads are issued, so the first load may cover bytes
	 * that precede the string inside the same aligned word; those bytes
	 * are forced non-zero before the word is tested.  Byte 0 of a loaded
	 * word is taken to be the lowest-addressed byte (little-endian).
	 *
	 * in:      a0 - char *str_start
	 * out:     a0 - length of the string, terminator not counted
	 * scratch: a1 - char *str_ptr, 8-byte aligned cursor
	 *          a2 - char[8] iter, afterwards the haszero flag word
	 *          a3, t0, t1, t2, t3 - temporaries
	 */

	/* load constants for haszero */
	li t0, 0x0101010101010101
	slli t1, t0, 7				# 0x8080808080808080, avoid li

	/* round str_start down to 8 bytes; an aligned start needs no fill */
	andi a1, a0, ~0b111
	ld a2, (a1)
	beq a1, a0, .Lhas_zero

	/*
	 * fill the k = (str_start & 7) bytes before str_start with non-zero
	 *
	 * The fill mask is 0x0101010101010101 >> (64 - 8k), i.e. 0x01 in
	 * each of the k lowest bytes.  srl only looks at the low 6 bits of
	 * its shift operand, and -(8 * str_start) is congruent to 64 - 8k
	 * modulo 64, so negating the shifted address gives the shift amount
	 * directly; no explicit "64 -" step is needed.
	 */
	slli t2, a0, 3
	neg t3, t2
	srl t3, t0, t3
	or a2, a2, t3

	/* unrolled iteration of haszero */
	not t2, a2
	sub a2, a2, t0
	and a2, a2, t2
	and a2, a2, t1

	bnez a2, .Lfind_zero

.Lloop_has_zero:
	ld a2, 8(a1)
	addi a1, a1, 8	# move ptr to next 8byte
.Lhas_zero:
	/* a2 = (iter - 0x01..01) & ~iter & 0x80..80 */
	not t2, a2
	sub a2, a2, t0
	and a2, a2, t2
	and a2, a2, t1

	beqz a2, .Lloop_has_zero

.Lfind_zero:
	/*
	 * a2 has bit 7 set in the first zero byte of the word.  Borrows can
	 * only flag additional bytes above that one, so the lowest set bit
	 * is exact.  Use (iter & -iter) to isolate it.
	 */
	neg a3, a2		#a3 = -iter
	and t1, a2, a3		#t1 = (iter & -iter)

	li t0, 0x0001020304050607
	srli t1, t1, 7
	/*
	 * t1 is now 2^(8*k), k being the index of the first zero byte
	 * multiplying by it shifts the idx array in t0 by k bytes to the left
	 */
	mul	t1, t1, t0
	/* highest byte contains idx of first zero */
	srli t1, t1, 56

	/* length = (str_ptr + k) - str_start */
	add a1, a1, t1
	sub a0, a1, a0
	ret
END(strlen)