summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2026-03-09 09:28:43 -0700
committerAlexei Starovoitov <ast@kernel.org>2026-03-09 09:28:43 -0700
commitbd2e02e3c9215305dfa344c050d5822f19929cf7 (patch)
tree4782826af167dab0c11f8dd9f8ffa600dc5328dc /kernel
parent099bded7525d9803f62bc5a1ed60e2c9ec4851e0 (diff)
parentfcec7c66d68165b69b3cef6af913a435a25e36a1 (diff)
Merge branch 'always-allow-sleepable-and-fmod_ret-programs-on-syscalls'
Viktor Malik says: ==================== Always allow sleepable and fmod_ret programs on syscalls Both sleepable and fmod_ret programs are only allowed on selected functions. For convenience, the error injection list was originally used. When error injection is disabled, that list is empty and sleepable tracing programs, as well as fmod_ret programs, are effectively unavailable. This patch series addresses the issue by at least enabling sleepable and fmod_ret programs on syscalls, if error injection is disabled. More details on why syscalls are used can be found in [1]. [1] https://lore.kernel.org/bpf/CAADnVQK6qP8izg+k9yV0vdcT-+=axtFQ2fKw7D-2Ei-V6WS5Dw@mail.gmail.com/ Changes in v3: - Handle LoongArch (Leon) - Add Kumar's and Leon's acks Changes in v2: - Check "sys_" prefix instead of "sys" for powerpc syscalls (AI review) - Add link to the original discussion (Kumar) - Add explanation why arch syscall prefixes are hard-coded (Leon) ==================== Link: https://patch.msgid.link/cover.1773055375.git.vmalik@redhat.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/verifier.c85
1 files changed, 71 insertions, 14 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7aa06f534cb2..8e4f69918693 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -24952,14 +24952,7 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
}
#define SECURITY_PREFIX "security_"
-static int check_attach_modify_return(unsigned long addr, const char *func_name)
-{
- if (within_error_injection_list(addr) ||
- !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
- return 0;
-
- return -EINVAL;
-}
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
/* list of non-sleepable functions that are otherwise on
* ALLOW_ERROR_INJECTION list
@@ -24982,6 +24975,75 @@ static int check_non_sleepable_error_inject(u32 btf_id)
return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
}
+static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
+{
+ /* fentry/fexit/fmod_ret progs can be sleepable if they are
+ * attached to ALLOW_ERROR_INJECTION and are not in denylist.
+ */
+ if (!check_non_sleepable_error_inject(btf_id) &&
+ within_error_injection_list(addr))
+ return 0;
+
+ return -EINVAL;
+}
+
+static int check_attach_modify_return(unsigned long addr, const char *func_name)
+{
+ if (within_error_injection_list(addr) ||
+ !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
+ return 0;
+
+ return -EINVAL;
+}
+
+#else
+
+/* Unfortunately, the arch-specific prefixes are hard-coded in arch syscall code
+ * so we need to hard-code them, too. Ftrace has arch_syscall_match_sym_name()
+ * but that just compares two concrete function names.
+ */
+static bool has_arch_syscall_prefix(const char *func_name)
+{
+#if defined(__x86_64__)
+ return !strncmp(func_name, "__x64_", 6);
+#elif defined(__i386__)
+ return !strncmp(func_name, "__ia32_", 7);
+#elif defined(__s390x__)
+ return !strncmp(func_name, "__s390x_", 8);
+#elif defined(__aarch64__)
+ return !strncmp(func_name, "__arm64_", 8);
+#elif defined(__riscv)
+ return !strncmp(func_name, "__riscv_", 8);
+#elif defined(__powerpc__) || defined(__powerpc64__)
+ return !strncmp(func_name, "sys_", 4);
+#elif defined(__loongarch__)
+ return !strncmp(func_name, "sys_", 4);
+#else
+ return false;
+#endif
+}
+
+/* Without error injection, allow sleepable and fmod_ret progs on syscalls. */
+
+static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
+{
+ if (has_arch_syscall_prefix(func_name))
+ return 0;
+
+ return -EINVAL;
+}
+
+static int check_attach_modify_return(unsigned long addr, const char *func_name)
+{
+ if (has_arch_syscall_prefix(func_name) ||
+ !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
+ return 0;
+
+ return -EINVAL;
+}
+
+#endif /* CONFIG_FUNCTION_ERROR_INJECTION */
+
int bpf_check_attach_target(struct bpf_verifier_log *log,
const struct bpf_prog *prog,
const struct bpf_prog *tgt_prog,
@@ -25261,12 +25323,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
ret = -EINVAL;
switch (prog->type) {
case BPF_PROG_TYPE_TRACING:
-
- /* fentry/fexit/fmod_ret progs can be sleepable if they are
- * attached to ALLOW_ERROR_INJECTION and are not in denylist.
- */
- if (!check_non_sleepable_error_inject(btf_id) &&
- within_error_injection_list(addr))
+ if (!check_attach_sleepable(btf_id, addr, tname))
ret = 0;
/* fentry/fexit/fmod_ret progs can also be sleepable if they are
* in the fmodret id set with the KF_SLEEPABLE flag.