summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/bpf_inode_storage.c9
-rw-r--r--kernel/bpf/core.c68
-rw-r--r--kernel/bpf/disasm.c5
-rw-r--r--kernel/bpf/dispatcher.c2
-rw-r--r--kernel/bpf/verifier.c34
-rw-r--r--kernel/fork.c9
6 files changed, 104 insertions, 23 deletions
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 0da8d923e39d..f9e81060c1f4 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -178,6 +178,15 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
{
+ /*
+ * Do not allow allocation of BPF_MAP_TYPE_INODE_STORAGE if the BPF LSM
+ * was not initialized by the LSM framework at boot. Without proper
+ * initialization, the BPF inode security blob offset remains unprepared,
+ * causing bpf_inode() to calculate an invalid memory offset and corrupt
+ * inode->i_security.
+ */
+ if (!bpf_lsm_initialized)
+ return ERR_PTR(-EOPNOTSUPP);
return bpf_local_storage_map_alloc(attr, &inode_cache);
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 649cce41e13f..6e19a030da6f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -20,6 +20,7 @@
#include <uapi/linux/btf.h>
#include <linux/filter.h>
#include <linux/skbuff.h>
+#include <linux/static_call.h>
#include <linux/vmalloc.h>
#include <linux/prandom.h>
#include <linux/bpf.h>
@@ -875,6 +876,7 @@ int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
struct bpf_prog_pack {
struct list_head list;
void *ptr;
+ bool arch_flush_needed;
unsigned long bitmap[];
};
@@ -883,6 +885,15 @@ void bpf_jit_fill_hole_with_zero(void *area, unsigned int size)
memset(area, 0, size);
}
+DEFINE_STATIC_CALL_NULL(bpf_arch_pred_flush, bpf_arch_pred_flush);
+
+/*
+ * Enabled once bpf_arch_pred_flush points at a real flush routine. Lets the
+ * pack allocator test "is a predictor flush wired up at all" with a cheap
+ * static branch instead of repeatedly querying the static call target.
+ */
+DEFINE_STATIC_KEY_FALSE(bpf_pred_flush_enabled);
+
#define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE)
static DEFINE_MUTEX(pack_mutex);
@@ -918,6 +929,8 @@ static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_ins
bpf_fill_ill_insns(pack->ptr, BPF_PROG_PACK_SIZE);
bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE);
+ if (static_branch_unlikely(&bpf_pred_flush_enabled))
+ pack->arch_flush_needed = true;
set_vm_flush_reset_perms(pack->ptr);
err = set_memory_rox((unsigned long)pack->ptr,
BPF_PROG_PACK_SIZE / PAGE_SIZE);
@@ -932,15 +945,23 @@ out:
return NULL;
}
-void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
+void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns, bool was_classic)
{
unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size);
- struct bpf_prog_pack *pack;
- unsigned long pos;
+ struct bpf_prog_pack *pack, *fallback_pack = NULL;
+ unsigned long pos, fallback_pos = 0;
void *ptr = NULL;
mutex_lock(&pack_mutex);
if (size > BPF_PROG_PACK_SIZE) {
+ /*
+ * Allocations larger than a pack get their own pages, and
+ * predictors are not flushed for such allocation. This is only
+ * safe because cBPF programs (the unprivileged attack surface)
+ * are bounded well below a pack size.
+ */
+ if (was_classic && static_branch_unlikely(&bpf_pred_flush_enabled))
+ pr_warn_once("BPF: Predictors not flushed for allocations greater than BPF_PROG_PACK_SIZE\n");
size = round_up(size, PAGE_SIZE);
ptr = bpf_jit_alloc_exec(size);
if (ptr) {
@@ -960,8 +981,29 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
list_for_each_entry(pack, &pack_list, list) {
pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
nbits, 0);
- if (pos < BPF_PROG_CHUNK_COUNT)
+ if (pos >= BPF_PROG_CHUNK_COUNT)
+ continue;
+ /* Flush not enabled, use any pack */
+ if (!static_branch_unlikely(&bpf_pred_flush_enabled))
goto found_free_area;
+ /*
+ * cBPF reuse of a dirty pack triggers a flush, so prefer a
+ * clean pack for cBPF. eBPF never flushes, so steer it to a
+ * dirty pack and keep clean packs free for cBPF.
+ */
+ if (was_classic ^ pack->arch_flush_needed)
+ goto found_free_area;
+ if (!fallback_pack) {
+ fallback_pack = pack;
+ fallback_pos = pos;
+ }
+ }
+
+ /* No preferred pack found */
+ if (fallback_pack) {
+ pack = fallback_pack;
+ pos = fallback_pos;
+ goto found_free_area;
}
pack = alloc_new_pack(bpf_fill_ill_insns);
@@ -971,6 +1013,16 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
pos = 0;
found_free_area:
+ /* Flush only for cBPF as it may contain a crafted gadget */
+ if (static_branch_unlikely(&bpf_pred_flush_enabled) &&
+ pack->arch_flush_needed &&
+ was_classic) {
+ struct bpf_prog_pack *p;
+
+ static_call_cond(bpf_arch_pred_flush)();
+ list_for_each_entry(p, &pack_list, list)
+ p->arch_flush_needed = false;
+ }
bitmap_set(pack->bitmap, pos, nbits);
ptr = (void *)(pack->ptr) + (pos << BPF_PROG_CHUNK_SHIFT);
@@ -1008,6 +1060,9 @@ void bpf_prog_pack_free(void *ptr, u32 size)
"bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n");
bitmap_clear(pack->bitmap, pos, nbits);
+
+ if (static_branch_unlikely(&bpf_pred_flush_enabled))
+ pack->arch_flush_needed = true;
if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
BPF_PROG_CHUNK_COUNT, 0) == 0) {
list_del(&pack->list);
@@ -1130,7 +1185,8 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
unsigned int alignment,
struct bpf_binary_header **rw_header,
u8 **rw_image,
- bpf_jit_fill_hole_t bpf_fill_ill_insns)
+ bpf_jit_fill_hole_t bpf_fill_ill_insns,
+ bool was_classic)
{
struct bpf_binary_header *ro_header;
u32 size, hole, start;
@@ -1143,7 +1199,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
if (bpf_jit_charge_modmem(size))
return NULL;
- ro_header = bpf_prog_pack_alloc(size, bpf_fill_ill_insns);
+ ro_header = bpf_prog_pack_alloc(size, bpf_fill_ill_insns, was_classic);
if (!ro_header) {
bpf_jit_uncharge_modmem(size);
return NULL;
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index f8a3c7eb451e..0391b3bc0073 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -323,7 +323,10 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
*/
u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
bool is_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD ||
- insn->src_reg == BPF_PSEUDO_MAP_VALUE;
+ insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
+ insn->src_reg == BPF_PSEUDO_MAP_IDX ||
+ insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE ||
+ insn->src_reg == BPF_PSEUDO_BTF_ID;
char tmp[64];
if (is_ptr && !allow_ptr_leaks)
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index b77db7413f8c..ea2d60dc1fee 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -145,7 +145,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
mutex_lock(&d->mutex);
if (!d->image) {
- d->image = bpf_prog_pack_alloc(PAGE_SIZE, bpf_jit_fill_hole_with_zero);
+ d->image = bpf_prog_pack_alloc(PAGE_SIZE, bpf_jit_fill_hole_with_zero, false);
if (!d->image)
goto out;
d->rw_image = bpf_jit_alloc_exec(PAGE_SIZE);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 21a365d436a5..6515d4d3c003 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7996,9 +7996,10 @@ reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
return field;
}
-static int check_func_arg_reg_off(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg, argno_t argno,
- enum bpf_arg_type arg_type)
+static int __check_func_arg_reg_off(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, argno_t argno,
+ enum bpf_arg_type arg_type,
+ bool btf_id_fixed_off_ok)
{
u32 type = reg->type;
@@ -8055,12 +8056,11 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
/* When referenced PTR_TO_BTF_ID is passed to release function,
* its fixed offset must be 0. In the other cases, fixed offset
- * can be non-zero. This was already checked above. So pass
- * fixed_off_ok as true to allow fixed offset for all other
- * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
- * still need to do checks instead of returning.
+ * can be non-zero unless the caller requires otherwise.
+ * var_off always must be 0 for PTR_TO_BTF_ID, hence we still
+ * need to do checks instead of returning.
*/
- return __check_ptr_off_reg(env, reg, argno, true);
+ return __check_ptr_off_reg(env, reg, argno, btf_id_fixed_off_ok);
case PTR_TO_CTX:
/*
* Allow fixed and variable offsets for syscall context, but
@@ -8076,6 +8076,13 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env,
}
}
+static int check_func_arg_reg_off(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, argno_t argno,
+ enum bpf_arg_type arg_type)
+{
+ return __check_func_arg_reg_off(env, reg, argno, arg_type, true);
+}
+
static int check_arg_const_str(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, argno_t argno)
{
@@ -11947,6 +11954,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
enum bpf_arg_type arg_type = ARG_DONTCARE;
argno_t argno = argno_from_arg(i + 1);
int regno = reg_from_argno(argno);
+ bool btf_id_fixed_off_ok = true;
u32 ref_id, type_size;
bool is_ret_buf_sz = false;
int kf_arg_type;
@@ -12120,7 +12128,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_MEM:
case KF_ARG_PTR_TO_MEM_SIZE:
case KF_ARG_PTR_TO_CALLBACK:
- case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
case KF_ARG_PTR_TO_CONST_STR:
case KF_ARG_PTR_TO_WORKQUEUE:
case KF_ARG_PTR_TO_TIMER:
@@ -12134,6 +12141,10 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_CTX:
arg_type = ARG_PTR_TO_CTX;
break;
+ case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
+ arg_type = ARG_PTR_TO_BTF_ID;
+ btf_id_fixed_off_ok = false;
+ break;
default:
verifier_bug(env, "unknown kfunc arg type %d", kf_arg_type);
return -EFAULT;
@@ -12141,7 +12152,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (regno == meta->release_regno)
arg_type |= OBJ_RELEASE;
- ret = check_func_arg_reg_off(env, reg, argno, arg_type);
+ ret = __check_func_arg_reg_off(env, reg, argno, arg_type,
+ btf_id_fixed_off_ok);
if (ret < 0)
return ret;
@@ -19994,13 +20006,13 @@ err_unlock:
if (!is_priv)
mutex_unlock(&bpf_verifier_lock);
bpf_clear_insn_aux_data(env, 0, env->prog->len);
- vfree(env->insn_aux_data);
err_free_env:
bpf_stack_liveness_free(env);
kvfree(env->cfg.insn_postorder);
kvfree(env->scc_info);
kvfree(env->succ);
kvfree(env->gotox_tmp_buf);
+ vfree(env->insn_aux_data);
kvfree(env);
return ret;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 13e38e89a1f3..f0e2e131a9a5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1009,6 +1009,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->mm_cid.active = 0;
INIT_HLIST_NODE(&tsk->mm_cid.node);
#endif
+
+#ifdef CONFIG_BPF_SYSCALL
+ RCU_INIT_POINTER(tsk->bpf_storage, NULL);
+ tsk->bpf_ctx = NULL;
+#endif
return tsk;
free_stack:
@@ -2247,10 +2252,6 @@ __latent_entropy struct task_struct *copy_process(
p->sequential_io = 0;
p->sequential_io_avg = 0;
#endif
-#ifdef CONFIG_BPF_SYSCALL
- RCU_INIT_POINTER(p->bpf_storage, NULL);
- p->bpf_ctx = NULL;
-#endif
unwind_task_init(p);