diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/bpf_inode_storage.c | 9 | ||||
| -rw-r--r-- | kernel/bpf/core.c | 68 | ||||
| -rw-r--r-- | kernel/bpf/disasm.c | 5 | ||||
| -rw-r--r-- | kernel/bpf/dispatcher.c | 2 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 34 | ||||
| -rw-r--r-- | kernel/fork.c | 9 |
6 files changed, 104 insertions, 23 deletions
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 0da8d923e39d..f9e81060c1f4 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -178,6 +178,15 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr) { + /* + * Do not allow allocation of BPF_MAP_TYPE_INODE_STORAGE if the BPF LSM + * was not initialized by the LSM framework at boot. Without proper + * initialization, the BPF inode security blob offset remains unprepared, + * causing bpf_inode() to calculate an invalid memory offset and corrupt + * inode->i_security. + */ + if (!bpf_lsm_initialized) + return ERR_PTR(-EOPNOTSUPP); return bpf_local_storage_map_alloc(attr, &inode_cache); } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 649cce41e13f..6e19a030da6f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -20,6 +20,7 @@ #include <uapi/linux/btf.h> #include <linux/filter.h> #include <linux/skbuff.h> +#include <linux/static_call.h> #include <linux/vmalloc.h> #include <linux/prandom.h> #include <linux/bpf.h> @@ -875,6 +876,7 @@ int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, struct bpf_prog_pack { struct list_head list; void *ptr; + bool arch_flush_needed; unsigned long bitmap[]; }; @@ -883,6 +885,15 @@ void bpf_jit_fill_hole_with_zero(void *area, unsigned int size) memset(area, 0, size); } +DEFINE_STATIC_CALL_NULL(bpf_arch_pred_flush, bpf_arch_pred_flush); + +/* + * Enabled once bpf_arch_pred_flush points at a real flush routine. Lets the + * pack allocator test "is a predictor flush wired up at all" with a cheap + * static branch instead of repeatedly querying the static call target. + */ +DEFINE_STATIC_KEY_FALSE(bpf_pred_flush_enabled); + #define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE) static DEFINE_MUTEX(pack_mutex); @@ -918,6 +929,8 @@ static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_ins bpf_fill_ill_insns(pack->ptr, BPF_PROG_PACK_SIZE); bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE); + if (static_branch_unlikely(&bpf_pred_flush_enabled)) + pack->arch_flush_needed = true; set_vm_flush_reset_perms(pack->ptr); err = set_memory_rox((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); @@ -932,15 +945,23 @@ out: return NULL; } -void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) +void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns, bool was_classic) { unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size); - struct bpf_prog_pack *pack; - unsigned long pos; + struct bpf_prog_pack *pack, *fallback_pack = NULL; + unsigned long pos, fallback_pos = 0; void *ptr = NULL; mutex_lock(&pack_mutex); if (size > BPF_PROG_PACK_SIZE) { + /* + * Allocations larger than a pack get their own pages, and + * predictors are not flushed for such allocation. This is only + * safe because cBPF programs (the unprivileged attack surface) + * are bounded well below a pack size. + */ + if (was_classic && static_branch_unlikely(&bpf_pred_flush_enabled)) + pr_warn_once("BPF: Predictors not flushed for allocations greater than BPF_PROG_PACK_SIZE\n"); size = round_up(size, PAGE_SIZE); ptr = bpf_jit_alloc_exec(size); if (ptr) { @@ -960,8 +981,29 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) list_for_each_entry(pack, &pack_list, list) { pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0, nbits, 0); - if (pos < BPF_PROG_CHUNK_COUNT) + if (pos >= BPF_PROG_CHUNK_COUNT) + continue; + /* Flush not enabled, use any pack */ + if (!static_branch_unlikely(&bpf_pred_flush_enabled)) goto found_free_area; + /* + * cBPF reuse of a dirty pack triggers a flush, so prefer a + * clean pack for cBPF. eBPF never flushes, so steer it to a + * dirty pack and keep clean packs free for cBPF. + */ + if (was_classic ^ pack->arch_flush_needed) + goto found_free_area; + if (!fallback_pack) { + fallback_pack = pack; + fallback_pos = pos; + } + } + + /* No preferred pack found */ + if (fallback_pack) { + pack = fallback_pack; + pos = fallback_pos; + goto found_free_area; } pack = alloc_new_pack(bpf_fill_ill_insns); @@ -971,6 +1013,16 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) pos = 0; found_free_area: + /* Flush only for cBPF as it may contain a crafted gadget */ + if (static_branch_unlikely(&bpf_pred_flush_enabled) && + pack->arch_flush_needed && + was_classic) { + struct bpf_prog_pack *p; + + static_call_cond(bpf_arch_pred_flush)(); + list_for_each_entry(p, &pack_list, list) + p->arch_flush_needed = false; + } bitmap_set(pack->bitmap, pos, nbits); ptr = (void *)(pack->ptr) + (pos << BPF_PROG_CHUNK_SHIFT); @@ -1008,6 +1060,9 @@ void bpf_prog_pack_free(void *ptr, u32 size) "bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n"); bitmap_clear(pack->bitmap, pos, nbits); + + if (static_branch_unlikely(&bpf_pred_flush_enabled)) + pack->arch_flush_needed = true; if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0, BPF_PROG_CHUNK_COUNT, 0) == 0) { list_del(&pack->list); @@ -1130,7 +1185,8 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, struct bpf_binary_header **rw_header, u8 **rw_image, - bpf_jit_fill_hole_t bpf_fill_ill_insns) + bpf_jit_fill_hole_t bpf_fill_ill_insns, + bool was_classic) { struct bpf_binary_header *ro_header; u32 size, hole, start; @@ -1143,7 +1199,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr, if (bpf_jit_charge_modmem(size)) return NULL; - ro_header = bpf_prog_pack_alloc(size, bpf_fill_ill_insns); + ro_header = bpf_prog_pack_alloc(size, bpf_fill_ill_insns, was_classic); if (!ro_header) { bpf_jit_uncharge_modmem(size); return NULL; diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index f8a3c7eb451e..0391b3bc0073 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -323,7 +323,10 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, */ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; bool is_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD || - insn->src_reg == BPF_PSEUDO_MAP_VALUE; + insn->src_reg == BPF_PSEUDO_MAP_VALUE || + insn->src_reg == BPF_PSEUDO_MAP_IDX || + insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE || + insn->src_reg == BPF_PSEUDO_BTF_ID; char tmp[64]; if (is_ptr && !allow_ptr_leaks) diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index b77db7413f8c..ea2d60dc1fee 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -145,7 +145,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, mutex_lock(&d->mutex); if (!d->image) { - d->image = bpf_prog_pack_alloc(PAGE_SIZE, bpf_jit_fill_hole_with_zero); + d->image = bpf_prog_pack_alloc(PAGE_SIZE, bpf_jit_fill_hole_with_zero, false); if (!d->image) goto out; d->rw_image = bpf_jit_alloc_exec(PAGE_SIZE); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 21a365d436a5..6515d4d3c003 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7996,9 +7996,10 @@ reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields) return field; } -static int check_func_arg_reg_off(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, argno_t argno, - enum bpf_arg_type arg_type) +static int __check_func_arg_reg_off(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, argno_t argno, + enum bpf_arg_type arg_type, + bool btf_id_fixed_off_ok) { u32 type = reg->type; @@ -8055,12 +8056,11 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env, case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU: /* When referenced PTR_TO_BTF_ID is passed to release function, * its fixed offset must be 0. In the other cases, fixed offset - * can be non-zero. This was already checked above. So pass - * fixed_off_ok as true to allow fixed offset for all other - * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we - * still need to do checks instead of returning. + * can be non-zero unless the caller requires otherwise. + * var_off always must be 0 for PTR_TO_BTF_ID, hence we still + * need to do checks instead of returning. */ - return __check_ptr_off_reg(env, reg, argno, true); + return __check_ptr_off_reg(env, reg, argno, btf_id_fixed_off_ok); case PTR_TO_CTX: /* * Allow fixed and variable offsets for syscall context, but @@ -8076,6 +8076,13 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env, } } +static int check_func_arg_reg_off(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, argno_t argno, + enum bpf_arg_type arg_type) +{ + return __check_func_arg_reg_off(env, reg, argno, arg_type, true); +} + static int check_arg_const_str(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno) { @@ -11947,6 +11954,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ enum bpf_arg_type arg_type = ARG_DONTCARE; argno_t argno = argno_from_arg(i + 1); int regno = reg_from_argno(argno); + bool btf_id_fixed_off_ok = true; u32 ref_id, type_size; bool is_ret_buf_sz = false; int kf_arg_type; @@ -12120,7 +12128,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_MEM: case KF_ARG_PTR_TO_MEM_SIZE: case KF_ARG_PTR_TO_CALLBACK: - case KF_ARG_PTR_TO_REFCOUNTED_KPTR: case KF_ARG_PTR_TO_CONST_STR: case KF_ARG_PTR_TO_WORKQUEUE: case KF_ARG_PTR_TO_TIMER: @@ -12134,6 +12141,10 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_CTX: arg_type = ARG_PTR_TO_CTX; break; + case KF_ARG_PTR_TO_REFCOUNTED_KPTR: + arg_type = ARG_PTR_TO_BTF_ID; + btf_id_fixed_off_ok = false; + break; default: verifier_bug(env, "unknown kfunc arg type %d", kf_arg_type); return -EFAULT; @@ -12141,7 +12152,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (regno == meta->release_regno) arg_type |= OBJ_RELEASE; - ret = check_func_arg_reg_off(env, reg, argno, arg_type); + ret = __check_func_arg_reg_off(env, reg, argno, arg_type, + btf_id_fixed_off_ok); if (ret < 0) return ret; @@ -19994,13 +20006,13 @@ err_unlock: if (!is_priv) mutex_unlock(&bpf_verifier_lock); bpf_clear_insn_aux_data(env, 0, env->prog->len); - vfree(env->insn_aux_data); err_free_env: bpf_stack_liveness_free(env); kvfree(env->cfg.insn_postorder); kvfree(env->scc_info); kvfree(env->succ); kvfree(env->gotox_tmp_buf); + vfree(env->insn_aux_data); kvfree(env); return ret; } diff --git a/kernel/fork.c b/kernel/fork.c index 13e38e89a1f3..f0e2e131a9a5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1009,6 +1009,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->mm_cid.active = 0; INIT_HLIST_NODE(&tsk->mm_cid.node); #endif + +#ifdef CONFIG_BPF_SYSCALL + RCU_INIT_POINTER(tsk->bpf_storage, NULL); + tsk->bpf_ctx = NULL; +#endif return tsk; free_stack: @@ -2247,10 +2252,6 @@ __latent_entropy struct task_struct *copy_process( p->sequential_io = 0; p->sequential_io_avg = 0; #endif -#ifdef CONFIG_BPF_SYSCALL - RCU_INIT_POINTER(p->bpf_storage, NULL); - p->bpf_ctx = NULL; -#endif unwind_task_init(p); |
