From 0c93b7d85d40b690f04786ea0f18798b73182e4f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 12:06:51 -0400 Subject: bpf: reject invalid names right in ->lookup() ... and other methods won't see them at all Signed-off-by: Al Viro --- kernel/bpf/inode.c | 37 ++++++++----------------------------- 1 file changed, 8 insertions(+), 29 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index f2ece3c174a5..35d21c189bb0 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -119,18 +119,10 @@ static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) return 0; } -static bool bpf_dname_reserved(const struct dentry *dentry) -{ - return strchr(dentry->d_name.name, '.'); -} - static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; - if (bpf_dname_reserved(dentry)) - return -EPERM; - inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -152,9 +144,6 @@ static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, { struct inode *inode; - if (bpf_dname_reserved(dentry)) - return -EPERM; - inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -187,31 +176,21 @@ static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, } } -static int bpf_link(struct dentry *old_dentry, struct inode *dir, - struct dentry *new_dentry) +static struct dentry * +bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) { - if (bpf_dname_reserved(new_dentry)) - return -EPERM; - - return simple_link(old_dentry, dir, new_dentry); -} - -static int bpf_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) -{ - if (bpf_dname_reserved(new_dentry)) - return -EPERM; - - return simple_rename(old_dir, old_dentry, new_dir, new_dentry); + if (strchr(dentry->d_name.name, '.')) + return ERR_PTR(-EPERM); + return simple_lookup(dir, dentry, flags); } static const struct inode_operations bpf_dir_iops = { - .lookup = simple_lookup, + .lookup = bpf_lookup, .mknod = bpf_mkobj, .mkdir = bpf_mkdir, .rmdir = simple_rmdir, - .rename = bpf_rename, - .link = bpf_link, + .rename = simple_rename, + .link = simple_link, .unlink = simple_unlink, }; -- cgit v1.2.3 From cfbcf468454ab4b20f0b4b62da51920b99fdb19e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 28 Apr 2016 12:30:53 -0300 Subject: perf core: Pass max stack as a perf_callchain_entry context This makes perf_callchain_{user,kernel}() receive the max stack as context for the perf_callchain_entry, instead of accessing the global sysctl_perf_event_max_stack. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: David Ahern Cc: Frederic Weisbecker Cc: He Kuang Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wang Nan Cc: Zefan Li Link: http://lkml.kernel.org/n/tip-kolmn1yo40p7jhswxwrc7rrd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/bpf/stackmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index f5a19548be12..a82d7605db3f 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -136,7 +136,8 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) return -EINVAL; - trace = get_perf_callchain(regs, init_nr, kernel, user, false, false); + trace = get_perf_callchain(regs, init_nr, kernel, user, + sysctl_perf_event_max_stack, false, false); if (unlikely(!trace)) /* couldn't fetch the stack trace */ -- cgit v1.2.3 From b7552e1bccbe3da9c8e7386c6188e8ea4667c8e7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 18 May 2016 14:14:28 +0200 Subject: bpf: rather use get_random_int for randomizations Start address randomization and blinding in BPF currently use prandom_u32(). prandom_u32() values are not exposed to unpriviledged user space to my knowledge, but given other kernel facilities such as ASLR, stack canaries, etc make use of stronger get_random_int(), we better make use of it here as well given blinding requests successively new random values. get_random_int() has minimal entropy pool depletion, is not cryptographically secure, but doesn't need to be for our use cases here. Suggested-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f1e8a0def99b..b94a36550591 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -231,7 +231,7 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, hdr->pages = size / PAGE_SIZE; hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); - start = (prandom_u32() % hole) & ~(alignment - 1); + start = (get_random_int() % hole) & ~(alignment - 1); /* Leave a random number of instructions before BPF code. */ *image_ptr = &hdr->image[start]; @@ -251,7 +251,7 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, struct bpf_insn *to_buff) { struct bpf_insn *to = to_buff; - u32 imm_rnd = prandom_u32(); + u32 imm_rnd = get_random_int(); s16 off; BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); -- cgit v1.2.3 From e27f4a942a0ee4b84567a3c6cfa84f273e55cbb7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 May 2016 17:22:48 -0500 Subject: bpf: Use mount_nodev not mount_ns to mount the bpf filesystem While reviewing the filesystems that set FS_USERNS_MOUNT I spotted the bpf filesystem. Looking at the code I saw a broken usage of mount_ns with current->nsproxy->mnt_ns. As the code does not acquire a reference to the mount namespace it can not possibly be correct to store the mount namespace on the superblock as it does. Replace mount_ns with mount_nodev so that each mount of the bpf filesystem returns a distinct instance, and the code is not buggy. In discussion with Hannes Frederic Sowa it was reported that the use of mount_ns was an attempt to have one bpf instance per mount namespace, in an attempt to keep resources that pin resources from hiding. That intent simply does not work, the vfs is not built to allow that kind of behavior. Which means that the bpf filesystem really is buggy both semantically and in it's implemenation as it does not nor can it implement the original intent. This change is userspace visible, but my experience with similar filesystems leads me to believe nothing will break with a model of each mount of the bpf filesystem is distinct from all others. Fixes: b2197755b263 ("bpf: add support for persistent maps/progs") Cc: Hannes Frederic Sowa Acked-by: Daniel Borkmann Signed-off-by: "Eric W. Biederman" Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- kernel/bpf/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 71b75d9c81da..04be7021f848 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -357,7 +357,7 @@ static int bpf_fill_super(struct super_block *sb, void *data, int silent) static struct dentry *bpf_mount(struct file_system_type *type, int flags, const char *dev_name, void *data) { - return mount_ns(type, flags, current->nsproxy->mnt_ns, bpf_fill_super); + return mount_nodev(type, flags, data, bpf_fill_super); } static struct file_system_type bpf_fs_type = { -- cgit v1.2.3 From d91b28ed42de99217efb2e8cb0357263d6fb737c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 19 May 2016 18:17:13 -0700 Subject: bpf: support decreasing order in direct packet access when packet headers are accessed in 'decreasing' order (like TCP port may be fetched before the program reads IP src) the llvm may generate the following code: [...] // R7=pkt(id=0,off=22,r=70) r2 = *(u32 *)(r7 +0) // good access [...] r7 += 40 // R7=pkt(id=0,off=62,r=70) r8 = *(u32 *)(r7 +0) // good access [...] r1 = *(u32 *)(r7 -20) // this one will fail though it's within a safe range // it's doing *(u32*)(skb->data + 42) Fix verifier to recognize such code pattern Alos turned out that 'off > range' condition is not a verifier bug. It's a buggy program that may do something like: if (ptr + 50 > data_end) return 0; ptr += 60; *(u32*)ptr; in such case emit "invalid access to packet, off=0 size=4, R1(id=0,off=60,r=50)" error message, so all information is available for the program author to fix the program. Fixes: 969bf05eb3ce ("bpf: direct packet access") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a08d66215245..d54e34874579 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -683,15 +683,11 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off, { struct reg_state *regs = env->cur_state.regs; struct reg_state *reg = ®s[regno]; - int linear_size = (int) reg->range - (int) reg->off; - if (linear_size < 0 || linear_size >= MAX_PACKET_OFF) { - verbose("verifier bug\n"); - return -EFAULT; - } - if (off < 0 || off + size > linear_size) { - verbose("invalid access to packet, off=%d size=%d, allowed=%d\n", - off, size, linear_size); + off += reg->off; + if (off < 0 || off + size > reg->range) { + verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", + off, size, regno, reg->id, reg->off, reg->range); return -EACCES; } return 0; -- cgit v1.2.3 From 1b9b69ecb3a5236d4d3da0f0fa11af916371841e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 19 May 2016 18:17:14 -0700 Subject: bpf: teach verifier to recognize imm += ptr pattern Humans don't write C code like: u8 *ptr = skb->data; int imm = 4; imm += ptr; but from llvm backend point of view 'imm' and 'ptr' are registers and imm += ptr may be preferred vs ptr += imm depending which register value will be used further in the code, while verifier can only recognize ptr += imm. That caused small unrelated changes in the C code of the bpf program to trigger rejection by the verifier. Therefore teach the verifier to recognize both ptr += imm and imm += ptr. For example: when R6=pkt(id=0,off=0,r=62) R7=imm22 after r7 += r6 instruction will be R6=pkt(id=0,off=0,r=62) R7=pkt(id=0,off=22,r=62) Fixes: 969bf05eb3ce ("bpf: direct packet access") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d54e34874579..668e07903c8f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1245,6 +1245,7 @@ static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn) struct reg_state *regs = env->cur_state.regs; struct reg_state *dst_reg = ®s[insn->dst_reg]; struct reg_state *src_reg = ®s[insn->src_reg]; + struct reg_state tmp_reg; s32 imm; if (BPF_SRC(insn->code) == BPF_K) { @@ -1267,6 +1268,19 @@ add_imm: */ dst_reg->off += imm; } else { + if (src_reg->type == PTR_TO_PACKET) { + /* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */ + tmp_reg = *dst_reg; /* save r7 state */ + *dst_reg = *src_reg; /* copy pkt_ptr state r6 into r7 */ + src_reg = &tmp_reg; /* pretend it's src_reg state */ + /* if the checks below reject it, the copy won't matter, + * since we're rejecting the whole program. If all ok, + * then imm22 state will be added to r7 + * and r7 will be pkt(id=0,off=22,r=62) while + * r6 will stay as pkt(id=0,off=0,r=62) + */ + } + if (src_reg->type == CONST_IMM) { /* pkt_ptr += reg where reg is known constant */ imm = src_reg->imm; @@ -1565,7 +1579,9 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) return 0; } else if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && - dst_reg->type == PTR_TO_PACKET) { + (dst_reg->type == PTR_TO_PACKET || + (BPF_SRC(insn->code) == BPF_X && + regs[insn->src_reg].type == PTR_TO_PACKET))) { /* ptr_to_packet += K|X */ return check_packet_ptr_add(env, insn); } else if (BPF_CLASS(insn->code) == BPF_ALU64 && -- cgit v1.2.3 From 612bacad78ba6d0a91166fc4487af114bac172a8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 22 May 2016 23:16:18 +0200 Subject: bpf, inode: disallow userns mounts Follow-up to commit e27f4a942a0e ("bpf: Use mount_nodev not mount_ns to mount the bpf filesystem"), which removes the FS_USERNS_MOUNT flag. The original idea was to have a per mountns instance instead of a single global fs instance, but that didn't work out and we had to switch to mount_nodev() model. The intent of that middle ground was that we avoid users who don't play nice to create endless instances of bpf fs which are difficult to control and discover from an admin point of view, but at the same time it would have allowed us to be more flexible with regard to namespaces. Therefore, since we now did the switch to mount_nodev() as a fix where individual instances are created, we also need to remove userns mount flag along with it to avoid running into mentioned situation. I don't expect any breakage at this early point in time with removing the flag and we can revisit this later should the requirement for this come up with future users. This and commit e27f4a942a0e have been split to facilitate tracking should any of them run into the unlikely case of causing a regression. Fixes: b2197755b263 ("bpf: add support for persistent maps/progs") Signed-off-by: Daniel Borkmann Acked-by: Hannes Frederic Sowa Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/inode.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 04be7021f848..318858edb1cd 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -365,7 +365,6 @@ static struct file_system_type bpf_fs_type = { .name = "bpf", .mount = bpf_mount, .kill_sb = kill_litter_super, - .fs_flags = FS_USERNS_MOUNT, }; MODULE_ALIAS_FS("bpf"); -- cgit v1.2.3