From 8bab8dded67d026c39367bbd5e27d2f6c556c38e Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Fri, 4 Apr 2008 14:29:57 -0700 Subject: cgroups: add cgroup support for enabling controllers at boot time The effects of cgroup_disable=foo are: - foo isn't auto-mounted if you mount all cgroups in a single hierarchy - foo isn't visible as an individually mountable subsystem As a result there will only ever be one call to foo->create(), at init time; all processes will stay in this group, and the group will never be mounted on a visible hierarchy. Any additional effects (e.g. not allocating metadata) are up to the foo subsystem. This doesn't handle early_init subsystems (their "disabled" bit isn't set be, but it could easily be extended to do so if any of the early_init systems wanted it - I think it would just involve some nastier parameter processing since it would occur before the command-line argument parser had been run. Hugh said: Ballpark figures, I'm trying to get this question out rather than processing the exact numbers: CONFIG_CGROUP_MEM_RES_CTLR adds 15% overhead to the affected paths, booting with cgroup_disable=memory cuts that back to 1% overhead (due to slightly bigger struct page). I'm no expert on distros, they may have no interest whatever in CONFIG_CGROUP_MEM_RES_CTLR=y; and the rest of us can easily build with or without it, or apply the cgroup_disable=memory patches. Unix bench's execl test result on x86_64 was == just after boot without mounting any cgroup fs.== mem_cgorup=off : Execl Throughput 43.0 3150.1 732.6 mem_cgroup=on : Execl Throughput 43.0 2932.6 682.0 == [lizf@cn.fujitsu.com: fix boot option parsing] Signed-off-by: Balbir Singh Cc: Paul Menage Cc: Balbir Singh Cc: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Sudhir Kumar Cc: YAMAMOTO Takashi Cc: David Rientjes Signed-off-by: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 53d86b4b0ce0..62f1a5231fe9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -782,7 +782,14 @@ static int parse_cgroupfs_options(char *data, if (!*token) return -EINVAL; if (!strcmp(token, "all")) { - opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1; + /* Add all non-disabled subsystems */ + int i; + opts->subsys_bits = 0; + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (!ss->disabled) + opts->subsys_bits |= 1ul << i; + } } else if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); } else if (!strncmp(token, "release_agent=", 14)) { @@ -800,7 +807,8 @@ static int parse_cgroupfs_options(char *data, for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { ss = subsys[i]; if (!strcmp(token, ss->name)) { - set_bit(i, &opts->subsys_bits); + if (!ss->disabled) + set_bit(i, &opts->subsys_bits); break; } } @@ -2600,13 +2608,13 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v) { int i; - seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\n"); + seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n"); mutex_lock(&cgroup_mutex); for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; - seq_printf(m, "%s\t%lu\t%d\n", + seq_printf(m, "%s\t%lu\t%d\t%d\n", ss->name, ss->root->subsys_bits, - ss->root->number_of_cgroups); + ss->root->number_of_cgroups, !ss->disabled); } mutex_unlock(&cgroup_mutex); return 0; @@ -3010,3 +3018,27 @@ static void cgroup_release_agent(struct work_struct *work) spin_unlock(&release_list_lock); mutex_unlock(&cgroup_mutex); } + +static int __init cgroup_disable(char *str) +{ + int i; + char *token; + + while ((token = strsep(&str, ",")) != NULL) { + if (!*token) + continue; + + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + + if (!strcmp(token, ss->name)) { + ss->disabled = 1; + printk(KERN_INFO "Disabling %s control group" + " subsystem\n", ss->name); + break; + } + } + } + return 1; +} +__setup("cgroup_disable=", cgroup_disable); -- cgit v1.2.3 From 54a015104136974262afa4b8ddd943ea70dec8a2 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 10 Apr 2008 15:37:38 -0700 Subject: asmlinkage_protect replaces prevent_tail_call The prevent_tail_call() macro works around the problem of the compiler clobbering argument words on the stack, which for asmlinkage functions is the caller's (user's) struct pt_regs. The tail/sibling-call optimization is not the only way that the compiler can decide to use stack argument words as scratch space, which we have to prevent. Other optimizations can do it too. Until we have new compiler support to make "asmlinkage" binding on the compiler's own use of the stack argument frame, we have work around all the manifestations of this issue that crop up. More cases seem to be prevented by also keeping the incoming argument variables live at the end of the function. This makes their original stack slots attractive places to leave those variables, so the compiler tends not clobber them for something else. It's still no guarantee, but it handles some observed cases that prevent_tail_call() did not. Signed-off-by: Roland McGrath Signed-off-by: Linus Torvalds --- kernel/exit.c | 4 ++-- kernel/uid16.c | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 53872bf993fa..073005b1cfb2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1608,7 +1608,7 @@ asmlinkage long sys_waitid(int which, pid_t upid, put_pid(pid); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(5, ret, which, upid, infop, options, ru); return ret; } @@ -1640,7 +1640,7 @@ asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr, put_pid(pid); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(4, ret, upid, stat_addr, options, ru); return ret; } diff --git a/kernel/uid16.c b/kernel/uid16.c index dd308ba4e03b..3e41c1673e2f 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -21,7 +21,7 @@ asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gi { long ret = sys_chown(filename, low2highuid(user), low2highgid(group)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(3, ret, filename, user, group); return ret; } @@ -29,7 +29,7 @@ asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_g { long ret = sys_lchown(filename, low2highuid(user), low2highgid(group)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(3, ret, filename, user, group); return ret; } @@ -37,7 +37,7 @@ asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group) { long ret = sys_fchown(fd, low2highuid(user), low2highgid(group)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(3, ret, fd, user, group); return ret; } @@ -45,7 +45,7 @@ asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid) { long ret = sys_setregid(low2highgid(rgid), low2highgid(egid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(2, ret, rgid, egid); return ret; } @@ -53,7 +53,7 @@ asmlinkage long sys_setgid16(old_gid_t gid) { long ret = sys_setgid(low2highgid(gid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(1, ret, gid); return ret; } @@ -61,7 +61,7 @@ asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid) { long ret = sys_setreuid(low2highuid(ruid), low2highuid(euid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(2, ret, ruid, euid); return ret; } @@ -69,7 +69,7 @@ asmlinkage long sys_setuid16(old_uid_t uid) { long ret = sys_setuid(low2highuid(uid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(1, ret, uid); return ret; } @@ -78,7 +78,7 @@ asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) long ret = sys_setresuid(low2highuid(ruid), low2highuid(euid), low2highuid(suid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(3, ret, ruid, euid, suid); return ret; } @@ -98,7 +98,7 @@ asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) long ret = sys_setresgid(low2highgid(rgid), low2highgid(egid), low2highgid(sgid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(3, ret, rgid, egid, sgid); return ret; } @@ -117,7 +117,7 @@ asmlinkage long sys_setfsuid16(old_uid_t uid) { long ret = sys_setfsuid(low2highuid(uid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(1, ret, uid); return ret; } @@ -125,7 +125,7 @@ asmlinkage long sys_setfsgid16(old_gid_t gid) { long ret = sys_setfsgid(low2highgid(gid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(1, ret, gid); return ret; } -- cgit v1.2.3 From b6c3006d204a0b86e1ebe02ca38f9f071a03c7ef Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Thu, 10 Apr 2008 21:29:16 -0700 Subject: cgroups: include hierarchy ids in /proc//cgroup Extend the /proc//cgroup file to include the appropriate hierarchy ID on each line. Currently this ID isn't really needed since a hierarchy can be completely identified by the set of subsystems bound to it, but this is likely to change in the near future in order to support stateless subsystems and merging/rebinding of subsystems. Getting this change into 2.6.25 reduces the need for an API change later. Signed-off-by: Paul Menage Cc: Balbir Singh Cc: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 62f1a5231fe9..2727f9238359 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2569,6 +2569,7 @@ static int proc_cgroup_show(struct seq_file *m, void *v) /* Skip this hierarchy if it has no active subsystems */ if (!root->actual_subsys_bits) continue; + seq_printf(m, "%lu:", root->subsys_bits); for_each_subsys(root, ss) seq_printf(m, "%s%s", count++ ? "," : "", ss->name); seq_putc(m, ':'); -- cgit v1.2.3 From e2df9e0905136eebeca66eb9a994ca48d0fa7990 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 14 Apr 2008 08:50:02 +0200 Subject: revert "sched: fix fair sleepers" revert "sched: fix fair sleepers" (e22ecef1d2658ba54ed7d3fdb5d60829fb434c23), because it is causing audio skipping, see: http://bugzilla.kernel.org/show_bug.cgi?id=10428 the patch is correct and the real cause of the skipping is not understood (tracing makes it go away), but time has run out so we'll revert it and re-try in 2.6.26. Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 86a93376282c..0080968d3e4a 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -510,10 +510,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) if (!initial) { /* sleeps upto a single latency don't count. */ - if (sched_feat(NEW_FAIR_SLEEPERS)) { - vruntime -= calc_delta_fair(sysctl_sched_latency, - &cfs_rq->load); - } + if (sched_feat(NEW_FAIR_SLEEPERS)) + vruntime -= sysctl_sched_latency; /* ensure we never gain time by being placed backwards. */ vruntime = max_vruntime(se->vruntime, vruntime); -- cgit v1.2.3