Diffstat (limited to 'recipes/linux/linux-omap-2.6.37rc/0008-Miracle-patch.patch')
-rw-r--r--  recipes/linux/linux-omap-2.6.37rc/0008-Miracle-patch.patch | 504
 1 file changed, 504 insertions(+), 0 deletions(-)
diff --git a/recipes/linux/linux-omap-2.6.37rc/0008-Miracle-patch.patch b/recipes/linux/linux-omap-2.6.37rc/0008-Miracle-patch.patch
new file mode 100644
index 0000000000..d8e4c741fa
--- /dev/null
+++ b/recipes/linux/linux-omap-2.6.37rc/0008-Miracle-patch.patch
@@ -0,0 +1,504 @@
+From f7410950ad86e2b70fca1b02e4c4097fe9280bef Mon Sep 17 00:00:00 2001
+From: Mike Galbraith <efault@gmx.de>
+Date: Fri, 19 Nov 2010 12:52:42 +0100
+Subject: [PATCH 08/11] Miracle patch
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+On Sun, 2010-11-14 at 16:26 -0800, Linus Torvalds wrote:
+> On Sun, Nov 14, 2010 at 4:15 PM, Linus Torvalds
+> <torvalds@linux-foundation.org> wrote:
+> >
+> > THAT is why I think it's so silly to try to be so strict and walk over
+> > all processes while holding a couple of spinlocks.
+>
+> Btw, let me say that I think the patch is great even with that thing
+> in. It looks clean, the thing I'm complaining about is not a big deal,
+> and it seems to perform very much as advertized. The difference with
+> autogroup scheduling is very noticeable with a simple "make -j64"
+> kernel compile.
+>
+> So I really don't think it's a big deal. The sysctl handler isn't even
+> complicated. But boy does it hurt my eyes to see a spinlock held
+> around a "do_each_thread()". And I do get the feeling that the
+> simplest way to fix it would be to just remove the code entirely, and
+> just say that "enabling/disabling may be delayed for old processes
+> with existing autogroups".
+
+Which is what I just did. If the oddball case isn't a big deal, the
+patch shrinks, which is a good thing. I just wanted to cover all bases.
+
+Patchlet with handler whacked:
+
+A recurring complaint from CFS users is that parallel kbuild has a negative
+impact on desktop interactivity. This patch implements an idea from Linus
+to automatically create task groups. It implements only Linus' per-tty task
+group suggestion, and only for fair class tasks, but leaves the way open
+for enhancement.
+
+Implementation: each task's signal struct contains an inherited pointer to a
+refcounted autogroup struct containing a task group pointer, the default for
+all tasks pointing to the init_task_group. When a task calls __proc_set_tty(),
+the process-wide reference to the default group is dropped, a new task group is
+created, and the process is moved into the new task group. Children thereafter
+inherit this task group and increase its refcount. On exit, a reference to the
+current task group is dropped when the last reference to each signal struct is
+dropped. The task group is destroyed when the last signal struct referencing
+it is freed. At runqueue selection time, IFF a task has no cgroup assignment,
+its current autogroup is used.
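+
+In a nutshell (condensed from kernel/sched_autogroup.c below):
+
+ struct autogroup {
+ 	struct kref kref;	/* shared by every signal struct pointing here */
+ 	struct task_group *tg;	/* the task group tasks are scheduled in */
+ };
+
+ /* fork: the child's signal struct inherits the parent's autogroup */
+ void sched_autogroup_fork(struct signal_struct *sig)
+ {
+ 	sig->autogroup = autogroup_kref_get(current->signal->autogroup);
+ }
+
+ /* exit: the last signal struct reference puts the group; the final
+  * kref_put() triggers autogroup_destroy(), freeing the task group */
+ void sched_autogroup_exit(struct signal_struct *sig)
+ {
+ 	autogroup_kref_put(sig->autogroup);
+ }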
+
+The feature is enabled from boot by default if CONFIG_SCHED_AUTOGROUP is
+selected, but can be disabled via the boot option noautogroup, and can also
+be turned on/off on the fly via..
+ echo [01] > /proc/sys/kernel/sched_autogroup_enabled
+..which will automatically move tasks to/from the root task group.
+
+Some numbers.
+
+A 100% hog overhead measurement proggy pinned to the same CPU as a make -j10.
+
+About the measurement proggy:
+ pert/s = perturbations/sec
+ min/max/avg = scheduler service latencies in usecs
+ sum/s = time accrued by the competition per sample period (1 sec here)
+ overhead = %CPU received by the competition per sample period
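+
+The guts of such a proggy look roughly like the below (a minimal sketch,
+assuming a plain clock_gettime() polling loop; the actual proggy is not
+included here, and additionally reports min/avg service latencies):
+
+ #include <stdio.h>
+ #include <time.h>
+
+ /* Spin at 100% CPU; any gap between successive clock reads beyond
+  * THRESH_US means we were preempted -- one perturbation, with the
+  * gap being time accrued by the competition. */
+ #define THRESH_US 100.0
+
+ static double now_us(void)
+ {
+ 	struct timespec ts;
+
+ 	clock_gettime(CLOCK_MONOTONIC, &ts);
+ 	return ts.tv_sec * 1e6 + ts.tv_nsec / 1e3;
+ }
+
+ int main(void)
+ {
+ 	double start = now_us(), last = start, sum = 0, max = 0;
+ 	long perts = 0;
+
+ 	for (;;) {
+ 		double t = now_us(), delta = t - last;
+
+ 		last = t;
+ 		if (delta > THRESH_US) {
+ 			perts++;
+ 			sum += delta;
+ 			if (delta > max)
+ 				max = delta;
+ 		}
+ 		if (t - start >= 1e6) {	/* 1 sec sample period */
+ 			printf("pert/s: %ld max:%.2f sum/s:%.0fus overhead:%.2f%%\n",
+ 			       perts, max, sum, 100.0 * sum / (t - start));
+ 			start = t;
+ 			perts = 0;
+ 			sum = max = 0;
+ 		}
+ 	}
+ 	return 0;
+ }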
+
+pert/s: 31 >40475.37us: 3 min: 0.37 max:48103.60 avg:29573.74 sum/s:916786us overhead:90.24%
+pert/s: 23 >41237.70us: 12 min: 0.36 max:56010.39 avg:40187.01 sum/s:924301us overhead:91.99%
+pert/s: 24 >42150.22us: 12 min: 8.86 max:61265.91 avg:39459.91 sum/s:947038us overhead:92.20%
+pert/s: 26 >42344.91us: 11 min: 3.83 max:52029.60 avg:36164.70 sum/s:940282us overhead:91.12%
+pert/s: 24 >44262.90us: 14 min: 5.05 max:82735.15 avg:40314.33 sum/s:967544us overhead:92.22%
+
+Same load with this patch applied.
+
+pert/s: 229 >5484.43us: 41 min: 0.15 max:12069.42 avg:2193.81 sum/s:502382us overhead:50.24%
+pert/s: 222 >5652.28us: 43 min: 0.46 max:12077.31 avg:2248.56 sum/s:499181us overhead:49.92%
+pert/s: 211 >5809.38us: 43 min: 0.16 max:12064.78 avg:2381.70 sum/s:502538us overhead:50.25%
+pert/s: 223 >6147.92us: 43 min: 0.15 max:16107.46 avg:2282.17 sum/s:508925us overhead:50.49%
+pert/s: 218 >6252.64us: 43 min: 0.16 max:12066.13 avg:2324.11 sum/s:506656us overhead:50.27%
+
+Average service latency is an order of magnitude better with autogroup.
+(Imagine that pert were Xorg or whatnot instead)
+
+Using Mathieu Desnoyers' wakeup-latency testcase:
+
+With taskset -c 3 make -j 10 running..
+
+taskset -c 3 ./wakeup-latency& sleep 30;killall wakeup-latency
+
+without:
+maximum latency: 42963.2 µs
+average latency: 9077.0 µs
+missed timer events: 0
+
+with:
+maximum latency: 4160.7 µs
+average latency: 149.4 µs
+missed timer events: 0
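+
+(For reference, wakeup-latency does roughly the below -- a minimal sketch
+assuming a periodic absolute-deadline sleep with oversleep counted as wakeup
+latency; Mathieu's actual testcase is not included here.)
+
+ #include <stdio.h>
+ #include <time.h>
+
+ #define PERIOD_NS 10000000L	/* wake up every 10ms */
+
+ int main(void)
+ {
+ 	struct timespec next, now;
+ 	double max = 0, sum = 0;
+ 	long n = 0;
+
+ 	clock_gettime(CLOCK_MONOTONIC, &next);
+ 	for (;;) {
+ 		double late_us;
+
+ 		next.tv_nsec += PERIOD_NS;
+ 		if (next.tv_nsec >= 1000000000L) {
+ 			next.tv_nsec -= 1000000000L;
+ 			next.tv_sec++;
+ 		}
+ 		/* sleep to the absolute deadline, then see how late we woke */
+ 		clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &next, NULL);
+ 		clock_gettime(CLOCK_MONOTONIC, &now);
+ 		late_us = (now.tv_sec - next.tv_sec) * 1e6 +
+ 			  (now.tv_nsec - next.tv_nsec) / 1e3;
+ 		sum += late_us;
+ 		n++;
+ 		if (late_us > max)
+ 			max = late_us;
+ 		if (!(n % 3000))	/* report every ~30s at 10ms period */
+ 			printf("maximum latency: %.1f us\n"
+ 			       "average latency: %.1f us\n", max, sum / n);
+ 	}
+ 	return 0;
+ }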
+
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+---
+ Documentation/kernel-parameters.txt | 2 +
+ drivers/tty/tty_io.c | 1 +
+ include/linux/sched.h | 19 +++++
+ init/Kconfig | 12 +++
+ kernel/fork.c | 5 +-
+ kernel/sched.c | 25 ++++--
+ kernel/sched_autogroup.c | 140 +++++++++++++++++++++++++++++++++++
+ kernel/sched_autogroup.h | 18 +++++
+ kernel/sysctl.c | 11 +++
+ 9 files changed, 224 insertions(+), 9 deletions(-)
+ create mode 100644 kernel/sched_autogroup.c
+ create mode 100644 kernel/sched_autogroup.h
+
+diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
+index cdd2a6e..b79a8fb 100644
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -1622,6 +1622,8 @@ and is between 256 and 4096 characters. It is defined in the file
+ noapic [SMP,APIC] Tells the kernel to not make use of any
+ IOAPICs that may be present in the system.
+
++ noautogroup Disable scheduler automatic task group creation.
++
+ nobats [PPC] Do not use BATs for mapping kernel lowmem
+ on "Classic" PPC cores.
+
+diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
+index 35480dd..1849f4a 100644
+--- a/drivers/tty/tty_io.c
++++ b/drivers/tty/tty_io.c
+@@ -3169,6 +3169,7 @@ static void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
+ put_pid(tsk->signal->tty_old_pgrp);
+ tsk->signal->tty = tty_kref_get(tty);
+ tsk->signal->tty_old_pgrp = NULL;
++ sched_autogroup_create_attach(tsk);
+ }
+
+ static void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 2c79e92..1e677c2 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -509,6 +509,8 @@ struct thread_group_cputimer {
+ spinlock_t lock;
+ };
+
++struct autogroup;
++
+ /*
+ * NOTE! "signal_struct" does not have it's own
+ * locking, because a shared signal_struct always
+@@ -576,6 +578,9 @@ struct signal_struct {
+
+ struct tty_struct *tty; /* NULL if no tty */
+
++#ifdef CONFIG_SCHED_AUTOGROUP
++ struct autogroup *autogroup;
++#endif
+ /*
+ * Cumulative resource counters for dead threads in the group,
+ * and for reaped dead child processes forked by this group.
+@@ -1931,6 +1936,20 @@ int sched_rt_handler(struct ctl_table *table, int write,
+
+ extern unsigned int sysctl_sched_compat_yield;
+
++#ifdef CONFIG_SCHED_AUTOGROUP
++extern unsigned int sysctl_sched_autogroup_enabled;
++
++extern void sched_autogroup_create_attach(struct task_struct *p);
++extern void sched_autogroup_detach(struct task_struct *p);
++extern void sched_autogroup_fork(struct signal_struct *sig);
++extern void sched_autogroup_exit(struct signal_struct *sig);
++#else
++static inline void sched_autogroup_create_attach(struct task_struct *p) { }
++static inline void sched_autogroup_detach(struct task_struct *p) { }
++static inline void sched_autogroup_fork(struct signal_struct *sig) { }
++static inline void sched_autogroup_exit(struct signal_struct *sig) { }
++#endif
++
+ #ifdef CONFIG_RT_MUTEXES
+ extern int rt_mutex_getprio(struct task_struct *p);
+ extern void rt_mutex_setprio(struct task_struct *p, int prio);
+diff --git a/init/Kconfig b/init/Kconfig
+index c972899..a4985d9 100644
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -741,6 +741,18 @@ config NET_NS
+
+ endif # NAMESPACES
+
++config SCHED_AUTOGROUP
++ bool "Automatic process group scheduling"
++ select CGROUPS
++ select CGROUP_SCHED
++ select FAIR_GROUP_SCHED
++ help
++ This option optimizes the scheduler for common desktop workloads by
++ automatically creating and populating task groups. This separation
++ of workloads isolates aggressive CPU burners (like build jobs) from
++ desktop applications. Task group autogeneration is currently based
++ upon task tty association.
++
+ config MM_OWNER
+ bool
+
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 3b159c5..b6f2475 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -174,8 +174,10 @@ static inline void free_signal_struct(struct signal_struct *sig)
+
+ static inline void put_signal_struct(struct signal_struct *sig)
+ {
+- if (atomic_dec_and_test(&sig->sigcnt))
++ if (atomic_dec_and_test(&sig->sigcnt)) {
++ sched_autogroup_exit(sig);
+ free_signal_struct(sig);
++ }
+ }
+
+ void __put_task_struct(struct task_struct *tsk)
+@@ -904,6 +906,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
+ posix_cpu_timers_init_group(sig);
+
+ tty_audit_fork(sig);
++ sched_autogroup_fork(sig);
+
+ sig->oom_adj = current->signal->oom_adj;
+ sig->oom_score_adj = current->signal->oom_score_adj;
+diff --git a/kernel/sched.c b/kernel/sched.c
+index dc91a4d..b4ed496 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -78,6 +78,7 @@
+
+ #include "sched_cpupri.h"
+ #include "workqueue_sched.h"
++#include "sched_autogroup.h"
+
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/sched.h>
+@@ -605,11 +606,14 @@ static inline int cpu_of(struct rq *rq)
+ */
+ static inline struct task_group *task_group(struct task_struct *p)
+ {
++ struct task_group *tg;
+ struct cgroup_subsys_state *css;
+
+ css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
+ lockdep_is_held(&task_rq(p)->lock));
+- return container_of(css, struct task_group, css);
++ tg = container_of(css, struct task_group, css);
++
++ return autogroup_task_group(p, tg);
+ }
+
+ /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+@@ -2006,6 +2010,7 @@ static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { }
+ #include "sched_idletask.c"
+ #include "sched_fair.c"
+ #include "sched_rt.c"
++#include "sched_autogroup.c"
+ #include "sched_stoptask.c"
+ #ifdef CONFIG_SCHED_DEBUG
+ # include "sched_debug.c"
+@@ -7979,7 +7984,7 @@ void __init sched_init(void)
+ #ifdef CONFIG_CGROUP_SCHED
+ list_add(&init_task_group.list, &task_groups);
+ INIT_LIST_HEAD(&init_task_group.children);
+-
++ autogroup_init(&init_task);
+ #endif /* CONFIG_CGROUP_SCHED */
+
+ #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
+@@ -8509,15 +8514,11 @@ void sched_destroy_group(struct task_group *tg)
+ /* change task's runqueue when it moves between groups.
+ * The caller of this function should have put the task in its new group
+ * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
+- * reflect its new group.
++ * reflect its new group. Called with the runqueue lock held.
+ */
+-void sched_move_task(struct task_struct *tsk)
++void __sched_move_task(struct task_struct *tsk, struct rq *rq)
+ {
+ int on_rq, running;
+- unsigned long flags;
+- struct rq *rq;
+-
+- rq = task_rq_lock(tsk, &flags);
+
+ running = task_current(rq, tsk);
+ on_rq = tsk->se.on_rq;
+@@ -8538,7 +8539,15 @@ void sched_move_task(struct task_struct *tsk)
+ tsk->sched_class->set_curr_task(rq);
+ if (on_rq)
+ enqueue_task(rq, tsk, 0);
++}
+
++void sched_move_task(struct task_struct *tsk)
++{
++ struct rq *rq;
++ unsigned long flags;
++
++ rq = task_rq_lock(tsk, &flags);
++ __sched_move_task(tsk, rq);
+ task_rq_unlock(rq, &flags);
+ }
+ #endif /* CONFIG_CGROUP_SCHED */
+diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
+new file mode 100644
+index 0000000..62f1d0e
+--- /dev/null
++++ b/kernel/sched_autogroup.c
+@@ -0,0 +1,140 @@
++#ifdef CONFIG_SCHED_AUTOGROUP
++
++unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
++
++struct autogroup {
++ struct kref kref;
++ struct task_group *tg;
++};
++
++static struct autogroup autogroup_default;
++
++static void autogroup_init(struct task_struct *init_task)
++{
++ autogroup_default.tg = &init_task_group;
++ kref_init(&autogroup_default.kref);
++ init_task->signal->autogroup = &autogroup_default;
++}
++
++static inline void autogroup_destroy(struct kref *kref)
++{
++ struct autogroup *ag = container_of(kref, struct autogroup, kref);
++ struct task_group *tg = ag->tg;
++
++ kfree(ag);
++ sched_destroy_group(tg);
++}
++
++static inline void autogroup_kref_put(struct autogroup *ag)
++{
++ kref_put(&ag->kref, autogroup_destroy);
++}
++
++static inline struct autogroup *autogroup_kref_get(struct autogroup *ag)
++{
++ kref_get(&ag->kref);
++ return ag;
++}
++
++static inline struct autogroup *autogroup_create(void)
++{
++ struct autogroup *ag = kmalloc(sizeof(*ag), GFP_KERNEL);
++
++ if (!ag)
++ goto out_fail;
++
++ ag->tg = sched_create_group(&init_task_group);
++ kref_init(&ag->kref);
++
++ if (!(IS_ERR(ag->tg)))
++ return ag;
++
++out_fail:
++ if (ag) {
++ kfree(ag);
++ WARN_ON(1);
++ } else
++ WARN_ON(1);
++
++ return autogroup_kref_get(&autogroup_default);
++}
++
++static inline struct task_group *
++autogroup_task_group(struct task_struct *p, struct task_group *tg)
++{
++ int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
++
++ enabled &= (tg == &root_task_group);
++ enabled &= (p->sched_class == &fair_sched_class);
++ enabled &= (!(p->flags & PF_EXITING));
++
++ if (enabled)
++ return p->signal->autogroup->tg;
++
++ return tg;
++}
++
++static void
++autogroup_move_group(struct task_struct *p, struct autogroup *ag)
++{
++ struct autogroup *prev;
++ struct task_struct *t;
++ struct rq *rq;
++ unsigned long flags;
++
++ rq = task_rq_lock(p, &flags);
++ prev = p->signal->autogroup;
++ if (prev == ag) {
++ task_rq_unlock(rq, &flags);
++ return;
++ }
++
++ p->signal->autogroup = autogroup_kref_get(ag);
++ __sched_move_task(p, rq);
++ task_rq_unlock(rq, &flags);
++
++ rcu_read_lock();
++ list_for_each_entry_rcu(t, &p->thread_group, thread_group) {
++ sched_move_task(t);
++ }
++ rcu_read_unlock();
++
++ autogroup_kref_put(prev);
++}
++
++void sched_autogroup_create_attach(struct task_struct *p)
++{
++ struct autogroup *ag = autogroup_create();
++
++ autogroup_move_group(p, ag);
++	/* drop extra reference added by autogroup_create() */
++ autogroup_kref_put(ag);
++}
++EXPORT_SYMBOL(sched_autogroup_create_attach);
++
++/* currently has no users */
++void sched_autogroup_detach(struct task_struct *p)
++{
++ autogroup_move_group(p, &autogroup_default);
++}
++EXPORT_SYMBOL(sched_autogroup_detach);
++
++void sched_autogroup_fork(struct signal_struct *sig)
++{
++ sig->autogroup = autogroup_kref_get(current->signal->autogroup);
++}
++
++void sched_autogroup_exit(struct signal_struct *sig)
++{
++ autogroup_kref_put(sig->autogroup);
++}
++
++static int __init setup_autogroup(char *str)
++{
++ sysctl_sched_autogroup_enabled = 0;
++
++ return 1;
++}
++
++__setup("noautogroup", setup_autogroup);
++#endif
+diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
+new file mode 100644
+index 0000000..6048f5d
+--- /dev/null
++++ b/kernel/sched_autogroup.h
+@@ -0,0 +1,18 @@
++#ifdef CONFIG_SCHED_AUTOGROUP
++
++static void __sched_move_task(struct task_struct *tsk, struct rq *rq);
++
++static inline struct task_group *
++autogroup_task_group(struct task_struct *p, struct task_group *tg);
++
++#else /* !CONFIG_SCHED_AUTOGROUP */
++
++static inline void autogroup_init(struct task_struct *init_task) { }
++
++static inline struct task_group *
++autogroup_task_group(struct task_struct *p, struct task_group *tg)
++{
++ return tg;
++}
++
++#endif /* CONFIG_SCHED_AUTOGROUP */
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c
+index 5abfa15..b162f65 100644
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -382,6 +382,17 @@ static struct ctl_table kern_table[] = {
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
++#ifdef CONFIG_SCHED_AUTOGROUP
++ {
++ .procname = "sched_autogroup_enabled",
++ .data = &sysctl_sched_autogroup_enabled,
++ .maxlen = sizeof(unsigned int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec,
++ .extra1 = &zero,
++ .extra2 = &one,
++ },
++#endif
+ #ifdef CONFIG_PROVE_LOCKING
+ {
+ .procname = "prove_locking",
+--
+1.6.6.1
+