aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorRichard Purdie <richard.purdie@linuxfoundation.org>2024-02-21 13:10:04 +0000
committerRichard Purdie <richard.purdie@linuxfoundation.org>2024-02-23 14:32:29 +0000
commit14a27306f6dceb4999c2804ccae5a09cc3d8dd49 (patch)
tree5ac902437cf14686c289ba83a0c52185b38587e7 /lib
parent4fe05513b5314c201725e3f8ad54f58d70c56258 (diff)
downloadbitbake-14a27306f6dceb4999c2804ccae5a09cc3d8dd49.tar.gz
runqueue: Add support for BB_LOADFACTOR_MAX
Some distros don't enable /proc/pressure and it tends to be those which we see bitbake timeout issues on, seemingly as load gets too high and the bitbake processes don't get scheduled in for minutes at a time. Add support for stopping running extra tasks if the system load average goes above a certain threshold by setting BB_LOADFACTOR_MAX. The value used is scaled by CPU number, so a value of 1 corresponds to a load average equal to the number of CPU cores of the system, and a value under 1 only starts tasks when the load average is below the number of cores. This means you can centrally set a value such as 1.5 which will then scale correctly to different sized machines with differing numbers of CPUs. The pressure regulation is probably more accurate and responsive, however our graphs do show significant load spikes on some workers and this patch is aimed at trying to avoid those. Pressure regulation is used where available in preference to this load factor regulation when both are set. Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'lib')
-rw-r--r--lib/bb/runqueue.py16
1 files changed, 16 insertions, 0 deletions
diff --git a/lib/bb/runqueue.py b/lib/bb/runqueue.py
index e86ccd8c6..6987de3e2 100644
--- a/lib/bb/runqueue.py
+++ b/lib/bb/runqueue.py
@@ -220,6 +220,16 @@ class RunQueueScheduler(object):
bb.note("Pressure status changed to CPU: %s, IO: %s, Mem: %s (CPU: %s/%s, IO: %s/%s, Mem: %s/%s) - using %s/%s bitbake threads" % (pressure_state + pressure_values + (len(self.rq.runq_running.difference(self.rq.runq_complete)), self.rq.number_tasks)))
self.pressure_state = pressure_state
return (exceeds_cpu_pressure or exceeds_io_pressure or exceeds_memory_pressure)
+ elif self.rq.max_loadfactor:
+ limit = False
+ loadfactor = float(os.getloadavg()[0]) / os.cpu_count()
+ # bb.warn("Comparing %s to %s" % (loadfactor, self.rq.max_loadfactor))
+ if loadfactor > self.rq.max_loadfactor:
+ limit = True
+ if hasattr(self, "loadfactor_limit") and limit != self.loadfactor_limit:
+ bb.note("Load average limiting set to %s as load average: %s - using %s/%s bitbake threads" % (limit, loadfactor, len(self.rq.runq_running.difference(self.rq.runq_complete)), self.rq.number_tasks))
+ self.loadfactor_limit = limit
+ return limit
return False
def next_buildable_task(self):
@@ -1822,6 +1832,7 @@ class RunQueueExecute:
self.max_cpu_pressure = self.cfgData.getVar("BB_PRESSURE_MAX_CPU")
self.max_io_pressure = self.cfgData.getVar("BB_PRESSURE_MAX_IO")
self.max_memory_pressure = self.cfgData.getVar("BB_PRESSURE_MAX_MEMORY")
+ self.max_loadfactor = self.cfgData.getVar("BB_LOADFACTOR_MAX")
self.sq_buildable = set()
self.sq_running = set()
@@ -1875,6 +1886,11 @@ class RunQueueExecute:
bb.fatal("Invalid BB_PRESSURE_MAX_MEMORY %s, minimum value is %s." % (self.max_memory_pressure, lower_limit))
if self.max_memory_pressure > upper_limit:
bb.warn("Your build will be largely unregulated since BB_PRESSURE_MAX_MEMORY is set to %s. It is very unlikely that such high pressure will be experienced." % (self.max_io_pressure))
+
+ if self.max_loadfactor:
+ self.max_loadfactor = float(self.max_loadfactor)
+ if self.max_loadfactor <= 0:
+ bb.fatal("Invalid BB_LOADFACTOR_MAX %s, needs to be greater than zero." % (self.max_loadfactor))
# List of setscene tasks which we've covered
self.scenequeue_covered = set()