From 99d3703edd77a21770b366c6ad65a3c0f5183493 Mon Sep 17 00:00:00 2001 From: Jianxun Zhang Date: Wed, 21 Dec 2016 12:27:37 -0800 Subject: use multiple processes to dump signatures. This change significantly shortens the time on reparsing stage of '-S' option. Each file is reparsed and then dumped within a dedicated process. The maximum number of the running processes is not greater than the value of BB_NUMBER_PARSE_THREADS if it is set. The dump_sigs() in class SignatureGeneratorBasic is _replaced_ by a new dump_sigfn() interface, so calls from the outside and subclasses are dispatched to the implementation in the base class of SignatureGeneratorBasic. Fixes [YOCTO #10352] Signed-off-by: Jianxun Zhang Signed-off-by: Richard Purdie --- lib/bb/runqueue.py | 32 +++++++++++++++++++++++++++----- lib/bb/siggen.py | 4 ++-- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/lib/bb/runqueue.py b/lib/bb/runqueue.py index 48c6a79ff..d42eb8166 100644 --- a/lib/bb/runqueue.py +++ b/lib/bb/runqueue.py @@ -36,6 +36,7 @@ from bb import msg, data, event from bb import monitordisk import subprocess import pickle +from multiprocessing import Process bblogger = logging.getLogger("BitBake") logger = logging.getLogger("BitBake.RunQueue") @@ -1303,15 +1304,36 @@ class RunQueue: else: self.rqexe.finish() + def rq_dump_sigfn(self, fn, options): + bb_cache = bb.cache.NoCache(self.cooker.databuilder) + the_data = bb_cache.loadDataFull(fn, self.cooker.collection.get_file_appends(fn)) + siggen = bb.parse.siggen + dataCaches = self.rqdata.dataCaches + siggen.dump_sigfn(fn, dataCaches, options) + def dump_signatures(self, options): - done = set() + fns = set() bb.note("Reparsing files to collect dependency data") - bb_cache = bb.cache.NoCache(self.cooker.databuilder) + for tid in self.rqdata.runtaskentries: fn = fn_from_tid(tid) - if fn not in done: - the_data = bb_cache.loadDataFull(fn, self.cooker.collection.get_file_appends(fn)) - done.add(fn) + fns.add(fn) + + max_process = int(self.cfgData.getVar("BB_NUMBER_PARSE_THREADS") or os.cpu_count() or 1) + # We cannot use the real multiprocessing.Pool easily due to some local data + # that can't be pickled. This is a cheap multi-process solution. + launched = [] + while fns: + if len(launched) < max_process: + p = Process(target=self.rq_dump_sigfn, args=(fns.pop(), options)) + p.start() + launched.append(p) + for q in launched: + # The finished processes are joined when calling is_alive() + if not q.is_alive(): + launched.remove(q) + for p in launched: + p.join() bb.parse.siggen.dump_sigs(self.rqdata.dataCaches, options) diff --git a/lib/bb/siggen.py b/lib/bb/siggen.py index 4226c80c8..3ceeef1cc 100644 --- a/lib/bb/siggen.py +++ b/lib/bb/siggen.py @@ -307,8 +307,8 @@ class SignatureGeneratorBasic(SignatureGenerator): pass raise err - def dump_sigs(self, dataCaches, options): - for fn in self.taskdeps: + def dump_sigfn(self, fn, dataCaches, options): + if fn in self.taskdeps: for task in self.taskdeps[fn]: tid = fn + ":" + task (mc, _, _) = bb.runqueue.split_tid(tid) -- cgit 1.2.3-korg