summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
authorPaul Eggleton <paul.eggleton@linux.intel.com>2012-05-23 00:23:32 +0100
committerRichard Purdie <richard.purdie@linuxfoundation.org>2012-05-23 11:25:05 +0100
commitc993b7c457f8b7776e8a5dff253bfa0724bc2cae (patch)
treeab0bfbf069fb1478f27326f43ccf025b6150ba7c
parent0fe3cb1438d297f90dd0fc6b26362ecbff75c76d (diff)
downloadbitbake-c993b7c457f8b7776e8a5dff253bfa0724bc2cae.tar.gz
bitbake: implement checksums for local files in SRC_URI
Gathers a list of paths to have checksums calculated at parse time, and processes these when calculating task hashes. Checksums are cached with the file's current mtime. Thus, changing any local file in SRC_URI will now cause the do_fetch taskhash to change, thus forcing a rebuild.

This change adds very roughly about an 8% increase in parse time (a few seconds) and maybe a few seconds during runqueue generation, so a fairly moderate performance hit.

Note that since paths are resolved at parse time, this will not force a rebuild when files are introduced which would cause that resolved path to be different - for example, where a machine-specific version of a file was added without otherwise changing the recipe. This will need to be handled in a future update.

Code to hook this into the signature generator was courtesy of Richard Purdie <richard.purdie@linuxfoundation.org>.

Implements [YOCTO #2044].

Signed-off-by: Paul Eggleton <paul.eggleton@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r--lib/bb/cache.py13
-rw-r--r--lib/bb/checksum.py90
-rw-r--r--lib/bb/cooker.py2
-rw-r--r--lib/bb/fetch2/__init__.py85
-rw-r--r--lib/bb/siggen.py24
5 files changed, 211 insertions, 3 deletions
diff --git a/lib/bb/cache.py b/lib/bb/cache.py
index 36e6356f5..dea2a8061 100644
--- a/lib/bb/cache.py
+++ b/lib/bb/cache.py
@@ -43,7 +43,7 @@ except ImportError:
logger.info("Importing cPickle failed. "
"Falling back to a very slow implementation.")
-__cache_version__ = "143"
+__cache_version__ = "144"
def getCacheFile(path, filename, data_hash):
return os.path.join(path, filename + "." + data_hash)
@@ -76,9 +76,13 @@ class RecipeInfoCommon(object):
for task in tasks)
@classmethod
- def flaglist(cls, flag, varlist, metadata):
- return dict((var, metadata.getVarFlag(var, flag, True))
+ def flaglist(cls, flag, varlist, metadata, squash=False):
+ out_dict = dict((var, metadata.getVarFlag(var, flag, True))
for var in varlist)
+ if squash:
+ return dict((k,v) for (k,v) in out_dict.iteritems() if v)
+ else:
+ return out_dict
@classmethod
def getvar(cls, var, metadata):
@@ -128,6 +132,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
self.stamp = self.getvar('STAMP', metadata)
self.stamp_base = self.flaglist('stamp-base', self.tasks, metadata)
self.stamp_extrainfo = self.flaglist('stamp-extra-info', self.tasks, metadata)
+ self.file_checksums = self.flaglist('file-checksums', self.tasks, metadata, True)
self.packages_dynamic = self.listvar('PACKAGES_DYNAMIC', metadata)
self.depends = self.depvar('DEPENDS', metadata)
self.provides = self.depvar('PROVIDES', metadata)
@@ -154,6 +159,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
cachedata.stamp = {}
cachedata.stamp_base = {}
cachedata.stamp_extrainfo = {}
+ cachedata.file_checksums = {}
cachedata.fn_provides = {}
cachedata.pn_provides = defaultdict(list)
cachedata.all_depends = []
@@ -185,6 +191,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
cachedata.stamp[fn] = self.stamp
cachedata.stamp_base[fn] = self.stamp_base
cachedata.stamp_extrainfo[fn] = self.stamp_extrainfo
+ cachedata.file_checksums[fn] = self.file_checksums
provides = [self.pn]
for provide in self.provides:
diff --git a/lib/bb/checksum.py b/lib/bb/checksum.py
new file mode 100644
index 000000000..514ff0b1e
--- /dev/null
+++ b/lib/bb/checksum.py
@@ -0,0 +1,90 @@
+# Local file checksum cache implementation
+#
+# Copyright (C) 2012 Intel Corporation
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+import os
+import stat
+import bb.utils
+import logging
+from bb.cache import MultiProcessCache
+
+logger = logging.getLogger("BitBake.Cache")
+
+try:
+ import cPickle as pickle
+except ImportError:
+ import pickle
+ logger.info("Importing cPickle failed. "
+ "Falling back to a very slow implementation.")
+
+
+# mtime cache (non-persistent)
+# based upon the assumption that files do not change during bitbake run
# mtime cache (non-persistent)
# based upon the assumption that files do not change during bitbake run
class FileMtimeCache(object):
    # Shared across all instances: one mtime lookup per file per run.
    cache = {}

    def cached_mtime(self, f):
        """Return the cached mtime of f, stat()ing it on first use.

        Raises OSError if the file cannot be stat()ed.
        """
        mtime = self.cache.get(f)
        if mtime is None:
            mtime = os.stat(f)[stat.ST_MTIME]
            self.cache[f] = mtime
        return mtime

    def cached_mtime_noerror(self, f):
        """Like cached_mtime() but return 0 (uncached) on stat failure."""
        if f in self.cache:
            return self.cache[f]
        try:
            mtime = os.stat(f)[stat.ST_MTIME]
        except OSError:
            return 0
        self.cache[f] = mtime
        return mtime

    def update_mtime(self, f):
        """Re-stat f unconditionally, refresh the cache and return the mtime."""
        mtime = os.stat(f)[stat.ST_MTIME]
        self.cache[f] = mtime
        return mtime

    def clear(self):
        """Drop all cached mtimes (affects every instance)."""
        self.cache.clear()
+
# Checksum + mtime cache (persistent)
class FileChecksumCache(MultiProcessCache):
    cache_file_name = "local_file_checksum_cache.dat"
    CACHE_VERSION = 1

    def __init__(self):
        # Non-persistent mtime cache; assumes files do not change during
        # a single bitbake run.
        self.mtime_cache = FileMtimeCache()
        MultiProcessCache.__init__(self)

    def get_checksum(self, f):
        """Return the md5 checksum of file f.

        Uses the persistent cached value when the file's mtime has not
        changed since the checksum was computed; otherwise recomputes it
        and records (mtime, checksum) in the extras data for later merge.
        May raise OSError (via stat/read) if f is inaccessible.
        """
        entry = self.cachedata[0].get(f)
        cmtime = self.mtime_cache.cached_mtime(f)
        if entry:
            (mtime, hashval) = entry
            if cmtime == mtime:
                return hashval
            else:
                bb.debug(2, "file %s changed mtime, recompute checksum" % f)

        hashval = bb.utils.md5_file(f)
        self.cachedata_extras[0][f] = (cmtime, hashval)
        return hashval

    def merge_data(self, source, dest):
        """Merge checksum entries from source into dest.

        source/dest are cache-data lists whose element 0 maps
        path -> (mtime, checksum); when both sides have an entry for a
        path, keep the one with the newer mtime.
        """
        for h in source[0]:
            # Fix: membership must be tested against the checksum dict
            # (dest[0]), not the outer cache-data list. The previous
            # "h in dest" test compared a path against the list's dict
            # elements and never matched, so existing (possibly newer)
            # dest entries were unconditionally overwritten.
            if h in dest[0]:
                (smtime, _) = source[0][h]
                (dmtime, _) = dest[0][h]
                if smtime > dmtime:
                    dest[0][h] = source[0][h]
            else:
                dest[0][h] = source[0][h]
diff --git a/lib/bb/cooker.py b/lib/bb/cooker.py
index dea0aadbe..8ad492265 100644
--- a/lib/bb/cooker.py
+++ b/lib/bb/cooker.py
@@ -1570,6 +1570,7 @@ class CookerParser(object):
def init():
Parser.cfg = self.cfgdata
multiprocessing.util.Finalize(None, bb.codeparser.parser_cache_save, args=(self.cfgdata,), exitpriority=1)
+ multiprocessing.util.Finalize(None, bb.fetch.fetcher_parse_save, args=(self.cfgdata,), exitpriority=1)
self.feeder_quit = multiprocessing.Queue(maxsize=1)
self.parser_quit = multiprocessing.Queue(maxsize=self.num_processes)
@@ -1618,6 +1619,7 @@ class CookerParser(object):
sync.start()
multiprocessing.util.Finalize(None, sync.join, exitpriority=-100)
bb.codeparser.parser_cache_savemerge(self.cooker.configuration.data)
+ bb.fetch.fetcher_parse_done(self.cooker.configuration.data)
def load_cached(self):
for filename, appends in self.fromcache:
diff --git a/lib/bb/fetch2/__init__.py b/lib/bb/fetch2/__init__.py
index 0b976c407..d4b6c3ec3 100644
--- a/lib/bb/fetch2/__init__.py
+++ b/lib/bb/fetch2/__init__.py
@@ -8,6 +8,7 @@ BitBake build tools.
"""
# Copyright (C) 2003, 2004 Chris Larson
+# Copyright (C) 2012 Intel Corporation
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -30,9 +31,11 @@ import os, re
import logging
import urllib
import bb.persist_data, bb.utils
+import bb.checksum
from bb import data
__version__ = "2"
+_checksum_cache = bb.checksum.FileChecksumCache()
logger = logging.getLogger("BitBake.Fetcher")
@@ -233,10 +236,18 @@ def fetcher_init(d):
else:
raise FetchError("Invalid SRCREV cache policy of: %s" % srcrev_policy)
+ _checksum_cache.init_cache(d)
+
for m in methods:
if hasattr(m, "init"):
m.init(d)
def fetcher_parse_save(d):
    # Flush this parser process's newly computed checksums (the cache
    # "extras") out to disk; hooked as a multiprocessing Finalize action.
    _checksum_cache.save_extras(d)

def fetcher_parse_done(d):
    # Merge the per-process checksum cache files back into the main
    # persistent cache once all parsing has completed.
    _checksum_cache.save_merge(d)
+
def fetcher_compare_revisions(d):
"""
Compare the revisions in the persistant cache with current values and
@@ -553,6 +564,80 @@ def srcrev_internal_helper(ud, d, name):
return rev
+
def get_checksum_file_list(d):
    """Get a list of the local files to checksum in SRC_URI.

    Returns the resolved local paths of all local file entries in
    SRC_URI as a space-separated string.
    """
    fetch = Fetch([], d)

    dl_dir = d.getVar('DL_DIR', True)
    filelist = []
    for u in fetch.urls:
        ud = fetch.ud[u]

        # Only local (file://) entries contribute to the checksum list
        if isinstance(ud.method, local.Local):
            ud.setup_localpath(d)
            f = ud.localpath
            if f.startswith(dl_dir):
                # The local fetcher's behaviour is to return a path under DL_DIR if it couldn't find the file anywhere else
                if os.path.exists(f):
                    bb.warn("Getting checksum for %s SRC_URI entry %s: file not found except in DL_DIR" % (d.getVar('PN', True), os.path.basename(f)))
                else:
                    bb.warn("Unable to get checksum for %s SRC_URI entry %s: file could not be found" % (d.getVar('PN', True), os.path.basename(f)))
                # DL_DIR-only resolutions are warned about but excluded
                continue
            filelist.append(f)

    return " ".join(filelist)
+
+
def get_file_checksums(filelist, pn):
    """Get a list of the checksums for a list of local files.

    Returns a sorted list of (path, checksum) tuples for the files in
    the space-separated list *filelist*, caching results as it proceeds.
    Glob patterns and directories are expanded to the individual files
    they contain. *pn* is the recipe name, used only in warning messages.
    """
    import glob

    def checksum_file(f):
        # Returns the cached/computed checksum, or None (after warning)
        # if the file cannot be read.
        try:
            return _checksum_cache.get_checksum(f)
        except OSError as e:
            bb.warn("Unable to get checksum for %s SRC_URI entry %s: %s" % (pn, os.path.basename(f), e))
            return None

    # Fix: each branch now appends its own results. The previous code
    # had a single trailing "if checksum: checksums.append((pth, checksum))"
    # after the if/elif/else, so a glob or directory entry additionally
    # appended the pattern/directory path itself paired with the checksum
    # of the last file it expanded to. (Also dropped the unused
    # "import traceback".)
    checksums = []
    for pth in filelist.split():
        if '*' in pth:
            # Handle globs
            for f in glob.glob(pth):
                checksum = checksum_file(f)
                if checksum:
                    checksums.append((f, checksum))
        elif os.path.isdir(pth):
            # Handle directories
            for root, dirs, files in os.walk(pth):
                for name in files:
                    fullpth = os.path.join(root, name)
                    checksum = checksum_file(fullpth)
                    if checksum:
                        checksums.append((fullpth, checksum))
        else:
            # Plain file: record the path exactly as given
            checksum = checksum_file(pth)
            if checksum:
                checksums.append((pth, checksum))

    checksums.sort()
    return checksums
+
+
class FetchData(object):
"""
A class which represents the fetcher state for a given URI.
diff --git a/lib/bb/siggen.py b/lib/bb/siggen.py
index 5a0b80e8a..daf56770f 100644
--- a/lib/bb/siggen.py
+++ b/lib/bb/siggen.py
@@ -60,6 +60,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
self.taskhash = {}
self.taskdeps = {}
self.runtaskdeps = {}
+ self.file_checksum_values = {}
self.gendeps = {}
self.lookupcache = {}
self.pkgnameextract = re.compile("(?P<fn>.*)\..*")
@@ -152,6 +153,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
k = fn + "." + task
data = dataCache.basetaskhash[k]
self.runtaskdeps[k] = []
+ self.file_checksum_values[k] = {}
recipename = dataCache.pkg_fn[fn]
for dep in sorted(deps, key=clean_basepath):
depname = dataCache.pkg_fn[self.pkgnameextract.search(dep).group('fn')]
@@ -161,6 +163,12 @@ class SignatureGeneratorBasic(SignatureGenerator):
bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?", dep)
data = data + self.taskhash[dep]
self.runtaskdeps[k].append(dep)
+
+ if task in dataCache.file_checksums[fn]:
+ checksums = bb.fetch2.get_file_checksums(dataCache.file_checksums[fn][task], recipename)
+ for (f,cs) in checksums:
+ self.file_checksum_values[k][f] = cs
+ data = data + cs
h = hashlib.md5(data).hexdigest()
self.taskhash[k] = h
#d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
@@ -197,6 +205,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
if runtime and k in self.taskhash:
data['runtaskdeps'] = self.runtaskdeps[k]
+ data['file_checksum_values'] = self.file_checksum_values[k]
data['runtaskhashes'] = {}
for dep in data['runtaskdeps']:
data['runtaskhashes'][dep] = self.taskhash[dep]
@@ -304,6 +313,18 @@ def compare_sigfiles(a, b):
for dep in changed:
print "Variable %s value changed from %s to %s" % (dep, a_data['varvals'][dep], b_data['varvals'][dep])
+ changed, added, removed = dict_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
+ if changed:
+ for f in changed:
+ print "Checksum for file %s changed from %s to %s" % (f, a_data['file_checksum_values'][f], b_data['file_checksum_values'][f])
+ if added:
+ for f in added:
+ print "Dependency on checksum of file %s was added" % (f)
+ if removed:
+ for f in removed:
+ print "Dependency on checksum of file %s was removed" % (f)
+
+
if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
a = clean_basepaths(a_data['runtaskhashes'])
b = clean_basepaths(b_data['runtaskhashes'])
@@ -353,6 +374,9 @@ def dump_sigfile(a):
if 'runtaskdeps' in a_data:
print "Tasks this task depends on: %s" % (a_data['runtaskdeps'])
+ if 'file_checksum_values' in a_data:
+ print "This task depends on the checksums of files: %s" % (a_data['file_checksum_values'])
+
if 'runtaskhashes' in a_data:
for dep in a_data['runtaskhashes']:
print "Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep])