aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/mtn2git/mtn2git.py
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/mtn2git/mtn2git.py')
-rwxr-xr-x contrib/mtn2git/mtn2git.py 405
1 files changed, 131 insertions, 274 deletions
diff --git a/contrib/mtn2git/mtn2git.py b/contrib/mtn2git/mtn2git.py
index cda4f39fca..1de3010756 100755
--- a/contrib/mtn2git/mtn2git.py
+++ b/contrib/mtn2git/mtn2git.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
"""
- Copyright (C) 2006, 2007 Holger Hans Peter Freyther
+ Copyright (C) 2006, 2007, 2008 Holger Hans Peter Freyther
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -27,18 +27,6 @@
# -tag handling
# -work with n-merges
#
-# DISCUSSION:
-# -For some reason the get_revision information might be inaccurate
-# and I should consider just comparing the manifests.
-# I would use the manifests of the parents and consider all files deleted
-# and then remove every dir/file that is inside the new manifest from this
-# list.
-# Benefits:
-# - 1:1 match of the manifest regardles of get_revision information
-# - Renaming is handled by git anyway
-# Downsides:
-# - The size of the import will grow.
-#
import mtn
import os
@@ -48,6 +36,17 @@ import email.Utils
import status
+# Interesting revisions:
+# Rename with dest==src: 24cba5923360fef7c5cc81d51000e30b90355eb9
+# Recursive rename: fca159c5c00ae4158c289f5aabce995378d4e41b
+# Delete+Rename: 91da98265a39c93946e00adf5d7bf92b341de847
+#
+#
+#
+
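+# FIFO cache of manifest trees: cached_tree maps a revision to its tree,
+# cached_fifo records the order in which revisions were cached.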
+cached_tree = {}
+cached_fifo = []
def get_mark(revision):
"""
@@ -104,21 +103,6 @@ def reset_git(ops, revision):
cmd += [""]
print "\n".join(cmd)
-def filter_renamed(manifest, renamed):
- """
- If we base from a revision that has already done
- the move, git-fast-import will complain that the file
- has been already moved
- """
- if len(renamed) == 0:
- return renamed
-
- for line in manifest:
- if line[0] == "file":
- renamed = filter(lambda (to,from_,manifest): to != line[1], renamed)
-
- return renamed
-
def get_git_date(revision):
"""
Convert the "date" cert of monotone to a time understandable by git. No timezone
@@ -127,135 +111,90 @@ def get_git_date(revision):
dt = datetime.datetime.strptime(revision["date"], "%Y-%m-%dT%H:%M:%S").strftime("%a, %d %b %Y %H:%M:%S +0000")
return dt
-def recursively_delete(ops, manifest, revision, dir_name, to_delete):
- """
- Recursively delete all files that dir_name inside the name
- """
- for line in manifest:
- if line[0] == "dir" or line[0] == "file":
- if line[1].startswith(dir_name):
- print >> sys.stderr, "Deleting '%s'" % line[1]
- to_delete.add((line[1], revision))
- elif line[0] in ["format_version"]:
- assert(line[1] == "1")
- else:
- print >> sys.stderr, line[0]
- assert(False)
+def is_executable_attribute_set(attributes, rev):
+    """
+    Return True if the given manifest attributes mark a file as executable.
+
+    attributes is the flat remainder of a manifest "file" line (line[4:]),
+    read as consecutive triples such as "attr", "mtn:execute", "true".
+    rev is only used as the assertion message when the list length is not
+    a multiple of three.
+    """
+    assert(len(attributes) % 3 == 0), rev
- return to_delete
+    # Step through every triple, so mtn:execute is found even when it is
+    # not the first attribute of the file.
+    for i in range(0, len(attributes), 3):
+        if attributes[i] == "attr" and attributes[i+1] == "mtn:execute" and attributes[i+2] == "true":
+            return True
+    return False
-def recursively_rename(ops, manifest, revision, old_name, new_name, to_add_dirs, to_add_files, to_remove_items, files_deleted, files_sticky):
- """
- mtn has a rename command and can rename entrie directories. For git we will have to do the recursive renaming
- ourselves. Basicly we will get all files and replace old_name with new_name but only:
- If the file of the old_manifest is not in our to be deleted list
- """
- old_dir = old_name + "/"
- for line in manifest:
- if line[1].startswith(old_dir) or line[1] == old_name:
- already_handled = False
- for (deleted,_) in files_deleted:
- if line[1] == deleted:
- already_handled = True
- break
-
- # Don't rename files that should be in the same directory
- if line[1] in files_sticky:
- already_handled = True
-
- if already_handled:
- pass
- elif line[0] == "file":
- print >> sys.stderr, "Will add '%s' old: '%s' new: '%s' => result: '%s'" % (line[1], old_name, new_name, line[1].replace(old_name, new_name, 1))
- to_add_files.add((line[1].replace(old_name, new_name, 1), None, revision))
- elif line[0] == "dir":
- to_add_dirs.add((line[1].replace(old_name, new_name, 1), revision))
- elif line[0] in ["format_version"]:
- assert(line[1] == "1")
- else:
- print >> sys.stderr, line[0]
- assert(False)
-
- return (to_add_files, to_add_dirs)
+def build_tree(manifest, rev):
+    """
+    Assemble a filesystem tree from a given manifest.
+
+    manifest is an iterable of parsed manifest lines; rev is only used in
+    error messages. The returned object has two attributes:
+      dirs  - maps every directory path to 1
+      files - maps every file path to (line[3], executable flag)
+    "format_version" lines are ignored; any other line type raises an
+    AssertionError.
+    """
-#
-# We need to recursively rename the directories. Now the difficult part is to undo certain operations.
-#
-# e.g we rename the whole dir and then rename a file back. We could revive a directory that was marked
-# for deletion.
-#
-# rename "weird/two/three"
-# to "unweird/four"
-#
-# rename "weird/two/three/four"
-# to "weird/two/three"
-#
-# Here we would schedule weird/two/three for deletion but then revive it again. So three does not
-# get copied to unweird/four/three
-# """
-def recursively_rename_directories(ops, manifests, rename_commands, files_deleted, files_moved_sticky):
- to_add_directories = set()
- to_add_files = set()
- to_remove_items = set()
-
- for (old_name, new_name, old_revision) in rename_commands:
- # Check if we have the above case and rename a more specific directory
- # and then we will alter the result...
- inner_rename = False
- for (other_old_name, other_new_name, other_rev) in rename_commands:
- if old_name.startswith(other_old_name + "/") and other_old_name != old_name:
- inner_rename = True
- print >> sys.stderr, "Inner rename detected", old_name, other_old_name
- # Fixup the renaming
- def rename(filename, filerev, rev, would_be_new_name):
- if filename.startswith(would_be_new_name + "/"):
- return filename.replace(would_be_new_name, new_name, 1), filerev, rev
- return filename, filerev, rev
-
- would_be_new_name = other_new_name + "/" + old_name[len(other_old_name)+1:]
- to_remove_items = set(filter(lambda (item,_): item != new_name, to_remove_items))
- to_add_directories = set(filter(lambda (item,_): item != would_be_new_name, to_add_directories))
- to_add_directories.add((new_name, old_revision))
- to_add_files = set(map(lambda (fn, fr, r): rename(fn, fr, r, would_be_new_name), to_add_files))
-
- if not inner_rename:
- to_remove_items.add((old_name, old_revision))
- recursively_delete(ops, manifests[old_revision], old_revision, old_name + "/", to_remove_items)
- recursively_rename(ops, manifests[old_revision], old_revision, old_name, new_name, to_add_directories, to_add_files, to_remove_items, files_deleted, files_moved_sticky)
-
- return (to_add_directories, to_add_files, to_remove_items)
-
-
-def build_tree(manifest):
- dirs = {}
- files = {}
+    # Named differently from the instance below so the class is not
+    # rebound to its own object.
+    class _Tree:
+        def __init__(self):
+            self.dirs = {}
+            self.files = {}
+
+    tree = _Tree()
for line in manifest:
if line[0] == "file":
- files[line[1]] = (line[3],line[4:])
+            tree.files[line[1]] = (line[3], is_executable_attribute_set(line[4:], rev))
elif line[0] == "dir":
- dirs[line[1]] = 1
+            tree.dirs[line[1]] = 1
elif line[0] != "format_version":
- print >> sys.stderr, line[0]
- assert(False)
- return (dirs,files)
+            # Raised explicitly so unknown entries are still rejected
+            # when assertions are disabled with -O.
+            raise AssertionError("Rev: %s: Line[0]: '%s'" % (rev, line[0]))
-def compare_with_manifest(all_added, all_modified, all_deleted, new_manifest, old_manifests):
- """
- Sanity check that the difference between the old and the new manifest is the one
- we have in all_added, all_modified, all_deleted
- """
- old_trees = {}
- really_added = {}
- really_modified = {}
- really_removed = {}
+    return tree
+
+def get_and_cache_tree(ops, revision):
+    """
+    Return the tree for revision, building and caching it on a miss.
+
+    The manifest is read through ops.get_manifest_of. Once more than 100
+    revisions are cached, the one that was cached first is dropped.
+    """
+    global cached_tree, cached_fifo
+
+    hit = cached_tree.get(revision)
+    if hit is not None:
+        return hit
- current_dirs, current_files = build_tree(new_manifest)
+    manifest_lines = list(ops.get_manifest_of(revision))
+    built = build_tree(manifest_lines, revision)
+    cached_tree[revision] = built
+    cached_fifo.append(revision)
+
+    # Evict the oldest entry once the cache has outgrown its limit
+    if len(cached_fifo) > 100:
+        oldest, cached_fifo = cached_fifo[0], cached_fifo[1:]
+        del cached_tree[oldest]
+
+    return built
+
+def diff_manifest(old_tree, new_tree):
+    """
+    Find additions, modifications and deletions going from old_tree to new_tree.
+
+    Returns a tuple (added, modified, deleted) of sets:
+      added    - directory paths present only in new_tree
+      modified - (path, file revision) for files that are new, or whose
+                 stored entry (revision or executable flag) differs
+      deleted  - (path, is_dir) for directories and files present only
+                 in old_tree
+    """
+    old_dirs, new_dirs = old_tree.dirs, new_tree.dirs
+    old_files, new_files = old_tree.files, new_tree.files
+
+    # Directories that appeared
+    added = set(path for path in new_dirs if path not in old_dirs)
+
+    # Directories and files that vanished; the flag tells them apart
+    deleted = set((path, True) for path in old_dirs if path not in new_dirs)
+    deleted.update((path, False) for path in old_files if path not in new_files)
+
+    # Brand-new files are reported as modifications, as are files whose
+    # contents or executable attribute changed
+    modified = set()
+    for path, entry in new_files.items():
+        if path not in old_files or old_files[path] != entry:
+            modified.add((path, entry[0]))
+
- for parent in old_manifests.keys():
- old_trees[parent] = build_tree(old_manifests[parent])
+    return (added, modified, deleted)
- print >> sys.stderr, len(old_manifests)
def fast_import(ops, revision):
"""Import a revision into git using git-fast-import.
@@ -292,27 +231,28 @@ def fast_import(ops, revision):
return
# Use the manifest to find dirs and files
- manifest = [line for line in ops.get_manifest_of(revision["revision"])]
- manifests = {}
- dirs = {}
- for parent in revision["parent"]:
- manifests[parent] = [line for line in ops.get_manifest_of(parent)]
- for line in manifests[parent]:
- if line[0] == "dir":
- if not parent in dirs:
- dirs[parent] = {}
- dirs[parent][line[1]] = 1
+ current_tree = get_and_cache_tree(ops, revision["revision"])
- # We can not just change the mode of a file but we need to modifiy the whole file. We
- # will simply add it to the modified list and ask to retrieve the status from the manifest
- for (file, attribute, value, rev) in revision["set_attributes"]:
- if attribute == "mtn:execute":
- revision["modified"].append((file, None, rev))
- for (file, attribute, rev) in revision["clear_attributes"]:
- if attribute == "mtn:execute":
- revision["modified"].append((file, None, rev))
+ all_added = set()
+ all_modifications = set()
+ all_deleted = set()
+
+ # Now diff the manifests
+ for parent in revision["parent"]:
+ (added, modified, deleted) = diff_manifest(get_and_cache_tree(ops, parent), current_tree)
+ all_added = all_added.union(added)
+ all_modifications = all_modifications.union(modified)
+ all_deleted = all_deleted.union(deleted)
+ if len(revision["parent"]) == 0:
+ (added, modified, deleted) = diff_manifest(build_tree([],""), current_tree)
+ all_added = all_added.union(added)
+ all_modifications = all_modifications.union(modified)
+ all_deleted = all_deleted.union(deleted)
+ # TODO:
+ # Re-add the sanity check that detects an entry being both deleted and modified.
+ # This could probably happen if we have more than one parent (on a merge)?
cmd = []
cmd += ["commit refs/heads/%s" % branch]
@@ -322,97 +262,28 @@ def fast_import(ops, revision):
cmd += ["data %d" % len(revision["changelog"])]
cmd += ["%s" % revision["changelog"]]
- # Emulation for renaming. We will split them into two lists
- file_renamed_del = set()
- file_renamed_new = set()
- file_moved_sticky = set()
-
if len(revision["parent"]) != 0:
cmd += ["from :%s" % get_mark(revision["parent"][0])]
- renamed = revision["renamed"]
-
- to_rename_directories = []
- for (new_name, old_name, old_revision) in renamed:
- # 24cba5923360fef7c5cc81d51000e30b90355eb9 is a rev where src == dest but the
- # directory got renamed, so this means this file got added to the new directory
- # TODO, XXX, FIXME check if this can be done for directories as well
- if new_name == old_name and not old_name in dirs[old_revision]:
- print >> sys.stderr, "Bogus rename in %s (%s, %s)?" % (revision["revision"], new_name, old_name)
- file_moved_sticky.add(old_name)
-
- # Check if the old_name was a directory in the old manifest
- # If we rename a directory we will need to recursively remove and recursively
- # add...
- # Add the '/' otherwise we might rename the wrong directory which shares the
- # same prefix.
- # fca159c5c00ae4158c289f5aabce995378d4e41b is quite funny. It renames a directory
- # and then renames another directory within the renamed one and in the worse case
- # we will revive a deleted directory, file...
- elif old_name in dirs[old_revision]:
- print >> sys.stderr, "Detected directory rename '%s' => '%s'" % (old_name, new_name)
- assert(old_revision in manifests)
- to_rename_directories.append((old_name, new_name, old_revision))
- else:
- print >> sys.stderr, "Renaming %s => %s" % (old_name, new_name)
- file_renamed_new.add((new_name, None, revision["revision"]))
- file_renamed_del.add((old_name, old_revision))
# The first parent is our from.
for parent in revision["parent"][1:]:
cmd += ["merge :%s" % get_mark(parent)]
- # Do the renaming now
- (renamed_dirs, renamed_new, renamed_old) = recursively_rename_directories(ops, manifests, to_rename_directories, file_renamed_del.union(set(revision["removed"])), file_moved_sticky)
-
- # Sanity check, don't remove anything we modify
- all_added = set(revision["added_dirs"]).union(renamed_dirs)
- all_modifications = set(revision["modified"]).union(set(revision["added_files"])).union(renamed_new).union(file_renamed_new)
- all_deleted = set(revision["removed"]).union(renamed_old).union(file_renamed_del)
- all_deleted_new = all_deleted
-
- # Check if we delete and add at the same time
- for (deleted,rev) in all_deleted:
- for (added,_) in all_added:
- if added == deleted:
- print >> sys.stderr, "Added and Deleted", added, deleted
- all_deleted_new = set(filter(lambda (dele,_): dele != added, all_deleted_new))
- assert((added,rev) not in all_deleted_new)
-
- for (modified,_,_) in all_modifications:
- if modified == deleted:
- print >> sys.stderr, "Modified and Deleted", modified, deleted
- all_deleted_new = set(filter(lambda (dele,_): dele != modified, all_deleted_new))
- assert((modified,rev) not in all_deleted_new)
-
- # Filtered list of to be deleted items
- all_deleted = all_deleted_new
- # Check if we delete but the manifest has a file like this
- for line in manifest:
- if line[0] == "dir" or line[0] == "file":
- for (deleted,rev) in all_deleted:
- if line[1] == deleted:
- # 91da98265a39c93946e00adf5d7bf92b341de847 of mtn has a delete + rename
- print >> sys.stderr, "Trying to delete a file which is in the new manifest", line[1], deleted
- assert(False)
-
- compare_with_manifest(all_added, all_modifications, all_deleted, manifest, manifests)
-
- for (dir_name, rev) in all_added:
+ for dir_name in all_added:
cmd += ["M 644 inline %s" % os.path.join(dir_name, ".mtn2git_empty")]
cmd += ["data <<EOF"]
cmd += ["EOF"]
cmd += [""]
- for (file_name, file_revision, rev) in all_modifications:
- (mode, file) = get_file_and_mode(ops, manifest, file_name, file_revision, revision["revision"])
+ for (file_name, file_revision) in all_modifications:
+ (mode, file) = get_file_and_mode(ops, current_tree, file_name, file_revision, revision["revision"])
cmd += ["M %d inline %s" % (mode, file_name)]
cmd += ["data %d" % len(file)]
cmd += ["%s" % file]
- for (path, rev) in all_deleted:
- assert(rev in dirs)
- if path in dirs[rev]:
+ for (path, is_dir) in all_deleted:
+ if is_dir:
cmd += ["D %s" % os.path.join(path, ".mtn2git_empty")]
else:
cmd += ["D %s" % path]
@@ -428,33 +299,20 @@ def is_trusted(operations, revision):
return False
return True
-def get_file_and_mode(operations, manifest, file_name, _file_revision, rev = None):
- mode = 644
+def get_file_and_mode(operations, file_tree, file_name, _file_revision, rev = None):
+    """
+    Return (mode, contents) for file_name as recorded in file_tree.
+
+    mode is 755 when the tree flags the file as executable and 644
+    otherwise. contents is everything operations.get_file yields for the
+    stored file revision, joined into one string. file_name must exist in
+    file_tree.files, and a non-empty _file_revision must equal the stored
+    revision; rev only appears in the assertion messages.
+    """
+    assert file_name in file_tree.files, "get_file_and_mode: Revision '%s', file_name='%s' " % (rev, file_name)
- file_revision = None
- for line in manifest:
- if line[0] == "file" and line[1] == file_name:
- assert(line[1] == file_name)
- assert(line[2] == "content")
-
- if _file_revision:
- assert(line[3] == _file_revision)
- file_revision = line[3]
-
- attributes = line[4:]
- assert(len(attributes) % 3 == 0)
- if len(attributes) >= 3:
- for i in range(0, len(attributes)%3+1):
- if attributes[i] == "attr" and attributes[i+1] == "mtn:execute" and attributes[i+2] == "true":
- mode = 755
- break
+    stored_revision, is_exec = file_tree.files[file_name]
+    if _file_revision:
+        assert _file_revision == stored_revision, "Same filerevision for file_name='%s' in rev='%s' (%s,%s)" % (file_name, rev, stored_revision, _file_revision)
- assert(file_revision)
- file = "".join([file for file in operations.get_file(file_revision)])
- return (mode, file)
+    mode = 755 if is_exec else 644
- print >> sys.stderr, file_name, rev
- assert(False)
+    contents = "".join(operations.get_file(stored_revision))
+    return (mode, contents)
def parse_revision(operations, revision):
@@ -491,20 +349,12 @@ def parse_revision(operations, revision):
old_rev = line[1]
elif line[0] == "new_manifest":
revision_description["manifest"] = line[1]
- elif line[0] == "rename":
- revision_description["renamed"].append((line[3], line[1], old_rev))
- elif line[0] == "patch":
- revision_description["modified"].append((line[1], line[5], old_rev))
- elif line[0] == "delete":
- revision_description["removed"].append((line[1], old_rev))
- elif line[0] == "add_dir":
- revision_description["added_dirs"].append((line[1], old_rev))
- elif line[0] == "add_file":
- revision_description["added_files"].append((line[1], line[3], old_rev))
elif line[0] == "clear":
revision_description["clear_attributes"].append((line[1], line[3], old_rev))
elif line[0] == "set":
revision_description["set_attributes"].append((line[1], line[3], line[5], old_rev))
+ elif line[0] in ["rename", "patch", "delete", "add_dir", "add_file"]:
+ pass
else:
print >> sys.stderr, line
assert(False)
@@ -519,7 +369,7 @@ def parse_revision(operations, revision):
revision_description["committer"] = cert[1]
else:
print >> sys.stderr, "Unknown Cert: Ignoring", cert[5], cert[7]
- assert(False)
+ #assert(False)
return revision_description
@@ -565,25 +415,31 @@ def main(mtn_cli, db, rev):
ops.automate.stop()
all_revs = []
+ branch_heads = {}
for branch in branches:
heads = [head for head in ops.heads(branch)]
+ if len(heads) != 1:
+ print >> sys.stderr, "Skipping branch '%s' due multiple heads" % (branch)
+ continue
+
if branch in status.former_heads:
old_heads = status.former_heads[branch]
else:
old_heads = []
for head in heads:
+ print >> sys.stderr, old_heads, head
all_revs += ops.ancestry_difference(head, old_heads)
status.former_heads[branch] = heads
+
sorted_revs = [rev for rev in ops.toposort(all_revs)]
for rev in sorted_revs:
if has_mark(rev):
- print >> sys.stderr, "Already having commit '%s'" % rev
+ print >> sys.stderr, "B: Already having commit '%s'" % rev
else:
print >> sys.stderr, "Going to import revision ", rev
fast_import(ops, parse_revision(ops, rev))
-
if __name__ == "__main__":
import optparse
@@ -608,3 +464,4 @@ if __name__ == "__main__":
print >> sys.stderr, "Failed to open the status file"
main(options.mtn, options.database, options.rev)
status.store(options.status)
+