[PATCH] revlog experiment

Chris Mason mason at suse.com
Thu Mar 2 19:25:22 CST 2006


Hello everyone,

This patch is still pretty raw, but it implements revlog-ng and delta-vs-parent and packed data files for hg.  This basically creates
three revlog formats.  v0 (current hg) v1 (Matt's revlog-ng) and 
v2 (revlog-ng + delta vs parent).

When v2 is used, packed files are used wherever possible.  More
tuning can be added later to make packing more optional, but the patch
is big enough already.

All three formats can be used side by side in the same repo.  Older
versions of hg will not be able to read repos created with v1 or v2, so
please do not use this on data you care about.  Matt hasn't acked any
of these changes and the formats are not finalized.

The main goal of posting here is for benchmarking the delta vs parent and
packed file changes.  In my tests here, hot cache usage is a little slower
and cold cache is much faster.  Much of the hot cache cost is going into
figuring out if a packed log should be used or not, basically I need
to optimize my new filelog.islog(), and improve caching of the biglogs
during hg checkout.

By default, this patch creates v0 files.  I've added new .hgrc options:

[revlog]
# if nothing is specified here, the default is 0
# make all files use v2 (packed files) by default# 
default=2

# the changelog and the manifest use v1 (delta vs tip) by default
# this makes cloning and other common operations much much faster
# the manifest and changelog will use the default above if not specified
manifest=1
changelog=1

You convert repos between formats with hg clone --pull.  This patch is against
Matt's tip as of tonight (changeset:   1808:7036cd7f770d)

-chris

diff -r 7036cd7f770d mercurial/changelog.py
--- a/mercurial/changelog.py	Tue Feb 28 12:25:26 2006 -0600
+++ b/mercurial/changelog.py	Thu Mar  2 19:47:33 2006 -0500
@@ -11,8 +11,9 @@ demandload(globals(), "os time util")
 demandload(globals(), "os time util")
 
 class changelog(revlog):
-    def __init__(self, opener):
-        revlog.__init__(self, opener, "00changelog.i", "00changelog.d")
+    def __init__(self, opener, defversion=0):
+        revlog.__init__(self, opener, "00changelog.i", "00changelog.d",
+                        defversion)
 
     def extract(self, text):
         if not text:
diff -r 7036cd7f770d mercurial/commands.py
--- a/mercurial/commands.py	Tue Feb 28 12:25:26 2006 -0600
+++ b/mercurial/commands.py	Thu Mar  2 19:47:33 2006 -0500
@@ -997,7 +997,7 @@ def copy(ui, repo, *pats, **opts):
 
 def debugancestor(ui, index, rev1, rev2):
     """find the ancestor revision of two revisions in a given index"""
-    r = revlog.revlog(util.opener(os.getcwd()), index, "")
+    r = revlog.revlog(util.opener(os.getcwd()), index, "", 0)
     a = r.ancestor(r.lookup(rev1), r.lookup(rev2))
     ui.write("%d:%s\n" % (r.rev(a), hex(a)))
 
@@ -1083,7 +1083,7 @@ def debugstate(ui, repo):
 
 def debugdata(ui, file_, rev):
     """dump the contents of an data file revision"""
-    r = revlog.revlog(util.opener(os.getcwd()), file_[:-2] + ".i", file_)
+    r = revlog.revlog(util.opener(os.getcwd()), file_[:-2] + ".i", file_, 0)
     try:
         ui.write(r.revision(r.lookup(rev)))
     except KeyError:
@@ -1091,7 +1091,7 @@ def debugdata(ui, file_, rev):
 
 def debugindex(ui, file_):
     """dump the contents of an index file"""
-    r = revlog.revlog(util.opener(os.getcwd()), file_, "")
+    r = revlog.revlog(util.opener(os.getcwd()), file_, "", 0)
     ui.write("   rev    offset  length   base linkrev" +
              " nodeid       p1           p2\n")
     for i in range(r.count()):
@@ -1102,7 +1102,7 @@ def debugindex(ui, file_):
 
 def debugindexdot(ui, file_):
     """dump an index DAG as a .dot file"""
-    r = revlog.revlog(util.opener(os.getcwd()), file_, "")
+    r = revlog.revlog(util.opener(os.getcwd()), file_, "", 0)
     ui.write("digraph G {\n")
     for i in range(r.count()):
         e = r.index[i]
@@ -1130,6 +1130,20 @@ def debugrename(ui, repo, file, rev=None
         ui.write(_("renamed from %s:%s\n") % (m[0], hex(m[1])))
     else:
         ui.write(_("not renamed\n"))
+
+def debugspeed(ui, repo, count=500):
+    mf = repo.manifest
+    ar = range(0, mf.count())
+    import random
+    rr = random.Random()
+    count = int(count)
+    while ar and count > 0:
+        i = rr.randint(0, len(ar) - 1)
+        print i
+        t = repo.manifest.read(repo.manifest.node(i))
+        del ar[i]
+        count = count - 1
+        repo.manifest.cache[0] == revlog.nullid
 
 def debugwalk(ui, repo, *pats, **opts):
     """show how files match on given patterns"""
@@ -2419,6 +2433,7 @@ table = {
     "debugdata": (debugdata, [], _('debugdata FILE REV')),
     "debugindex": (debugindex, [], _('debugindex FILE')),
     "debugindexdot": (debugindexdot, [], _('debugindexdot FILE')),
+    "debugspeed": (debugspeed, [], _('debugspeed')),
     "debugrename": (debugrename, [], _('debugrename FILE [REV]')),
     "debugwalk":
         (debugwalk,
diff -r 7036cd7f770d mercurial/filelog.py
--- a/mercurial/filelog.py	Tue Feb 28 12:25:26 2006 -0600
+++ b/mercurial/filelog.py	Thu Mar  2 19:47:33 2006 -0500
@@ -11,10 +11,37 @@ demandload(globals(), "bdiff")
 demandload(globals(), "bdiff")
 
 class filelog(revlog):
-    def __init__(self, opener, path):
-        revlog.__init__(self, opener,
-                        os.path.join("data", self.encodedir(path + ".i")),
-                        os.path.join("data", self.encodedir(path + ".d")))
+    def __init__(self, join, opener, path, defversion=0):
+        self.join = join
+        self.indexfile = None
+        ip = self.indexname(path, defversion)
+        dp = ip[:-1] + 'd'
+        revlog.__init__(self, opener, ip, dp, defversion=defversion)
+
+    def indexname(self, path, version):
+        ip = os.path.join("data", self.encodedir(path + ".i"))
+        if os.path.exists(self.join(ip)):
+            return ip
+        packedip = ip
+        biglog = False
+        if not path.startswith('.hg'):
+            d = self.encodedir(path)
+            d = os.path.dirname(d)
+            packedip = os.path.join("data", d, "biglog.pi")
+            if (self.indexfile == packedip or
+                os.path.exists(self.join(packedip))):
+                biglog = True
+
+        # if the directory uses biglog, we have to keep using it
+        if biglog or version == REVLOGNGPARENTDELTA:
+            ip = packedip
+        return ip
+
+    def islog(self, path):
+        ip = self.indexname(path, self.version)
+        if ip == self.indexfile:
+            return True
+        return False
 
     # This avoids a collision between a file named foo and a dir named
     # foo.i or foo.d
diff -r 7036cd7f770d mercurial/localrepo.py
--- a/mercurial/localrepo.py	Tue Feb 28 12:25:26 2006 -0600
+++ b/mercurial/localrepo.py	Thu Mar  2 19:47:33 2006 -0500
@@ -33,8 +33,16 @@ class localrepository(object):
         self.ui = ui
         self.opener = util.opener(self.path)
         self.wopener = util.opener(self.root)
-        self.manifest = manifest.manifest(self.opener)
-        self.changelog = changelog.changelog(self.opener)
+        v = self.ui.revlogdefault
+        rd = 0
+        if 'default' in v:
+            rd = v['default']
+        if 'manifest' in v:
+            rd = v['manifest']
+        self.manifest = manifest.manifest(self.opener, rd)
+        if 'changelog' in v:
+            rd = v['changelog']
+        self.changelog = changelog.changelog(self.opener, rd)
         self.tagscache = None
         self.nodetagscache = None
         self.encodepats = None
@@ -170,10 +178,13 @@ class localrepository(object):
     def wjoin(self, f):
         return os.path.join(self.root, f)
 
-    def file(self, f):
+    def file(self, f, prev=None):
         if f[0] == '/':
             f = f[1:]
-        return filelog.filelog(self.opener, f)
+        if prev and prev.islog(f):
+            return prev
+        rd = self.ui.revlogdefault.get('default', 0)
+        return filelog.filelog(self.join, self.opener, f, rd)
 
     def getcwd(self):
         return self.dirstate.getcwd()
@@ -230,7 +241,7 @@ class localrepository(object):
         self.opener("journal.dirstate", "w").write(ds)
 
         tr = transaction.transaction(self.ui.warn, self.opener,
-                                       self.join("journal"), 
+                                       self.join("journal"),
                                        aftertrans(self.path))
         self.transhandle = tr
         return tr
@@ -337,11 +348,12 @@ class localrepository(object):
         mm = m1.copy()
         mfm = mf1.copy()
         linkrev = self.changelog.count()
+        r = None
         for f in files:
             try:
                 t = self.wread(f)
                 tm = util.is_exec(self.wjoin(f), mfm.get(f, False))
-                r = self.file(f)
+                r = self.file(f, r)
                 mfm[f] = tm
 
                 (entry, fp1, fp2) = self.checkfilemerge(f, t, r, m1, m2)
@@ -417,6 +429,7 @@ class localrepository(object):
         new = {}
         linkrev = self.changelog.count()
         commit.sort()
+        r = None
         for f in commit:
             self.ui.note(f + "\n")
             try:
@@ -426,7 +439,7 @@ class localrepository(object):
                 self.ui.warn(_("trouble committing %s!\n") % f)
                 raise
 
-            r = self.file(f)
+            r = self.file(f, r)
 
             meta = {}
             cp = self.dirstate.copied(f)
@@ -514,7 +527,8 @@ class localrepository(object):
 
         def fcmp(fn, mf):
             t1 = self.wread(fn)
-            t2 = self.file(fn).read(mf.get(fn, nullid))
+            lastfcmp = self.file(fn)
+            t2 = lastfcmp.read(mf.get(fn, nullid))
             return cmp(t1, t2)
 
         def mfmatches(node):
@@ -1247,8 +1261,9 @@ class localrepository(object):
             changedfiles = changedfiles.keys()
             changedfiles.sort()
             # Go through all our files in order sorted by name.
+            filerevlog = None
             for fname in changedfiles:
-                filerevlog = self.file(fname)
+                filerevlog = self.file(fname, filerevlog)
                 # Toss out the filenodes that the recipient isn't really
                 # missing.
                 if msng_filenode_set.has_key(fname):
@@ -1328,8 +1343,9 @@ class localrepository(object):
             for chnk in mnfst.group(nodeiter, lookuprevlink_func(mnfst)):
                 yield chnk
 
+            filerevlog = None
             for fname in changedfiles:
-                filerevlog = self.file(fname)
+                filerevlog = self.file(fname, filerevlog)
                 nodeiter = gennodelst(filerevlog)
                 nodeiter = list(nodeiter)
                 if nodeiter:
@@ -1400,12 +1416,13 @@ class localrepository(object):
 
         # process the files
         self.ui.status(_("adding file changes\n"))
+        fl = None
         while 1:
             f = getchunk()
             if not f:
                 break
             self.ui.debug(_("adding %s revisions\n") % f)
-            fl = self.file(f)
+            fl = self.file(f, fl)
             o = fl.count()
             n = fl.addgroup(getgroup(), revmap, tr)
             revisions += fl.count() - o
@@ -1451,6 +1468,7 @@ class localrepository(object):
         mf2 = self.manifest.readflags(m2n)
         ma = self.manifest.read(man)
         mfa = self.manifest.readflags(man)
+        prevfp = None
 
         modified, added, removed, deleted, unknown = self.changes()
 
@@ -1468,7 +1486,8 @@ class localrepository(object):
             for f in unknown:
                 if f in m2:
                     t1 = self.wread(f)
-                    t2 = self.file(f).read(m2[f])
+                    prevfp = self.file(f, prevfp)
+                    t2 = prevfp.read(m2[f])
                     if cmp(t1, t2) != 0:
                         raise util.Abort(_("'%s' already exists in the working"
                                            " dir and differs from remote") % f)
@@ -1519,7 +1538,8 @@ class localrepository(object):
                 # is the wfile new since m1, and match m2?
                 if f not in m1:
                     t1 = self.wread(f)
-                    t2 = self.file(f).read(m2[f])
+                    prevfp = self.file(f, prevfp)
+                    t2 = prevfp.read(m2[f])
                     if cmp(t1, t2) == 0:
                         n = m2[f]
                     del t1, t2
@@ -1643,7 +1663,8 @@ class localrepository(object):
             if f[0] == "/":
                 continue
             self.ui.note(_("getting %s\n") % f)
-            t = self.file(f).read(get[f])
+            prevfp = self.file(f, prevfp)
+            t = prevfp.read(get[f])
             self.wwrite(f, t)
             util.set_exec(self.wjoin(f), mf2[f])
             if moddirstate:
@@ -1673,7 +1694,9 @@ class localrepository(object):
                     # of that file some time in the past. Thus our
                     # merge will appear as a normal local file
                     # modification.
-                    f_len = len(self.file(f).read(other))
+
+                    prevfp = self.file(f, prevfp)
+                    f_len = len(prevfp.read(other))
                     self.dirstate.update([f], 'n', st_size=f_len, st_mtime=-1)
 
         remove.sort()
@@ -1829,11 +1852,17 @@ class localrepository(object):
         self.ui.status(_("checking files\n"))
         ff = filenodes.keys()
         ff.sort()
+        fl = None
         for f in ff:
             if f == "/dev/null":
                 continue
+
+            # skip files packed files we've already done
+            if fl and fl.islog(f):
+                continue
+
             files += 1
-            fl = self.file(f)
+            fl = self.file(f, fl)
             checksize(fl, f)
 
             nodes = {nullid: 1}
@@ -1844,17 +1873,17 @@ class localrepository(object):
 
                 if n in seen:
                     err(_("%s: duplicate revision %d") % (f, i))
-                if n not in filenodes[f]:
+                if n in filenodes[f]:
+                    del filenodes[f][n]
+                elif not fl.deltavparent():
                     err(_("%s: %d:%s not in manifests") % (f, i, short(n)))
-                else:
-                    del filenodes[f][n]
 
                 flr = fl.linkrev(n)
-                if flr not in filelinkrevs[f]:
+                if flr in filelinkrevs[f]:
+                    filelinkrevs[f].remove(flr)
+                elif not fl.deltavparent():
                     err(_("%s:%s points to unexpected changeset %d")
                             % (f, short(n), flr))
-                else:
-                    filelinkrevs[f].remove(flr)
 
                 # verify contents
                 try:
diff -r 7036cd7f770d mercurial/manifest.py
--- a/mercurial/manifest.py	Tue Feb 28 12:25:26 2006 -0600
+++ b/mercurial/manifest.py	Thu Mar  2 19:47:33 2006 -0500
@@ -12,10 +12,11 @@ demandload(globals(), "bisect array")
 demandload(globals(), "bisect array")
 
 class manifest(revlog):
-    def __init__(self, opener):
+    def __init__(self, opener, defversion=0):
         self.mapcache = None
         self.listcache = None
-        revlog.__init__(self, opener, "00manifest.i", "00manifest.d")
+        revlog.__init__(self, opener, "00manifest.i", "00manifest.d", 
+                        defversion)
 
     def read(self, node):
         if node == nullid: return {} # don't upset local cache
@@ -164,7 +165,7 @@ class manifest(revlog):
             cachedelta = addlistdelta(addlist, delta)
 
             # the delta is only valid if we've been processing the tip revision
-            if self.mapcache[0] != self.tip():
+            if self.mapcache[0] != self.tip() or self.mapcache[0] != p1:
                 cachedelta = None
             self.listcache = addlist
 
diff -r 7036cd7f770d mercurial/revlog.py
--- a/mercurial/revlog.py	Tue Feb 28 12:25:26 2006 -0600
+++ b/mercurial/revlog.py	Thu Mar  2 19:47:33 2006 -0500
@@ -14,6 +14,11 @@ from i18n import gettext as _
 from i18n import gettext as _
 from demandload import demandload
 demandload(globals(), "binascii errno heapq mdiff os sha struct zlib")
+
+# revlog version strings
+REVLOGV0 = 0
+REVLOGNG = 1
+REVLOGNGPARENTDELTA = 2
 
 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes
@@ -50,7 +55,19 @@ def decompress(bin):
     if t == 'u': return bin[1:]
     raise RevlogError(_("unknown compression type %s") % t)
 
-indexformat = ">4l20s20s20s"
+indexformatv0 = ">4l20s20s20s"
+# index ng:
+# 6 bytes offset
+# 2 bytes flags
+# 4 bytes compressed length
+# 4 bytes uncompressed length
+# 4 bytes: base rev
+# 4 bytes link rev
+# 4 bytes parent 1 rev
+# 4 bytes parent 2 rev
+# 32 bytes: nodeid
+indexformatng = ">6shiiiiii32s"
+versionformat = ">2h"
 
 class lazyparser(object):
     """
@@ -64,7 +81,7 @@ class lazyparser(object):
     """
     def __init__(self, data, revlog):
         self.data = data
-        self.s = struct.calcsize(indexformat)
+        self.s = struct.calcsize(revlog.indexformat)
         self.l = len(data)/self.s
         self.index = [None] * self.l
         self.map = {nullid: -1}
@@ -89,9 +106,10 @@ class lazyparser(object):
 
         while i < end:
             d = self.data[i * self.s: (i + 1) * self.s]
-            e = struct.unpack(indexformat, d)
+            e = self.revlog.unpack(d)
             self.index[i] = e
-            self.map[e[6]] = i
+            n = self.revlog.node(i)
+            self.map[n] = i
             i += 1
 
 class lazyindex(object):
@@ -132,10 +150,10 @@ class lazymap(object):
         yield nullid
         for i in xrange(self.p.l):
             try:
-                yield self.p.index[i][6]
+                yield self.p.revlog.node(i)
             except:
                 self.p.load(i)
-                yield self.p.index[i][6]
+                yield self.p.revlog.node(i)
     def __getitem__(self, key):
         try:
             return self.p.map[key]
@@ -152,6 +170,9 @@ class lazymap(object):
 
 class RevlogError(Exception): pass
 
+def revlog(opener, indexfile, datafile):
+    return revlogv1(opener, indexfile, datafile, fp, version)
+
 class revlog(object):
     """
     the underlying revision storage object
@@ -177,7 +198,7 @@ class revlog(object):
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
-    def __init__(self, opener, indexfile, datafile):
+    def __init__(self, opener, indexfile, datafile, defversion=0):
         """
         create a revlog object
 
@@ -191,15 +212,21 @@ class revlog(object):
         self.indexstat = None
         self.cache = None
         self.chunkcache = None
+        self.defversion = defversion
         self.load()
 
     def load(self):
         try:
             f = self.opener(self.indexfile)
+            i = f.read(4)
+            if len(i) != 4:
+                i = ""
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise
             i = ""
+            f = None
+            v = self.defversion
         else:
             try:
                 st = os.fstat(f.fileno())
@@ -213,11 +240,27 @@ class revlog(object):
                     and st.st_ctime == oldst.st_ctime):
                     return
             self.indexstat = st
+            if len(i) > 0:
+                v, cb = struct.unpack(versionformat, i)
+                if v == 0:
+                    f.seek(0, 0)
+                    self.indexformat = indexformatv0
+                elif v != 1 and v != 2:
+                    raise RevlogError(_("unknown version format %d on %s") % (v,
+                                        self.indexfile))
+            else:
+                v = self.defversion
+        self.version = v
+        if v == 0:
+            self.indexformat = indexformatv0
+        else:
+            self.indexformat = indexformatng
+        self.setupops()
+
+        if f:
             i = f.read()
-
-        if i and i[:4] != "\0\0\0\0":
-            raise RevlogError(_("incompatible revlog signature on %s") %
-                              self.indexfile)
+        else:
+            i = ""
 
         if len(i) > 10000:
             # big index, let's parse it on demand
@@ -225,7 +268,7 @@ class revlog(object):
             self.index = lazyindex(parser)
             self.nodemap = lazymap(parser)
         else:
-            s = struct.calcsize(indexformat)
+            s = struct.calcsize(self.indexformat)
             l = len(i) / s
             self.index = [None] * l
             m = [None] * l
@@ -233,31 +276,61 @@ class revlog(object):
             n = 0
             for f in xrange(0, l * s, s):
                 # offset, size, base, linkrev, p1, p2, nodeid
-                e = struct.unpack(indexformat, i[f:f + s])
-                m[n] = (e[6], n)
+                e = self.unpack(i[f:f + s])
                 self.index[n] = e
+                node = self.node(n)
+                m[n] = (node, n)
                 n += 1
 
             self.nodemap = dict(m)
             self.nodemap[nullid] = -1
 
+    def setupops(self):
+        if self.version == 0:
+            self.node = self.nodev0
+            self.linkrev = self.linkrevv0
+            self.parents = self.parentsv0
+            self.length = self.lengthv0
+            self.base = self.basev0
+            self.start = self.startv0
+        else:
+            self.node = self.nodeng
+            self.linkrev = self.linkrevng
+            self.parents = self.parentsng
+            self.length = self.lengthng
+            self.base = self.baseng
+            self.start = self.startng
+
+    def unpack(self, d):
+        return struct.unpack(self.indexformat, d)
+
+    def deltavparent(self): return self.version == REVLOGNGPARENTDELTA
     def tip(self): return self.node(len(self.index) - 1)
     def count(self): return len(self.index)
-    def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
+    def nodev0(self, rev): return (rev < 0) and nullid or self.index[rev][6]
+    def nodeng(self, rev): return (rev < 0) and nullid or self.index[rev][8][12:]
     def rev(self, node):
         try:
             return self.nodemap[node]
         except KeyError:
             raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
-    def linkrev(self, node): return self.index[self.rev(node)][3]
-    def parents(self, node):
+    def linkrevv0(self, node): return self.index[self.rev(node)][3]
+    def linkrevng(self, node): return self.index[self.rev(node)][5]
+    def parentsv0(self, node):
         if node == nullid: return (nullid, nullid)
         return self.index[self.rev(node)][4:6]
-
-    def start(self, rev): return self.index[rev][0]
-    def length(self, rev): return self.index[rev][1]
+    def parentsng(self, node):
+        if node == nullid: return (nullid, nullid)
+        d = self.index[self.rev(node)][6:8]
+        return [ self.node(x) for x in d ]
+
+    def startng(self, rev): return self.bin2ngoffset(self.index[rev][0])
+    def startv0(self, rev): return self.index[rev][0]
+    def lengthv0(self, rev): return self.index[rev][1]
+    def lengthng(self, rev): return self.index[rev][2]
     def end(self, rev): return self.start(rev) + self.length(rev)
-    def base(self, rev): return self.index[rev][2]
+    def basev0(self, rev): return self.index[rev][2]
+    def baseng(self, rev): return self.index[rev][4]
 
     def reachable(self, rev, stop=None):
         reachable = {}
@@ -496,18 +569,20 @@ class revlog(object):
         """apply a list of patches to a string"""
         return mdiff.patches(t, pl)
 
-    def chunk(self, rev):
+    def chunk(self, rev, df=None):
         start, length = self.start(rev), self.length(rev)
         end = start + length
 
-        def loadcache():
-            cache_length = max(4096 * 1024, length) # 4Mo
-            df = self.opener(self.datafile)
+        def loadcache(df):
+            #cache_length = max(4096 * 1024, length) # 4Mo
+            cache_length = max(4096, length) # 4k
+            if not df:
+                df = self.opener(self.datafile)
             df.seek(start)
             self.chunkcache = (start, df.read(cache_length))
 
         if not self.chunkcache:
-            loadcache()
+            loadcache(df)
 
         cache_start = self.chunkcache[0]
         cache_end = cache_start + len(self.chunkcache[1])
@@ -515,7 +590,7 @@ class revlog(object):
             # it is cached
             offset = start - cache_start
         else:
-            loadcache()
+            loadcache(df)
             offset = 0
 
         #def checkchunk():
@@ -546,15 +621,31 @@ class revlog(object):
         base = self.base(rev)
 
         # do we have useful data cached?
-        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
-            base = self.cache[1]
-            text = self.cache[2]
-        else:
-            text = self.chunk(base)
+        if not self.deltavparent():
+            if self.cache and self.cache[1] >= base and self.cache[1] < rev:
+                base = self.cache[1]
+                text = self.cache[2]
+            else:
+                text = self.chunk(base)
 
         bins = []
-        for r in xrange(base + 1, rev + 1):
-            bins.append(self.chunk(r))
+        if not self.deltavparent():
+            for r in xrange(base + 1, rev + 1):
+                bins.append(self.chunk(r))
+        else:
+            cr = rev
+            cc = []
+            c = node
+            while cr != base:
+                cc.append(cr)
+                c = self.parents(c)[0]
+                cr = self.rev(c)
+            cc.reverse()
+            df = self.opener(self.datafile)
+            if not text:
+                text = self.chunk(base)
+            bins = [ self.chunk(x, df) for x in cc ]
+            df.close()
 
         text = mdiff.patches(text, bins)
 
@@ -565,6 +656,15 @@ class revlog(object):
 
         self.cache = (node, rev, text)
         return text
+
+    def ngoffset2bin(self, off):
+        off = off & 0xFFFFFFFFFF
+        e = struct.pack(">q", off)
+        return e[2:]
+
+    def bin2ngoffset(self, bin):
+        e = '\0\0' + bin
+        return struct.unpack(">q", e)[0]
 
     def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
         """add a revision to the log
@@ -576,9 +676,12 @@ class revlog(object):
         d - an optional precomputed delta
         """
         if text is None: text = ""
+        deltap1 = p1
+        if deltap1 is None: deltap1 = nullid
         if p1 is None: p1 = self.tip()
         if p2 is None: p2 = nullid
 
+        deltavp = self.deltavparent()
         node = hash(text, p1, p2)
 
         if node in self.nodemap:
@@ -587,23 +690,48 @@ class revlog(object):
         n = self.count()
         t = n - 1
 
-        if n:
+        fullrev = False
+        dist = 0
+        if n == 0 or (deltavp and deltap1 == nullid):
+            fullrev = True
+        if not fullrev:
             base = self.base(t)
             start = self.start(base)
             end = self.end(t)
             if not d:
-                prev = self.revision(self.tip())
+                if deltavp:
+                    p = deltap1
+                else:
+                    p = self.tip()
+                prev = self.revision(p)
                 d = self.diff(prev, str(text))
             data = compress(d)
             l = len(data[1]) + len(data[0])
-            dist = end - start + l
+            if deltavp:
+                c = deltap1
+                cr = self.rev(c)
+                base = self.base(cr)
+                dist = l
+                while cr != base:
+                    dist += self.length(cr)
+                    c = self.parents(c)[0]
+                    ncr = self.rev(c)
+                    if c == nullid:
+                        base = self.base(cr)
+                        break
+                    cr = ncr
+                dist += self.length(base)
+            else:
+                dist = end - start + l
 
         # full versions are inserted when the needed deltas
         # become comparable to the uncompressed text
-        if not n or dist > len(text) * 2:
+        if fullrev or dist > len(text) * 2:
             data = compress(text)
             l = len(data[1]) + len(data[0])
             base = n
+        elif deltavp:
+            base = self.base(self.rev(deltap1))
         else:
             base = self.base(t)
 
@@ -611,19 +739,32 @@ class revlog(object):
         if t >= 0:
             offset = self.end(t)
 
-        e = (offset, l, base, link, p1, p2, node)
+        if self.version == 0:
+            e = (offset, l, base, link, p1, p2, node)
+        else:
+            e = (self.ngoffset2bin(offset), 0, l, len(text),
+                 base, link, self.rev(p1), self.rev(p2), '\0' * 12 + node)
 
         self.index.append(e)
         self.nodemap[node] = n
-        entry = struct.pack(indexformat, *e)
-
-        transaction.add(self.datafile, e[0])
+        entry = struct.pack(self.indexformat, *e)
+
+        transaction.add(self.datafile, offset)
         f = self.opener(self.datafile, "a")
         if data[0]:
             f.write(data[0])
         f.write(data[1])
-        transaction.add(self.indexfile, n * len(entry))
-        self.opener(self.indexfile, "a").write(entry)
+        addbytes = 0
+        if len(self.index) > 1 and self.defversion != 0:
+            addbytes = 2
+            
+        transaction.add(self.indexfile, n * len(entry) + addbytes)
+        fp = self.opener(self.indexfile, "a")
+        if len(self.index) == 1 and self.defversion != 0:
+            l = struct.pack(versionformat, self.defversion, self.defversion)
+            fp.write(l)
+
+        fp.write(entry)
 
         self.cache = (node, n, text)
         return node
@@ -716,7 +857,12 @@ class revlog(object):
                 infocollect(nb)
 
             # do we need to construct a new delta?
-            if a + 1 != b or self.base(b) == b:
+            needdelta = False
+            if a + 1 != b:
+                needdelta = True
+            elif self.deltavparent() and na != self.parents(nb)[0]:
+                needdelta = True
+            if needdelta or self.base(b) == b:
                 ta = self.revision(na)
                 tb = self.revision(nb)
                 d = self.diff(ta, tb)
@@ -749,14 +895,13 @@ class revlog(object):
         base = prev = -1
         start = end = measure = 0
         if r:
-            base = self.base(t)
-            start = self.start(base)
             end = self.end(t)
-            measure = self.length(base)
-            prev = self.tip()
-
         transaction.add(self.datafile, end)
-        transaction.add(self.indexfile, r * struct.calcsize(indexformat))
+        addbytes = 0
+        if self.version != 0 and r > 0:
+            addbytes = 2
+        transaction.add(self.indexfile, 
+                        r * struct.calcsize(self.indexformat) + addbytes)
         dfh = self.opener(self.datafile, "a")
         ifh = self.opener(self.indexfile, "a")
 
@@ -783,17 +928,24 @@ class revlog(object):
                 if not chain in self.nodemap:
                     raise RevlogError(_("unknown base %s") % short(chain[:4]))
 
+
             # full versions are inserted when the needed deltas become
             # comparable to the uncompressed text or when the previous
             # version is not the one we have a delta against. We use
             # the size of the previous full rev as a proxy for the
             # current size.
-
+            gooddelta = False
             if chain == prev:
+                gooddelta = True
+                if self.deltavparent() and (chain != p1 or p1 == nullid):
+                    gooddelta = False
+
+            if gooddelta:
                 tempd = compress(delta)
                 cdelta = tempd[0] + tempd[1]
 
-            if chain != prev or (end - start + len(cdelta)) > measure * 2:
+            if (not gooddelta or prev == -1 or
+               (end - start + len(cdelta)) > measure * 2):
                 # flush our writes here so we can read it in revision
                 dfh.flush()
                 ifh.flush()
@@ -804,11 +956,19 @@ class revlog(object):
                     raise RevlogError(_("consistency error adding group"))
                 measure = len(text)
             else:
-                e = (end, len(cdelta), base, link, p1, p2, node)
+                if not self.deltavparent():
+                    b = base
+                else:
+                    b = self.base(self.rev(p1))
+                if self.version == 0:
+                    e = (end, len(cdelta), b, link, p1, p2, node)
+                else:
+                    e = (self.ngoffset2bin(end), 0, len(cdelta), -1, b,
+                         link, self.rev(p1), self.rev(p2), '\0' * 12 + node)
                 self.index.append(e)
                 self.nodemap[node] = r
                 dfh.write(cdelta)
-                ifh.write(struct.pack(indexformat, *e))
+                ifh.write(struct.pack(self.indexformat, *e))
 
             t, r, chain, prev = r, r + 1, node, node
             base = self.base(t)
@@ -835,7 +995,7 @@ class revlog(object):
         # first truncate the files on disk
         end = self.start(rev)
         self.opener(self.datafile, "a").truncate(end)
-        end = rev * struct.calcsize(indexformat)
+        end = rev * struct.calcsize(self.indexformat)
         self.opener(self.indexfile, "a").truncate(end)
 
         # then reset internal state in memory to forget those revisions
@@ -869,9 +1029,12 @@ class revlog(object):
             f = self.opener(self.indexfile)
             f.seek(0, 2)
             actual = f.tell()
-            s = struct.calcsize(indexformat)
+            s = struct.calcsize(self.indexformat)
             i = actual / s
             di = actual - (i * s)
+            if self.version != 0:
+                # subtract four bytes for the revlog format field
+                di -= 4
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise
diff -r 7036cd7f770d mercurial/ui.py
--- a/mercurial/ui.py	Tue Feb 28 12:25:26 2006 -0600
+++ b/mercurial/ui.py	Thu Mar  2 19:47:33 2006 -0500
@@ -24,6 +24,7 @@ class ui(object):
 
         self.updateopts(verbose, debug, quiet, interactive)
         self.diffcache = None
+        self.revlogdefault = self.configrevlog()
 
     def updateopts(self, verbose=False, debug=False, quiet=False,
                  interactive=True):
@@ -77,6 +78,12 @@ class ui(object):
     def extensions(self):
         return self.configitems("extensions")
 
+    def configrevlog(self):
+        ret = {}
+        for x in self.configitems("revlog"):
+            k = x[0].lower()
+            ret[k] = int(x[1])
+        return ret
     def diffopts(self):
         if self.diffcache:
             return self.diffcache


More information about the Mercurial mailing list