I am testing netbeans repository with the following filemap:
"""
include websvc.restkit
"""
It should select a hundred files at most.
To get to the real issue faster, we can probably fast-path getchangedfiles() like:
diff --git a/hgext/convert/hg.py b/hgext/convert/hg.py
--- a/hgext/convert/hg.py
+++ b/hgext/convert/hg.py
@@ -280,12 +280,15 @@
def getchangedfiles(self, rev, i):
ctx = self.changectx(rev)
i = i or 0
- changes = self.repo.status(ctx.parents()[i].node(), ctx.node())[:3]
- if i == 0:
- self._changescache = (rev, changes)
-
- return changes[0] + changes[1] + changes[2]
+ parents = [p for p in ctx.parents() if p.node() != nullid]
+ if len(parents) < 2:
+ return ctx.files()
+ else:
+ changes = self.repo.status(ctx.parents()[i].node(), ctx.node())[:3]
+ if i == 0:
+ self._changescache = (rev, changes)
+ return changes[0] + changes[1] + changes[2]
def converted(self, rev, destrev):
if self.convertfp is None:
Running with --lsprof shows localrepo.status() is the bottleneck. To speed-up
things, I tried to pass the match function to getchangedfiles(), so we could
pass it to localrepo.status() directly (see passmatcher.diff). It is almost slower!
filemap matcher code is not really efficient. Just for testing, the matcher was
hard-coded like:
diff --git a/hgext/convert/filemap.py b/hgext/convert/filemap.py
--- a/hgext/convert/filemap.py
+++ b/hgext/convert/filemap.py
@@ -221,7 +221,7 @@
# indicated by i. If we're interested in any of these files,
# we're interested in rev.
try:
- match = self.filemapper
+ match = lambda f: f.startswith('websvc.restkit')
files = self.base.getchangedfiles(rev, i, match)
except NotImplementedError:
raise util.Abort(_("source repository doesn't support --filemap"))
Here is an interrupted run with this setup:
^C CallCount Recursive Total(ms) Inline(ms) module:lineno(function)
265865 0 22.3372 22.3372 <zlib.decompress>
140 0 31.4741 16.5301 mercurial.manifest:39(parse)
+7228472 0 5.5807 5.5807 +<binascii.unhexlify>
+7228472 0 5.5348 5.5348 +<method 'split' of 'str'
objects>
+140 0 2.7227 2.7227 +<method 'splitlines' of
'str' objects>
+7228472 0 1.1037 1.1037 +<len>
+140 0 0.0021 0.0021 +mercurial.manifest:13(__init__)
267494 0 7.2551 7.2551 <method 'update' of
'_hashlib.HASH' objects>
59 0 91.9858 6.7944 mercurial.localrepo:960(status)
+118 0 77.3566 5.4890
+mercurial.localrepo:972(mfmatches)
+464 0 4.1446 4.1446 +<method 'sort' of 'list'
objects>
+3751224 0 3.4581 2.7997 +mercurial.manifest:18(flags)
+116 0 0.2316 0.2316 +<method 'keys' of 'dict'
objects>
+1668 0 0.0005 0.0005 +<method 'append' of 'list'
objects>
7498277 0 6.0608 6.0608 <method 'split' of 'str'
objects>
168262 0 6.0603 6.0603 <method 'read' of 'file'
objects>
7494422 0 5.8853 5.8853 <binascii.unhexlify>
118 0 77.3566 5.4890
mercurial.localrepo:972(mfmatches)
+118 0 60.5234 4.6206 +mercurial.manifest:54(read)
+117 0 6.2338 3.6739 +mercurial.manifest:30(copy)
+4105211 0 3.5152 2.1797
+hgext.convert.filemap:225(<lambda>)
+117 0 0.8799 0.8799 +<method 'keys' of 'dict'
objects>
+3747198 0 0.6592 0.6592 +mercurial.util:235(always)
133657 0 65.0944 4.7408 mercurial.revlog:919(revision)
+133335 0 2.8242 2.8242 +<mercurial.mpatch.patches>
+274406 0 36.1083 2.6630 +mercurial.revlog:869(chunk)
+133335 0 2.7148 0.6428 +mercurial.revlog:527(parents)
+133335 0 8.4869 0.5481 +mercurial.revlog:51(hash)
+133013 0 5.9247 0.5323 +mercurial.util:1299(o)
211 0 60.5237 4.6208 mercurial.manifest:54(read)
+109 0 31.4669 16.5265 +mercurial.manifest:39(parse)
+109 0 24.4357 0.4517 +mercurial.revlog:919(revision)
+32 0 0.0001 0.0001 +mercurial.manifest:13(__init__)
+1 0 0.0002 0.0000
+mercurial.demandimport:71(__getattribute__)
interrupted!
As expected, much time is spent reading netbeans manifest (75000 files) in
localrepo.status(). So even a perfect matching code won't really help, there is
a deeper issue.
|