[PATCH 1/1] churn: add --paths option

Marc Bevand m.bevand at gmail.com
Fri Jan 9 17:19:24 CST 2009


I enhanced the churn extension to add a new option:

  -p --paths       group by file paths or path prefixes

Demonstration:

  $ hg init .
  $ mkdir foo bar
  $ date >foo/1
  $ date >foo/2
  $ date >bar/1
  $ hg ci -Am xxx
  $ date >foo/2
  $ hg ci -m xxx
  $ hg churn -p 
  foo/2      3 ************************************************
  foo/1      1 ****************
  bar/1      1 ****************

As shown above, the default behavior of -p is to group the changes by file
path. This is related to the functionality provided by diffstat(1), except
that churn takes into account each intermediate revision. For example, if a
file at rev A is changed in B and then reverted in C, a diffstat between A
and C will not show any changes, while hg churn -r A:C will show them.

I also added a churn config option:

  [churn]
  prefixes = foo bar

It will cause -p to group the changesets according to the specified path
prefixes:

  $ hg churn -p
  foo      4 **************************************************
  bar      1 ***********

Of course, -p can also be combined with -c to count by number of changesets.
The patch applies cleanly to the current tip (dafcc96c1285). I'd like to
have it applied. Comments?

-marc

Signed-off-by: Marc Bevand <m.bevand <at> gmail.com>

diff -r 2a67430f92f1 -r b885f782fe04 hgext/churn.py
--- a/hgext/churn.py	Tue Dec 02 13:05:40 2008 -0600
+++ b/hgext/churn.py	Thu Jan 08 17:06:06 2009 -0800
@@ -11,6 +11,7 @@
 from mercurial import patch, cmdutil, util, templater
 import os, sys
 import time, datetime
+import re
 
 def get_tty_width():
     if 'COLUMNS' in os.environ:
@@ -42,29 +43,53 @@
     t.use_template(tmpl)
     return t
 
-def changedlines(ui, repo, ctx1, ctx2):
-    lines = 0
+def changedlines(ui, repo, ctx1, ctx2, per_file):
     diff = ''.join(patch.diff(repo, ctx1.node(), ctx2.node()))
-    for l in diff.split('\n'):
-        if (l.startswith("+") and not l.startswith("+++ ") or
-            l.startswith("-") and not l.startswith("--- ")):
-            lines += 1
-    return lines
+    if per_file:
+        # report per-file statistics
+        filename = None
+        res = {}
+        p = re.compile('^diff -r [^ ]+ -r [^ ]+ (.+)')
+        for l in diff.split('\n'):
+            if l.startswith("diff "):
+                if filename is not None:
+                    res[filename] = lines
+                m = p.match(l)
+                filename = m.group(1)
+                lines = 0
+            elif (l.startswith("+") and not l.startswith("+++ ") or
+                l.startswith("-") and not l.startswith("--- ")):
+                lines += 1
+        if filename is not None:
+            res[filename] = lines
+        return res
+    else:
+        # report overall number of changed lines
+        lines = 0
+        for l in diff.split('\n'):
+            if (l.startswith("+") and not l.startswith("+++ ") or
+                l.startswith("-") and not l.startswith("--- ")):
+                lines += 1
+        return lines
 
 def countrate(ui, repo, amap, *pats, **opts):
     """Calculate stats"""
-    if opts.get('dateformat'):
-        def getkey(ctx):
+    if opts.get('paths'):
+        def getkeys(ctx):
+            l = repo.changelog.read(ctx.node())
+            return l[3] # list of files modified in this changeset
+    elif opts.get('dateformat'):
+        def getkeys(ctx):
             t, tz = ctx.date()
             date = datetime.datetime(*time.gmtime(float(t) - tz)[:6])
-            return date.strftime(opts['dateformat'])
+            return [date.strftime(opts['dateformat'])]
     else:
         tmpl = opts.get('template', '{author|email}')
         tmpl = maketemplater(ui, repo, tmpl)
-        def getkey(ctx):
+        def getkeys(ctx):
             ui.pushbuffer()
             tmpl.show(ctx)
-            return ui.popbuffer()
+            return [ui.popbuffer()]
 
     count = pct = 0
     rate = {}
@@ -81,10 +106,12 @@
             continue
 
         ctx = repo[rev]
-        key = getkey(ctx)
-        key = amap.get(key, key) # alias remap
+        keys = getkeys(ctx)
+        if len(keys) == 1:
+            keys[0] = amap.get(keys[0], keys[0]) # alias remap
         if opts.get('changesets'):
-            rate[key] = rate.get(key, 0) + 1
+            for key in keys:
+                rate[key] = rate.get(key, 0) + 1
         else:
             parents = ctx.parents()
             if len(parents) > 1:
@@ -92,8 +119,14 @@
                 continue
 
             ctx1 = parents[0]
-            lines = changedlines(ui, repo, ctx1, ctx)
-            rate[key] = rate.get(key, 0) + lines
+            if opts.get('paths'):
+                files = changedlines(ui, repo, ctx1, ctx, opts.get('paths'))
+                for f in files:
+                   rate[f] = rate.get(f, 0) + files[f]
+            else:
+                lines = changedlines(ui, repo, ctx1, ctx, False)
+                for key in keys:
+                    rate[key] = rate.get(key, 0) + lines
 
         if opts.get('progress'):
             count += 1
@@ -103,6 +136,19 @@
                 ui.write(_("\rGenerating stats: %d%%") % pct)
                 sys.stdout.flush()
 
+    # if path prefixes have been defined, group the files by path prefixes
+    prefixes = ui.configlist('churn', 'prefixes')
+    if opts.get('paths') and len(prefixes):
+        # sort from the longest to shortest
+        prefixes.sort(lambda a, b: len(b) - len(a))
+        newrate = {}
+        for k in rate:
+            for p in prefixes:
+                if k.startswith(p):
+                    newrate[p] = newrate.get(p, 0) + rate[k]
+                    break
+        rate = newrate
+
     if opts.get('progress'):
         ui.write("\r")
         sys.stdout.flush()
@@ -174,6 +220,7 @@
           ('t', 'template', '{author|email}', _('template to group changesets')),
           ('f', 'dateformat', '',
               _('strftime-compatible format for grouping by date')),
+          ('p', 'paths', False, _('group by file paths or path prefixes')),
           ('c', 'changesets', False, _('count rate by number of changesets')),
           ('s', 'sort', False, _('sort by key (default: sort by count)')),
           ('', 'aliases', '', _('file with email aliases')),


More information about the Mercurial mailing list