[PATCH] hgweb: return data in increasing chunk sizes

Brendan Cully brendan at kublai.com
Wed Nov 19 20:37:37 CST 2008


# HG changeset patch
# User Brendan Cully <brendan at kublai.com>
# Date 1227148600 28800
# Node ID 8c4bdff155f5a9c19c9e58f2900a2816758a7312
# Parent  92c952c4470c41647a30184b21466554021d580e
hgweb: return data in increasing chunk sizes

Currently hgweb is not streaming its output -- it accumulates the
entire response before sending it. This patch restores streaming
behaviour, buffering the output sensibly (currently, it produces chunks
of at least 1K, then 2K, then 4K, and so on, doubling the minimum chunk
size up to a cap of 64K). Local testing of
a fetch of a 100,000 line file with wget produces a slight slowdown
overall (up from 6.5 seconds to 7.2 seconds), but instead of waiting 6
seconds for headers to arrive, output begins immediately.

diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py
--- a/mercurial/hgweb/hgweb_mod.py
+++ b/mercurial/hgweb/hgweb_mod.py
@@ -182,20 +182,20 @@
                 content = getattr(webcommands, cmd)(self, req, tmpl)
                 req.respond(HTTP_OK, ctype)
 
-            return ''.join(content),
+            return content
 
         except revlog.LookupError, err:
             req.respond(HTTP_NOT_FOUND, ctype)
             msg = str(err)
             if 'manifest' not in msg:
                 msg = 'revision not found: %s' % err.name
-            return ''.join(tmpl('error', error=msg)),
+            return tmpl('error', error=msg)
         except (RepoError, revlog.RevlogError), inst:
             req.respond(HTTP_SERVER_ERROR, ctype)
-            return ''.join(tmpl('error', error=str(inst))),
+            return tmpl('error', error=str(inst))
         except ErrorResponse, inst:
             req.respond(inst.code, ctype)
-            return ''.join(tmpl('error', error=inst.message)),
+            return tmpl('error', error=inst.message)
 
     def templater(self, req):
 
diff --git a/mercurial/hgweb/hgwebdir_mod.py b/mercurial/hgweb/hgwebdir_mod.py
--- a/mercurial/hgweb/hgwebdir_mod.py
+++ b/mercurial/hgweb/hgwebdir_mod.py
@@ -116,7 +116,7 @@
                 # top-level index
                 elif not virtual:
                     req.respond(HTTP_OK, ctype)
-                    return ''.join(self.makeindex(req, tmpl)),
+                    return self.makeindex(req, tmpl)
 
                 # nested indexes and hgwebs
 
@@ -138,7 +138,7 @@
                     subdir = virtual + '/'
                     if [r for r in repos if r.startswith(subdir)]:
                         req.respond(HTTP_OK, ctype)
-                        return ''.join(self.makeindex(req, tmpl, subdir)),
+                        return self.makeindex(req, tmpl, subdir)
 
                     up = virtual.rfind('/')
                     if up < 0:
@@ -147,11 +147,11 @@
 
                 # prefixes not found
                 req.respond(HTTP_NOT_FOUND, ctype)
-                return ''.join(tmpl("notfound", repo=virtual)),
+                return tmpl("notfound", repo=virtual)
 
             except ErrorResponse, err:
                 req.respond(err.code, ctype)
-                return ''.join(tmpl('error', error=err.message or '')),
+                return tmpl('error', error=err.message or '')
         finally:
             tmpl = None
 
diff --git a/mercurial/hgweb/server.py b/mercurial/hgweb/server.py
--- a/mercurial/hgweb/server.py
+++ b/mercurial/hgweb/server.py
@@ -122,7 +122,7 @@
         self.saved_headers = []
         self.sent_headers = False
         self.length = None
-        for chunk in self.server.application(env, self._start_response):
+        for chunk in util.increasingchunks(self.server.application(env, self._start_response)):
             self._write(chunk)
 
     def send_headers(self):
diff --git a/mercurial/hgweb/wsgicgi.py b/mercurial/hgweb/wsgicgi.py
--- a/mercurial/hgweb/wsgicgi.py
+++ b/mercurial/hgweb/wsgicgi.py
@@ -63,5 +63,5 @@
         return write
 
     content = application(environ, start_response)
-    for chunk in content:
+    for chunk in util.increasingchunks(content):
         write(chunk)
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -290,6 +290,37 @@
     l.sort()
     return l
 
+def increasingchunks(source, min=1024, max=65536):
+    '''return no less than min bytes per chunk while data remains; after each
+    chunk min doubles (or more, if the chunk was large), capped at max'''
+    def log2(x):  # floor(log2(x)) for x > 0; returns 0 when x is 0
+        if not x:
+            return 0
+        i = 0
+        while x:
+            x >>= 1
+            i += 1
+        return i - 1
+
+    buf = []  # pieces collected from source since the last yield
+    blen = 0  # combined length of the pieces currently in buf
+    for chunk in source:
+        buf.append(chunk)
+        blen += len(chunk)
+        if blen >= min:
+            if min < max:  # threshold only grows until it has reached max
+                min = min << 1
+                nmin = 1 << log2(blen)  # largest power of two <= blen (blen > 0)
+                if nmin > min:  # a large chunk raises the threshold faster
+                    min = nmin
+                if min > max:
+                    min = max
+            yield ''.join(buf)
+            blen = 0
+            buf = []
+    if buf:  # flush the remainder (may be shorter than min)
+        yield ''.join(buf)
+
 class Abort(Exception):
     """Raised if a command needs to print an error and exit."""
 


More information about the Mercurial mailing list