- Speed up ``traversal.model_url`` execution by using a custom URL quoting function instead of Python's ``urllib.quote``, by caching URL path segment quoting and encoding results, and by dropping Python's ``urlparse.urljoin`` in favor of simple string concatenation.
author: Chris McDonough <chrism@agendaless.com> 2008-12-18 07:28:17 +0000
committer: Chris McDonough <chrism@agendaless.com> 2008-12-18 07:28:17 +0000
commit: 38e09dc89a36f801f3892ae99fa784344ca8cfe8 (patch)
tree: 78bddbcd10c684da7ad418037a9a1172aeb4e7da
parent: 5b72fa1f90b297b2ace6b722482d2e1f046d60fe (diff)
download: pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.tar.gz
pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.tar.bz2
pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.zip
3 files changed, 76 insertions, 37 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index 9edbd433c..4cca670d3 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,11 @@
+Next release
+
+  - Speed up ``traversal.model_url`` execution by using a custom url
+    quoting function instead of Python's ``urllib.quote``, by caching
+    URL path segment quoting and encoding results, and by disusing
+    Python's ``urlparse.urljoin`` in favor of a simple string
+    concatenation.
+
 0.5.5 (12/17/2008)
 
   Backwards Incompatibilities
diff --git a/repoze/bfg/tests/test_traversal.py b/repoze/bfg/tests/test_traversal.py
index 21e2b6180..70938cbf6 100644
--- a/repoze/bfg/tests/test_traversal.py
+++ b/repoze/bfg/tests/test_traversal.py
@@ -250,37 +250,15 @@ class ModelURLTests(unittest.TestCase):
             result,
             'http://example.com:5432/foo%20/bar/baz/this/theotherthing/that')
 
-    def test_root_default_app_url_endswith_slash(self):
+    def test_root_default_app_url(self):
         root = DummyContext()
         root.__parent__ = None
         root.__name__ = None
         request = DummyRequest()
-        request.application_url = 'http://example.com:5432/'
         result = self._callFUT(root, request)
         self.assertEqual(result, 'http://example.com:5432/')
 
-    def test_root_default_app_url_endswith_nonslash(self):
-        root = DummyContext()
-        root.__parent__ = None
-        root.__name__ = None
-        request = DummyRequest()
-        request.application_url = 'http://example.com:5432'
-        result = self._callFUT(root, request)
-        self.assertEqual(result, 'http://example.com:5432/')
-
-    def test_nonroot_default_app_url_endswith_slash(self):
-        root = DummyContext()
-        root.__parent__ = None
-        root.__name__ = None
-        other = DummyContext()
-        other.__parent__ = root
-        other.__name__ = 'nonroot object'
-        request = DummyRequest()
-        request.application_url = 'http://example.com:5432/'
-        result = self._callFUT(other, request)
-        self.assertEqual(result, 'http://example.com:5432/nonroot%20object/')
-
-    def test_nonroot_default_app_url_endswith_nonslash(self):
+    def test_nonroot_default_app_url(self):
         root = DummyContext()
         root.__parent__ = None
         root.__name__ = None
@@ -288,7 +266,6 @@ class ModelURLTests(unittest.TestCase):
         other.__parent__ = root
         other.__name__ = 'nonroot object'
         request = DummyRequest()
-        request.application_url = 'http://example.com:5432'
         result = self._callFUT(other, request)
         self.assertEqual(result, 'http://example.com:5432/nonroot%20object/')
 
@@ -303,7 +280,6 @@ class ModelURLTests(unittest.TestCase):
         two.__parent__ = one
         two.__name__ = 'La Pe\xc3\xb1a'
         request = DummyRequest()
-        request.application_url = 'http://example.com:5432'
         result = self._callFUT(two, request)
         self.assertEqual(result,
                      'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a/')
@@ -317,7 +293,6 @@ class ModelURLTests(unittest.TestCase):
         one.__parent__ = root
         one.__name__ = uc
         request = DummyRequest()
-        request.application_url = 'http://example.com:5432'
         result = self._callFUT(one, request, uc)
         self.assertEqual(result,
                      'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a')
@@ -327,7 +302,6 @@ class ModelURLTests(unittest.TestCase):
         root.__parent__ = None
         root.__name__ = None
         request = DummyRequest()
-        request.application_url = 'http://example.com:5432'
         result = self._callFUT(root, request, 'a b c')
         self.assertEqual(result, 'http://example.com:5432/a%20b%20c')
 
@@ -456,7 +430,7 @@ class DummyContext(object):
         return self.next
 
 class DummyRequest:
-    application_url = 'http://example.com:5432/'
+    application_url = 'http://example.com:5432' # app_url never ends with slash
 
 class DummySettings:
     def __init__(self, **kw):
diff --git a/repoze/bfg/traversal.py b/repoze/bfg/traversal.py
index 89495a223..37f9e0d2e 100644
--- a/repoze/bfg/traversal.py
+++ b/repoze/bfg/traversal.py
@@ -1,5 +1,5 @@
+import re
 import urllib
-import urlparse
 
 from zope.component import queryUtility
    
@@ -120,10 +120,22 @@ def find_interface(model, interface):
         if interface.providedBy(location):
             return location
 
+_segment_cache = {}
+
 def _urlsegment(s):
-    if isinstance(s, unicode):
-        s = s.encode('utf-8')
-    return urllib.quote(s)
+    """ The bit of this code that deals with ``_segment_cache`` is an
+    optimization: we cache all the computation of URL path segments in
+    this module-scope dictionary with the original string (or unicode
+    value) as the key, so we can look it up later without needing to
+    reencode or re-url-quote it """
+    result = _segment_cache.get(s)
+    if result is None:
+        if isinstance(s, unicode):
+            result = _url_quote(s.encode('utf-8'))
+        else:
+            result = _url_quote(s)
+        _segment_cache[s] = result
+    return result
 
 def model_url(model, request, *elements):
     """ Return a string representing the absolute URL of the model
@@ -154,10 +166,10 @@ def model_url(model, request, *elements):
     prefix = '/'.join(reversed(rpath))
     suffix = '/'.join([_urlsegment(s) for s in elements])
     path = '/'.join([prefix, suffix])
-    app_url = request.application_url
-    if not app_url.endswith('/'):
-        app_url = app_url + '/'
-    return urlparse.urljoin(app_url, path)
+    if not path.startswith('/'):
+        path = '/' + path
+    app_url = request.application_url # never ends in a slash
+    return app_url + path
 
 def model_path(model, *elements):
     """ Return a string representing the absolute path of the model
@@ -185,3 +197,48 @@ def model_path(model, *elements):
         path = '/'.join([path, suffix])
     return path
 
+always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+               'abcdefghijklmnopqrstuvwxyz'
+               '0123456789' '_.-')
+_safemaps = {}
+_must_quote = {}
+
+def _url_quote(s, safe = '/'):
+    """quote('abc def') -> 'abc%20def'
+
+    Faster version of Python stdlib urllib.quote.  See
+    http://bugs.python.org/issue1285086 for more information.
+
+    Each part of a URL, e.g. the path info, the query, etc., has a
+    different set of reserved characters that must be quoted.
+
+    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
+    the following reserved characters.
+
+    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
+                  "$" | ","
+
+    Each of these characters is reserved in some component of a URL,
+    but not necessarily in all of them.
+
+    By default, the quote function is intended for quoting the path
+    section of a URL.  Thus, it will not encode '/'.  This character
+    is reserved, but in typical usage the quote function is being
+    called on a path where the existing slash characters are used as
+    reserved characters.
+    """
+    cachekey = (safe, always_safe)
+    try:
+        safe_map = _safemaps[cachekey]
+        if not _must_quote[cachekey].search(s):
+            return s
+    except KeyError:
+        safe += always_safe
+        _must_quote[cachekey] = re.compile(r'[^%s]' % safe)
+        safe_map = {}
+        for i in range(256):
+            c = chr(i)
+            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
+        _safemaps[cachekey] = safe_map
+    res = map(safe_map.__getitem__, s)
+    return ''.join(res)
author	Chris McDonough <chrism@agendaless.com>	2008-12-18 07:28:17 +0000
committer	Chris McDonough <chrism@agendaless.com>	2008-12-18 07:28:17 +0000
commit	38e09dc89a36f801f3892ae99fa784344ca8cfe8 (patch)
tree	78bddbcd10c684da7ad418037a9a1172aeb4e7da
parent	5b72fa1f90b297b2ace6b722482d2e1f046d60fe (diff)
download	pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.tar.gz pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.tar.bz2 pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.zip