- Speed up ``traversal.model_url`` execution by using a custom url

quoting function instead of Python's ``urllib.quote``, by caching URL path segment quoting and encoding results, and by disusing Python's ``urlparse.urljoin`` in favor of a simple string concatenation.
author: Chris McDonough <chrism@agendaless.com> 2008-12-18 07:28:17 +0000
committer: Chris McDonough <chrism@agendaless.com> 2008-12-18 07:28:17 +0000
commit: 38e09dc89a36f801f3892ae99fa784344ca8cfe8 (patch)
tree: 78bddbcd10c684da7ad418037a9a1172aeb4e7da /repoze/bfg/traversal.py
parent: 5b72fa1f90b297b2ace6b722482d2e1f046d60fe (diff)
download: pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.tar.gz
pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.tar.bz2
pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.zip
1 files changed, 65 insertions, 8 deletions
diff --git a/repoze/bfg/traversal.py b/repoze/bfg/traversal.py
index 89495a223..37f9e0d2e 100644
--- a/repoze/bfg/traversal.py
+++ b/repoze/bfg/traversal.py
@@ -1,5 +1,5 @@
+import re
 import urllib
-import urlparse
 
 from zope.component import queryUtility
    
@@ -120,10 +120,22 @@ def find_interface(model, interface):
         if interface.providedBy(location):
             return location
 
+_segment_cache = {}
+
 def _urlsegment(s):
-    if isinstance(s, unicode):
-        s = s.encode('utf-8')
-    return urllib.quote(s)
+    """ The bit of this code that deals with ``_segment_cache`` is an
+    optimization: we cache all the computation of URL path segments in
+    this module-scope dictionary with the original string (or unicode
+    value) as the key, so we can look it up later without needing to
+    reencode or re-url-quote it """
+    result = _segment_cache.get(s)
+    if result is None:
+        if isinstance(s, unicode):
+            result = _url_quote(s.encode('utf-8'))
+        else:
+            result = _url_quote(s)
+        _segment_cache[s] = result
+    return result
 
 def model_url(model, request, *elements):
     """ Return a string representing the absolute URL of the model
@@ -154,10 +166,10 @@ def model_url(model, request, *elements):
     prefix = '/'.join(reversed(rpath))
     suffix = '/'.join([_urlsegment(s) for s in elements])
     path = '/'.join([prefix, suffix])
-    app_url = request.application_url
-    if not app_url.endswith('/'):
-        app_url = app_url + '/'
-    return urlparse.urljoin(app_url, path)
+    if not path.startswith('/'):
+        path = '/' + path
+    app_url = request.application_url # never ends in a slash
+    return app_url + path
 
 def model_path(model, *elements):
     """ Return a string representing the absolute path of the model
@@ -185,3 +197,48 @@ def model_path(model, *elements):
         path = '/'.join([path, suffix])
     return path
 
+always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+               'abcdefghijklmnopqrstuvwxyz'
+               '0123456789' '_.-')
+_safemaps = {}
+_must_quote = {}
+
+def _url_quote(s, safe = '/'):
+    """quote('abc def') -> 'abc%20def'
+
+    Faster version of Python stdlib urllib.quote.  See
+    http://bugs.python.org/issue1285086 for more information.
+
+    Each part of a URL, e.g. the path info, the query, etc., has a
+    different set of reserved characters that must be quoted.
+
+    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
+    the following reserved characters.
+
+    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
+                  "$" | ","
+
+    Each of these characters is reserved in some component of a URL,
+    but not necessarily in all of them.
+
+    By default, the quote function is intended for quoting the path
+    section of a URL.  Thus, it will not encode '/'.  This character
+    is reserved, but in typical usage the quote function is being
+    called on a path where the existing slash characters are used as
+    reserved characters.
+    """
+    cachekey = (safe, always_safe)
+    try:
+        safe_map = _safemaps[cachekey]
+        if not _must_quote[cachekey].search(s):
+            return s
+    except KeyError:
+        safe += always_safe
+        _must_quote[cachekey] = re.compile(r'[^%s]' % safe)
+        safe_map = {}
+        for i in range(256):
+            c = chr(i)
+            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
+        _safemaps[cachekey] = safe_map
+    res = map(safe_map.__getitem__, s)
+    return ''.join(res)
author	Chris McDonough <chrism@agendaless.com>	2008-12-18 07:28:17 +0000
committer	Chris McDonough <chrism@agendaless.com>	2008-12-18 07:28:17 +0000
commit	38e09dc89a36f801f3892ae99fa784344ca8cfe8 (patch)
tree	78bddbcd10c684da7ad418037a9a1172aeb4e7da /repoze/bfg/traversal.py
parent	5b72fa1f90b297b2ace6b722482d2e1f046d60fe (diff)
download	pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.tar.gz pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.tar.bz2 pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.zip