From eb9fbf5f24b5e41cadd1eac8ca970ba819ecb6a5 Mon Sep 17 00:00:00 2001
From: Chris McDonough <chrism@agendaless.com>
Date: Wed, 23 Sep 2009 10:24:42 +0000
Subject: Speed up urlencode and model_path; add repoze.bfg.encode

Features
--------

- Speed up ``repoze.bfg.encode.urlencode`` (formerly
  ``repoze.bfg.url.urlencode``) slightly.

- Speed up ``repoze.bfg.traversal.model_path`` and
  ``repoze.bfg.traversal.model_path_tuple`` slightly.

Internal
--------

- Move ``repoze.bfg.traversal._url_quote`` into ``repoze.bfg.encode``
  as ``url_quote``.

Backwards Incompatibilities
---------------------------

- We previously had a Unicode-aware wrapper for the
  ``urllib.urlencode`` function named ``repoze.bfg.url.urlencode``
  which delegated to the stdlib function, but which marshalled all
  unicode values to utf-8 strings before calling the stdlib version.
  A newer replacement now lives in ``repoze.bfg.encode`` (old imports
  will still work).  The replacement does not delegate to the stdlib.

  The replacement diverges from the stdlib implementation and the
  previous ``repoze.bfg.url.urlencode`` implementation inasmuch as
  its ``doseq`` argument is accepted but ignored: for speed, it
  always behaves in the ``doseq=True`` way (which is the only sane
  behavior).

  The old import location (``repoze.bfg.url.urlencode``) still
  functions and has not been deprecated.
---
 CHANGES.txt                     |  30 +++++++++++
 repoze/bfg/encode.py            | 107 ++++++++++++++++++++++++++++++++++++++++
 repoze/bfg/tests/test_encode.py |  61 +++++++++++++++++++++++
 repoze/bfg/tests/test_url.py    |  28 -----------
 repoze/bfg/traversal.py         |  66 ++++++-------------------
 repoze/bfg/url.py               |  47 +-----------------
 repoze/bfg/urldispatch.py       |   6 +--
 7 files changed, 217 insertions(+), 128 deletions(-)
 create mode 100644 repoze/bfg/encode.py
 create mode 100644 repoze/bfg/tests/test_encode.py

diff --git a/CHANGES.txt b/CHANGES.txt
index 42a87940c..8e8a901f1 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,36 @@ Features
 
 - Speed up ``repoze.bfg.location.lineage`` slightly.
 
+- Speed up ``repoze.bfg.encode.urlencode`` (formerly
+  ``repoze.bfg.url.urlencode``) slightly.
+
+- Speed up ``repoze.bfg.traversal.model_path`` and
+  ``repoze.bfg.traversal.model_path_tuple`` slightly.
+
+Internal
+--------
+
+- Move ``repoze.bfg.traversal._url_quote`` into ``repoze.bfg.encode``
+  as ``url_quote``.
+
+Backwards Incompatibilities
+---------------------------
+
+- We previously had a Unicode-aware wrapper for the
+  ``urllib.urlencode`` function named ``repoze.bfg.url.urlencode``
+  which delegated to the stdlib function, but which marshalled all
+  unicode values to utf-8 strings before calling the stdlib version.
+  A newer replacement now lives in ``repoze.bfg.encode`` (old imports
+  will still work).  The replacement does not delegate to the stdlib.
+
+  The replacement diverges from the stdlib implementation and the
+  previous ``repoze.bfg.url.urlencode`` implementation inasmuch as
+  its ``doseq`` argument is accepted but ignored: for speed, it
+  always behaves in the ``doseq=True`` way (which is the only sane
+  behavior).
+
+  The old import location (``repoze.bfg.url.urlencode``) still
+  functions and has not been deprecated.
 
 1.1a4 (2009-09-23)
 ==================
diff --git a/repoze/bfg/encode.py b/repoze/bfg/encode.py
new file mode 100644
index 000000000..127c405ed
--- /dev/null
+++ b/repoze/bfg/encode.py
@@ -0,0 +1,107 @@
+import re
+
+always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+               'abcdefghijklmnopqrstuvwxyz'
+               '0123456789' '_.-')
+_safemaps = {}
+_must_quote = {}
+
+def url_quote(s, safe=''):
+    """quote('abc def') -> 'abc%20def'
+
+    Faster version of Python stdlib urllib.quote which also quotes
+    the '/' character.  
+
+    Each part of a URL, e.g. the path info, the query, etc., has a
+    different set of reserved characters that must be quoted.
+
+    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
+    the following reserved characters.
+
+    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
+                  "$" | ","
+
+    Each of these characters is reserved in some component of a URL,
+    but not necessarily in all of them.
+
+    Unlike the default version of this function in the Python stdlib,
+    by default, the quote function is intended for quoting individual
+    path segments instead of an already composed path that might have
+    '/' characters in it.  Thus, it *will* encode any '/' character it
+    finds in a string.
+    """
+    cachekey = (safe, always_safe)
+    try:
+        safe_map = _safemaps[cachekey]
+        if not _must_quote[cachekey].search(s):
+            return s
+    except KeyError:
+        safe += always_safe
+        _must_quote[cachekey] = re.compile(r'[^%s]' % safe)
+        safe_map = {}
+        for i in range(256):
+            c = chr(i)
+            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
+        _safemaps[cachekey] = safe_map
+    res = map(safe_map.__getitem__, s)
+    return ''.join(res)
+
+def quote_plus(s, safe=''):
+    """ Version of stdlib quote_plus which uses faster url_quote """
+    if ' ' in s:
+        s = url_quote(s, safe + ' ')
+        return s.replace(' ', '+')
+    return url_quote(s, safe)
+
+def urlencode(query, doseq=True):
+    """
+    An alternate implementation of Python's stdlib `urllib.urlencode
+    function <http://docs.python.org/library/urllib.html>`_ which
+    accepts unicode keys and values within the ``query``
+    dict/sequence; all Unicode keys and values are first converted to
+    UTF-8 before being used to compose the query string.
+
+    The value of ``query`` must be a sequence of two-tuples
+    representing key/value pairs *or* an object (often a dictionary)
+    with an ``.items()`` method that returns a sequence of two-tuples
+    representing key/value pairs.
+
+    For minimal calling convention backwards compatibility, this
+    version of urlencode accepts *but ignores* a second argument
+    conventionally named ``doseq``.  The Python stdlib version behaves
+    differently when ``doseq`` is False and when a sequence is
+    presented as one of the values.  This version always behaves in
+    the ``doseq=True`` mode, no matter what the value of the second
+    argument.
+
+    See the Python stdlib documentation for ``urllib.urlencode`` for
+    more information.
+    """
+    try:
+        # presumed to be a dictionary
+        query = query.items()
+    except AttributeError:
+        pass
+
+    result = ''
+    prefix = ''
+
+    for (k, v) in query:
+        if k.__class__ is unicode:
+            k = k.encode('utf-8')
+        k = quote_plus(str(k))
+        if hasattr(v, '__iter__'):
+            for x in v:
+                if x.__class__ is unicode:
+                    x = x.encode('utf-8')
+                x = quote_plus(str(x))
+                result += '%s%s=%s' % (prefix, k, x)
+                prefix = '&'
+        else:
+            if v.__class__ is unicode:
+                v = v.encode('utf-8')
+            v = quote_plus(str(v))
+            result += '%s%s=%s' % (prefix, k, v)
+        prefix = '&'
+
+    return result
diff --git a/repoze/bfg/tests/test_encode.py b/repoze/bfg/tests/test_encode.py
new file mode 100644
index 000000000..364247fb3
--- /dev/null
+++ b/repoze/bfg/tests/test_encode.py
@@ -0,0 +1,61 @@
+import unittest
+
+class UrlEncodeTests(unittest.TestCase):
+    def _callFUT(self, query, doseq=False):
+        from repoze.bfg.encode import urlencode
+        return urlencode(query, doseq)
+
+    def test_ascii_only(self):
+        result = self._callFUT([('a',1), ('b',2)])
+        self.assertEqual(result, 'a=1&b=2')
+
+    def test_unicode_key(self):
+        la = unicode('LaPe\xc3\xb1a', 'utf-8')
+        result = self._callFUT([(la, 1), ('b',2)])
+        self.assertEqual(result, 'LaPe%C3%B1a=1&b=2')
+
+    def test_unicode_val_single(self):
+        la = unicode('LaPe\xc3\xb1a', 'utf-8')
+        result = self._callFUT([('a', la), ('b',2)])
+        self.assertEqual(result, 'a=LaPe%C3%B1a&b=2')
+
+    def test_unicode_val_multiple(self):
+        la = [unicode('LaPe\xc3\xb1a', 'utf-8')] * 2
+        result = self._callFUT([('a', la), ('b',2)], doseq=True)
+        self.assertEqual(result, 'a=LaPe%C3%B1a&a=LaPe%C3%B1a&b=2')
+
+    def test_dict(self):
+        result = self._callFUT({'a':1})
+        self.assertEqual(result, 'a=1')
+
+class URLQuoteTests(unittest.TestCase):
+    def _callFUT(self, val, safe=''):
+        from repoze.bfg.encode import url_quote
+        return url_quote(val, safe)
+
+    def test_it_default(self):
+        la = 'La/Pe\xc3\xb1a'
+        result = self._callFUT(la)
+        self.assertEqual(result, 'La%2FPe%C3%B1a')
+        
+    def test_it_with_safe(self):
+        la = 'La/Pe\xc3\xb1a'
+        result = self._callFUT(la, '/')
+        self.assertEqual(result, 'La/Pe%C3%B1a')
+
+class TestQuotePlus(unittest.TestCase):
+    def _callFUT(self, val, safe=''):
+        from repoze.bfg.encode import quote_plus
+        return quote_plus(val, safe)
+    
+    def test_it_default(self):
+        la = 'La Pe\xc3\xb1a'
+        result = self._callFUT(la)
+        self.assertEqual(result, 'La+Pe%C3%B1a')
+        
+    def test_it_with_safe(self):
+        la = 'La /Pe\xc3\xb1a'
+        result = self._callFUT(la, '/')
+        self.assertEqual(result, 'La+/Pe%C3%B1a')
+
+        
diff --git a/repoze/bfg/tests/test_url.py b/repoze/bfg/tests/test_url.py
index 5833b8880..1199328e3 100644
--- a/repoze/bfg/tests/test_url.py
+++ b/repoze/bfg/tests/test_url.py
@@ -129,34 +129,6 @@ class ModelURLTests(unittest.TestCase):
         result = self._callFUT(root, request)
         self.assertEqual(result, 'http://example.com:5432/')
 
-class UrlEncodeTests(unittest.TestCase):
-    def _callFUT(self, query, doseq=False):
-        from repoze.bfg.url import urlencode
-        return urlencode(query, doseq)
-
-    def test_ascii_only(self):
-        result = self._callFUT([('a',1), ('b',2)])
-        self.assertEqual(result, 'a=1&b=2')
-
-    def test_unicode_key(self):
-        la = unicode('LaPe\xc3\xb1a', 'utf-8')
-        result = self._callFUT([(la, 1), ('b',2)])
-        self.assertEqual(result, 'LaPe%C3%B1a=1&b=2')
-
-    def test_unicode_val_single(self):
-        la = unicode('LaPe\xc3\xb1a', 'utf-8')
-        result = self._callFUT([('a', la), ('b',2)])
-        self.assertEqual(result, 'a=LaPe%C3%B1a&b=2')
-
-    def test_unicode_val_multiple(self):
-        la = [unicode('LaPe\xc3\xb1a', 'utf-8')] * 2
-        result = self._callFUT([('a', la), ('b',2)], doseq=True)
-        self.assertEqual(result, 'a=LaPe%C3%B1a&a=LaPe%C3%B1a&b=2')
-
-    def test_dict(self):
-        result = self._callFUT({'a':1})
-        self.assertEqual(result, 'a=1')
-
 class TestRouteUrl(unittest.TestCase):
     def setUp(self):
         cleanUp()
diff --git a/repoze/bfg/traversal.py b/repoze/bfg/traversal.py
index 108174924..b43bbc295 100644
--- a/repoze/bfg/traversal.py
+++ b/repoze/bfg/traversal.py
@@ -1,4 +1,3 @@
-import re
 import urllib
 
 from zope.component import queryMultiAdapter
@@ -16,6 +15,7 @@ from repoze.bfg.interfaces import ITraverserFactory
 from repoze.bfg.interfaces import VH_ROOT_KEY
 
 from repoze.bfg.location import lineage
+from repoze.bfg.encode import url_quote
 
 def find_root(model):
     """ Find the root node in the graph to which ``model``
@@ -137,8 +137,13 @@ def model_path(model, *elements):
               will be prepended to the generated path rather than a
               single leading '/' character.
     """
-    path = _model_path_list(model, *elements)
-    return path and '/'.join([quote_path_segment(x) for x in path]) or '/'
+    # joining strings is a bit expensive so we delegate to a function
+    # which caches the joined result for us
+    return _join_path_tuple(model_path_tuple(model, *elements))
+
+@lru_cache(1000)
+def _join_path_tuple(tuple):
+    return tuple and '/'.join([quote_path_segment(x) for x in tuple]) or '/'
 
 def traverse(model, path):
     """Given a model object as ``model`` and a string or tuple
@@ -345,8 +350,8 @@ def model_path_tuple(model, *elements):
 
 def _model_path_list(model, *elements):
     """ Implementation detail shared by model_path and model_path_tuple """
-    lpath = reversed(list(lineage(model)))
-    path = [ location.__name__ or '' for location in lpath ]
+    path = [loc.__name__ or '' for loc in lineage(model)]
+    path.reverse()
     path.extend(elements)
     return path
 
@@ -485,9 +490,9 @@ def quote_path_segment(segment):
         return _segment_cache[segment]
     except KeyError:
         if segment.__class__ is unicode: # isinstance slighly slower (~15%)
-            result = _url_quote(segment.encode('utf-8'))
+            result = url_quote(segment.encode('utf-8'))
         else:
-            result = _url_quote(segment)
+            result = url_quote(segment)
         # we don't need a lock to mutate _segment_cache, as the below
         # will generate exactly one Python bytecode (STORE_SUBSCR)
         _segment_cache[segment] = result
@@ -643,48 +648,7 @@ class TraversalContextURL(object):
         app_url = request.application_url # never ends in a slash
         return app_url + path
 
-always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-               'abcdefghijklmnopqrstuvwxyz'
-               '0123456789' '_.-')
-_safemaps = {}
-_must_quote = {}
-
-def _url_quote(s, safe = ''):
-    """quote('abc def') -> 'abc%20def'
-
-    Faster version of Python stdlib urllib.quote which also quotes
-    the '/' character.  
-
-    Each part of a URL, e.g. the path info, the query, etc., has a
-    different set of reserved characters that must be quoted.
-
-    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
-    the following reserved characters.
-
-    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
-                  "$" | ","
-
-    Each of these characters is reserved in some component of a URL,
-    but not necessarily in all of them.
+@lru_cache(1000)
+def _join_path_tuple(tuple):
+    return tuple and '/'.join([quote_path_segment(x) for x in tuple]) or '/'
 
-    Unlike the default version of this function in the Python stdlib,
-    by default, the quote function is intended for quoting individual
-    path segments instead of an already composed path that might have
-    '/' characters in it.  Thus, it *will* encode any '/' character it
-    finds in a string.
-    """
-    cachekey = (safe, always_safe)
-    try:
-        safe_map = _safemaps[cachekey]
-        if not _must_quote[cachekey].search(s):
-            return s
-    except KeyError:
-        safe += always_safe
-        _must_quote[cachekey] = re.compile(r'[^%s]' % safe)
-        safe_map = {}
-        for i in range(256):
-            c = chr(i)
-            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
-        _safemaps[cachekey] = safe_map
-    res = map(safe_map.__getitem__, s)
-    return ''.join(res)
diff --git a/repoze/bfg/url.py b/repoze/bfg/url.py
index 5d1f08faa..1fd9bab1e 100644
--- a/repoze/bfg/url.py
+++ b/repoze/bfg/url.py
@@ -1,7 +1,6 @@
 """ Utility functions for dealing with URLs in repoze.bfg """
 
 import os
-import urllib
 
 from zope.component import getUtility
 from zope.component import queryMultiAdapter
@@ -9,6 +8,7 @@ from zope.component import queryMultiAdapter
 from repoze.bfg.interfaces import IContextURL
 from repoze.bfg.interfaces import IRoutesMapper
 
+from repoze.bfg.encode import urlencode
 from repoze.bfg.path import caller_package
 from repoze.bfg.static import StaticRootFactory
 from repoze.bfg.traversal import TraversalContextURL
@@ -249,48 +249,3 @@ def static_url(path, request, **kw):
 
     raise ValueError('No static URL definition matching %s' % path)
 
-def urlencode(query, doseq=False):
-    """
-    A wrapper around Python's stdlib `urllib.urlencode function
-    <http://docs.python.org/library/urllib.html>`_ which accepts
-    unicode keys and values within the ``query`` dict/sequence; all
-    Unicode keys and values are first converted to UTF-8 before being
-    used to compose the query string.  The behavior of the function is
-    otherwise the same as the stdlib version.
-
-    The value of ``query`` must be a sequence of two-tuples
-    representing key/value pairs *or* an object (often a dictionary)
-    with an ``.items()`` method that returns a sequence of two-tuples
-    representing key/value pairs.  ``doseq`` controls what happens
-    when a sequence is presented as one of the values.  See the Python
-    stdlib documentation for ``urllib.urlencode`` for more
-    information.
-    """
-    if hasattr(query, 'items'):
-        # presumed to be a dictionary
-        query = query.items()
-
-    newquery = []
-    for k, v in query:
-
-        if k.__class__ is unicode:
-            k = k.encode('utf-8')
-
-        try:
-            v.__iter__
-        except AttributeError:
-            if v.__class__ is unicode:
-                v = v.encode('utf-8')
-        else:
-            L = []
-            for x in v:
-                if x.__class__ is unicode:
-                    x = x.encode('utf-8')
-                L.append(x)
-            v = L
-
-        newquery.append((k, v))
-
-    return urllib.urlencode(newquery, doseq=doseq)
-
-
diff --git a/repoze/bfg/urldispatch.py b/repoze/bfg/urldispatch.py
index c1d1f71e4..58ea192c6 100644
--- a/repoze/bfg/urldispatch.py
+++ b/repoze/bfg/urldispatch.py
@@ -1,9 +1,9 @@
 import re
 from urllib import unquote
 
-from repoze.bfg.traversal import _url_quote
-from repoze.bfg.traversal import quote_path_segment
 from repoze.bfg.traversal import traversal_path
+from repoze.bfg.traversal import quote_path_segment
+from repoze.bfg.encode import url_quote
 
 _marker = object()
 
@@ -111,7 +111,7 @@ def _compile_route(route):
                 v = '/'.join([quote_path_segment(x) for x in v])
             elif k != star:
                 try:
-                    v = _url_quote(v)
+                    v = url_quote(v)
                 except TypeError:
                     pass
             newdict[k] = v
-- 
cgit v1.2.3