From 7ae0c2522bbb2b026bc0370dbb1b1608f146137d Mon Sep 17 00:00:00 2001
From: Chris McDonough <chrism@agendaless.com>
Date: Mon, 5 Jan 2009 23:36:27 +0000
Subject:   New Modules

   - A new module ``repoze.bfg.url`` has been added.  It contains the
     ``model_url`` API (moved from ``repoze.bfg.traversal``) and an
     implementation of ``urlencode`` (like Python's
     ``urllib.urlencode``) which can handle Unicode keys and values in
     parameters to the ``query`` argument.

  Deprecations

   - The ``model_url`` function has been moved from
     ``repoze.bfg.traversal`` into ``repoze.bfg.url``.  It can still
     be imported from ``repoze.bfg.traversal``, but doing so will
     emit a ``DeprecationWarning``.

  Features

   - The ``repoze.bfg.url.model_url`` API (formerly
     ``repoze.bfg.traversal.model_url``) now accepts and honors a
     keyword argument named ``query``.  The value of this argument
     will be used to compose a query string, which will be attached to
     the generated URL before it is returned.  See the API docs (in
     the docs directory or `on the web
     <http://static.repoze.org/bfgdocs>`_) for more information.
---
 CHANGES.txt                        |  29 ++++++-
 docs/api/traversal.rst             |   4 +-
 docs/api/url.rst                   |  11 +++
 docs/index.rst                     |   1 +
 repoze/bfg/tests/test_traversal.py |  80 -----------------
 repoze/bfg/tests/test_url.py       | 134 ++++++++++++++++++++++++++++
 repoze/bfg/traversal.py            | 145 +++++++------------------------
 repoze/bfg/url.py                  | 174 +++++++++++++++++++++++++++++++++++++
 8 files changed, 381 insertions(+), 197 deletions(-)
 create mode 100644 docs/api/url.rst
 create mode 100644 repoze/bfg/tests/test_url.py
 create mode 100644 repoze/bfg/url.py

diff --git a/CHANGES.txt b/CHANGES.txt
index e63cb025e..12fe3d911 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,31 @@
-0.6 (12/26/2008)
+Next Release
+
+  New Modules
+
+   - A new module ``repoze.bfg.url`` has been added.  It contains the
+     ``model_url`` API (moved from ``repoze.bfg.traversal``) and an
+     implementation of ``urlencode`` (like Python's
+     ``urllib.urlencode``) which can handle Unicode keys and values in
+     parameters to the ``query`` argument.
+
+  Deprecations
+
+   - The ``model_url`` function has been moved from
+     ``repoze.bfg.traversal`` into ``repoze.bfg.url``.  It can still
+     be imported from ``repoze.bfg.traversal``, but doing so will
+     emit a ``DeprecationWarning``.
+
+  Features
+
+   - The ``repoze.bfg.url.model_url`` API (formerly
+     ``repoze.bfg.traversal.model_url``) now accepts and honors a
+     keyword argument named ``query``.  The value of this argument
+     will be used to compose a query string, which will be attached to
+     the generated URL before it is returned.  See the API docs (in
+     the docs directory or `on the web
+     <http://static.repoze.org/bfgdocs>`_) for more information.
+
+0.6 (12/26/2008)
 
   Backwards Incompatibilities
 
diff --git a/docs/api/traversal.rst b/docs/api/traversal.rst
index c9fab7d98..8195ddec4 100644
--- a/docs/api/traversal.rst
+++ b/docs/api/traversal.rst
@@ -11,8 +11,8 @@
 
   .. autofunction:: find_root
 
-  .. autofunction:: model_url
-
   .. autofunction:: model_path
 
+.. note:: A function named ``model_url`` used to be present in this
+   module.  It was moved to :ref:`url_module` in version 0.6.1.
 
diff --git a/docs/api/url.rst b/docs/api/url.rst
new file mode 100644
index 000000000..d984dcc07
--- /dev/null
+++ b/docs/api/url.rst
@@ -0,0 +1,11 @@
+.. _url_module:
+
+:mod:`repoze.bfg.url`
+---------------------
+
+.. automodule:: repoze.bfg.url
+
+  .. autofunction:: model_url
+
+  .. autofunction:: urlencode
+
diff --git a/docs/index.rst b/docs/index.rst
index 0c8fd295e..bd17236aa 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -76,6 +76,7 @@ Per-module :mod:`repoze.bfg` API documentation.
    api/testing
    api/traversal
    api/location
+   api/url
    api/urldispatch
    api/view
    api/wsgi
diff --git a/repoze/bfg/tests/test_traversal.py b/repoze/bfg/tests/test_traversal.py
index 70938cbf6..dcb47e6ed 100644
--- a/repoze/bfg/tests/test_traversal.py
+++ b/repoze/bfg/tests/test_traversal.py
@@ -225,86 +225,6 @@ class FindInterfaceTests(unittest.TestCase):
         result = self._callFUT(baz, IFoo)
         self.assertEqual(result.__name__, 'root')
 
-class ModelURLTests(unittest.TestCase):
-    def _callFUT(self, model, request, *elements):
-        from repoze.bfg.traversal import model_url
-        return model_url(model, request, *elements)
-
-    def test_extra_args(self):
-        baz = DummyContext()
-        bar = DummyContext(baz)
-        foo = DummyContext(bar)
-        root = DummyContext(foo)
-        root.__parent__ = None
-        root.__name__ = None
-        foo.__parent__ = root
-        foo.__name__ = 'foo '
-        bar.__parent__ = foo
-        bar.__name__ = 'bar'
-        baz.__parent__ = bar
-        baz.__name__ = 'baz'
-        request = DummyRequest()
-        result = self._callFUT(baz, request, 'this/theotherthing', 'that')
-
-        self.assertEqual(
-            result,
-            'http://example.com:5432/foo%20/bar/baz/this/theotherthing/that')
-
-    def test_root_default_app_url(self):
-        root = DummyContext()
-        root.__parent__ = None
-        root.__name__ = None
-        request = DummyRequest()
-        result = self._callFUT(root, request)
-        self.assertEqual(result, 'http://example.com:5432/')
-
-    def test_nonroot_default_app_url(self):
-        root = DummyContext()
-        root.__parent__ = None
-        root.__name__ = None
-        other = DummyContext()
-        other.__parent__ = root
-        other.__name__ = 'nonroot object'
-        request = DummyRequest()
-        result = self._callFUT(other, request)
-        self.assertEqual(result, 'http://example.com:5432/nonroot%20object/')
-
-    def test_unicode_mixed_with_bytes_in_model_names(self):
-        root = DummyContext()
-        root.__parent__ = None
-        root.__name__ = None
-        one = DummyContext()
-        one.__parent__ = root
-        one.__name__ = unicode('La Pe\xc3\xb1a', 'utf-8')
-        two = DummyContext()
-        two.__parent__ = one
-        two.__name__ = 'La Pe\xc3\xb1a'
-        request = DummyRequest()
-        result = self._callFUT(two, request)
-        self.assertEqual(result,
-                     'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a/')
-
-    def test_unicode_in_element_names(self):
-        uc = unicode('La Pe\xc3\xb1a', 'utf-8')
-        root = DummyContext()
-        root.__parent__ = None
-        root.__name__ = None
-        one = DummyContext()
-        one.__parent__ = root
-        one.__name__ = uc
-        request = DummyRequest()
-        result = self._callFUT(one, request, uc)
-        self.assertEqual(result,
-                     'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a')
-
-    def test_element_names_url_quoted(self):
-        root = DummyContext()
-        root.__parent__ = None
-        root.__name__ = None
-        request = DummyRequest()
-        result = self._callFUT(root, request, 'a b c')
-        self.assertEqual(result, 'http://example.com:5432/a%20b%20c')
-
 class FindRootTests(unittest.TestCase):
     def _callFUT(self, context):
         from repoze.bfg.traversal import find_root
diff --git a/repoze/bfg/tests/test_url.py b/repoze/bfg/tests/test_url.py
new file mode 100644
index 000000000..2a47068cd
--- /dev/null
+++ b/repoze/bfg/tests/test_url.py
@@ -0,0 +1,134 @@
+import unittest
+
+class ModelURLTests(unittest.TestCase):
+    def _callFUT(self, model, request, *elements, **kw):
+        from repoze.bfg.url import model_url
+        return model_url(model, request, *elements, **kw)
+
+    def test_extra_args(self):
+        baz = DummyContext()
+        bar = DummyContext(baz)
+        foo = DummyContext(bar)
+        root = DummyContext(foo)
+        root.__parent__ = None
+        root.__name__ = None
+        foo.__parent__ = root
+        foo.__name__ = 'foo '
+        bar.__parent__ = foo
+        bar.__name__ = 'bar'
+        baz.__parent__ = bar
+        baz.__name__ = 'baz'
+        request = DummyRequest()
+        result = self._callFUT(baz, request, 'this/theotherthing', 'that')
+
+        self.assertEqual(
+            result,
+            'http://example.com:5432/foo%20/bar/baz/this/theotherthing/that')
+
+    def test_root_default_app_url(self):
+        root = DummyContext()
+        root.__parent__ = None
+        root.__name__ = None
+        request = DummyRequest()
+        result = self._callFUT(root, request)
+        self.assertEqual(result, 'http://example.com:5432/')
+
+    def test_nonroot_default_app_url(self):
+        root = DummyContext()
+        root.__parent__ = None
+        root.__name__ = None
+        other = DummyContext()
+        other.__parent__ = root
+        other.__name__ = 'nonroot object'
+        request = DummyRequest()
+        result = self._callFUT(other, request)
+        self.assertEqual(result, 'http://example.com:5432/nonroot%20object/')
+
+    def test_unicode_mixed_with_bytes_in_model_names(self):
+        root = DummyContext()
+        root.__parent__ = None
+        root.__name__ = None
+        one = DummyContext()
+        one.__parent__ = root
+        one.__name__ = unicode('La Pe\xc3\xb1a', 'utf-8')
+        two = DummyContext()
+        two.__parent__ = one
+        two.__name__ = 'La Pe\xc3\xb1a'
+        request = DummyRequest()
+        result = self._callFUT(two, request)
+        self.assertEqual(result,
+                     'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a/')
+
+    def test_unicode_in_element_names(self):
+        uc = unicode('La Pe\xc3\xb1a', 'utf-8')
+        root = DummyContext()
+        root.__parent__ = None
+        root.__name__ = None
+        one = DummyContext()
+        one.__parent__ = root
+        one.__name__ = uc
+        request = DummyRequest()
+        result = self._callFUT(one, request, uc)
+        self.assertEqual(result,
+                     'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a')
+
+    def test_element_names_url_quoted(self):
+        root = DummyContext()
+        root.__parent__ = None
+        root.__name__ = None
+        request = DummyRequest()
+        result = self._callFUT(root, request, 'a b c')
+        self.assertEqual(result, 'http://example.com:5432/a%20b%20c')
+
+    def test_with_query(self):
+        root = DummyContext()
+        root.__parent__ = None
+        root.__name__ = None
+        request = DummyRequest()
+        result = self._callFUT(root, request, 'a', query=[('a', 1), ('b', 2)])
+        self.assertEqual(result, 'http://example.com:5432/a?a=1&b=2')
+
+
+class UrlEncodeTests(unittest.TestCase):
+    def _callFUT(self, query, doseq=False):
+        from repoze.bfg.url import urlencode
+        return urlencode(query, doseq)
+
+    def test_ascii_only(self):
+        result = self._callFUT([('a',1), ('b',2)])
+        self.assertEqual(result, 'a=1&b=2')
+
+    def test_unicode_key(self):
+        la = unicode('LaPe\xc3\xb1a', 'utf-8')
+        result = self._callFUT([(la, 1), ('b',2)])
+        self.assertEqual(result, 'LaPe%C3%B1a=1&b=2')
+
+    def test_unicode_val_single(self):
+        la = unicode('LaPe\xc3\xb1a', 'utf-8')
+        result = self._callFUT([('a', la), ('b',2)])
+        self.assertEqual(result, 'a=LaPe%C3%B1a&b=2')
+
+    def test_unicode_val_multiple(self):
+        la = [unicode('LaPe\xc3\xb1a', 'utf-8')] * 2
+        result = self._callFUT([('a', la), ('b',2)], doseq=True)
+        self.assertEqual(result, 'a=LaPe%C3%B1a&a=LaPe%C3%B1a&b=2')
+
+    def test_dict(self):
+        result = self._callFUT({'a':1})
+        self.assertEqual(result, 'a=1')
+        
+class DummyContext(object):
+    def __init__(self, next=None):
+        self.next = next
+        
+    def __getitem__(self, name):
+        if self.next is None:
+            raise KeyError, name
+        return self.next
+
+class DummyRequest:
+    application_url = 'http://example.com:5432' # app_url never ends with slash
+
+class DummySettings:
+    def __init__(self, **kw):
+        self.__dict__.update(kw)
diff --git a/repoze/bfg/traversal.py b/repoze/bfg/traversal.py
index 3c6756de1..e2e31fdfa 100644
--- a/repoze/bfg/traversal.py
+++ b/repoze/bfg/traversal.py
@@ -2,17 +2,26 @@ import re
 import urllib
 
 from zope.component import queryUtility
+from zope.deferredimport import deprecated
    
 from zope.interface import classProvides
 from zope.interface import implements
 from repoze.bfg.location import LocationProxy
 from repoze.bfg.location import lineage
+from repoze.bfg.url import urlencode
+
 
 from repoze.bfg.interfaces import ILocation
 from repoze.bfg.interfaces import ITraverser
 from repoze.bfg.interfaces import ITraverserFactory
 from repoze.bfg.interfaces import ISettings
 
+deprecated(
+    "('from repoze.bfg.traversal import model_url' is now "
+    "deprecated; instead use 'from repoze.bfg.url import model_url')",
+    model_url = "repoze.bfg.url:model_url",
+    )
+
 def split_path(path):
     while path.startswith('/'):
         path = path[1:]
@@ -112,83 +121,36 @@ def find_model(model, path):
     return ob
 
 def find_interface(model, interface):
-    """ Return the first object found which provides the interface
+    """
+    Return the first object found which provides the interface
     ``interface`` in the parent chain of ``model`` or ``None`` if no
     object providing ``interface`` can be found in the parent chain.
-    The ``model`` passed in should be :term:`location`-aware."""
+    The ``model`` passed in should be :term:`location`-aware.
+    """
     for location in lineage(model):
         if interface.providedBy(location):
             return location
 
-_segment_cache = {}
-
-def _urlsegment(s):
-    """ The bit of this code that deals with ``_segment_cache`` is an
-    optimization: we cache all the computation of URL path segments in
-    this module-scope dictionary with the original string (or unicode
-    value) as the key, so we can look it up later without needing to
-    reencode or re-url-quote it """
-    result = _segment_cache.get(s)
-    if result is None:
-        if s.__class__ is unicode: # isinstance slighly slower (~15%)
-            result = _url_quote(s.encode('utf-8'))
-        else:
-            result = _url_quote(s)
-        # we don't need a lock to mutate _segment_cache, as the below
-        # will generate exactly one Python bytecode (STORE_SUBSCR)
-        _segment_cache[s] = result
-    return result
-
-def model_url(model, request, *elements):
-    """ Return a string representing the absolute URL of the model
-    object based on the ``wsgi.url_scheme``, ``HTTP_HOST`` or
-    ``SERVER_NAME`` in the request, plus any ``SCRIPT_NAME``.  If any
-    model in the lineage has a unicode name, it will be converted to
-    UTF-8 before being attached to the URL.  Empty names in the model
-    graph are ignored.
-
-    Any positional arguments passed in as ``elements`` must be strings
-    or unicode objects.  These will be joined by slashes and appended
-    to the model URL.  Each of the elements passed in is URL-quoted
-    before being appended; if any element is unicode, it will
-    converted to a UTF-8 bytestring before being URL-quoted.
-
-    If ``elements`` are not used, the model URL will end with a
-    trailing slash.  If ``elements`` are used, the generated URL will
-    *not* end in trailing a slash.
-
-    The ``model`` passed in must be :term:`location`-aware.  The
-    overall result of this function is always a string (never
-    unicode). """
-    rpath = []
-    for location in lineage(model):
-        name = location.__name__
-        if name:
-            rpath.append(_urlsegment(name))
-    prefix = '/'.join(reversed(rpath))
-    suffix = '/'.join([_urlsegment(s) for s in elements])
-    path = '/'.join([prefix, suffix])
-    if not path.startswith('/'):
-        path = '/' + path
-    app_url = request.application_url # never ends in a slash
-    return app_url + path
-
 def model_path(model, *elements):
     """ Return a string representing the absolute path of the model
-    object based on its position in the model graph, e.g
-    ``/foo/bar``. Any positional arguments passed in as ``elements``
-    will be joined by slashes and appended to the generated path.  The
-    ``model`` passed in must be :term:`location`-aware.
-
-    Note that this function is not equivalent to ``model_url``:
-    individual segments in the path are not quoted in any way.  Thus,
-    to ensure that this function generates paths that can later be
-    resolved to models via ``find_model``, you should ensure that your
-    model names do not contain any slashes.
-
-    Note also that the path returned may be a Unicode string if any
-    model name in the model graph is Unicode; otherwise it will be a
-    byte-string."""
+    object based on its position in the model graph, e.g. ``/foo/bar``.
+    This function is the inverse of ``find_model``: it can be used to
+    generate paths that can later be resolved via ``find_model``.  Any
+    positional arguments passed in as ``elements`` will be joined by
+    slashes and appended to the generated path.  The ``model`` passed
+    in must be :term:`location`-aware.
+
+    Note that the way this function treats path segments is not
+    equivalent to how the ``repoze.bfg.url.model_url`` API treats path
+    segments: individual segments that make up the path are not quoted
+    in any way.  Thus, to ensure that this function generates paths
+    that can later be resolved to models via ``find_model``, you
+    should ensure that your model ``__name__`` attributes do not contain
+    any forward slashes.
+
+    The path returned may be a unicode object if any model name in the
+    model graph is a unicode object; otherwise it will be a string.
+    """
     rpath = []
     for location in lineage(model):
         if location.__name__:
@@ -199,48 +161,3 @@ def model_path(model, *elements):
         path = '/'.join([path, suffix])
     return path
 
-always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-               'abcdefghijklmnopqrstuvwxyz'
-               '0123456789' '_.-')
-_safemaps = {}
-_must_quote = {}
-
-def _url_quote(s, safe = '/'):
-    """quote('abc def') -> 'abc%20def'
-
-    Faster version of Python stdlib urllib.quote.  See
-    http://bugs.python.org/issue1285086 for more information.
-
-    Each part of a URL, e.g. the path info, the query, etc., has a
-    different set of reserved characters that must be quoted.
-
-    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
-    the following reserved characters.
-
-    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
-                  "$" | ","
-
-    Each of these characters is reserved in some component of a URL,
-    but not necessarily in all of them.
-
-    By default, the quote function is intended for quoting the path
-    section of a URL.  Thus, it will not encode '/'.  This character
-    is reserved, but in typical usage the quote function is being
-    called on a path where the existing slash characters are used as
-    reserved characters.
-    """
-    cachekey = (safe, always_safe)
-    try:
-        safe_map = _safemaps[cachekey]
-        if not _must_quote[cachekey].search(s):
-            return s
-    except KeyError:
-        safe += always_safe
-        _must_quote[cachekey] = re.compile(r'[^%s]' % safe)
-        safe_map = {}
-        for i in range(256):
-            c = chr(i)
-            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
-        _safemaps[cachekey] = safe_map
-    res = map(safe_map.__getitem__, s)
-    return ''.join(res)
diff --git a/repoze/bfg/url.py b/repoze/bfg/url.py
new file mode 100644
index 000000000..4c1545de5
--- /dev/null
+++ b/repoze/bfg/url.py
@@ -0,0 +1,174 @@
+""" Utility functions for dealing with URLs in repoze.bfg """
+
+import re
+import urllib
+
+from repoze.bfg.location import lineage
+
+def model_url(model, request, *elements, **kw):
+    """
+    Generate a string representing the absolute URL of the model
+    object based on the ``wsgi.url_scheme``, ``HTTP_HOST`` or
+    ``SERVER_NAME`` in the request, plus any ``SCRIPT_NAME``.  If a
+    ``query`` keyword argument is provided, a query string based on
+    its value will be composed and appended to the generated URL
+    string (see details below).  The overall result of this function
+    is always a string (never unicode).  The ``model`` passed in must
+    be :term:`location`-aware.
+
+    .. note:: If any model in the lineage has a unicode name, it will
+              be converted to UTF-8 before being attached to the URL.
+              When composing the path based on the model lineage,
+              empty names in the model graph are ignored.
+
+    Any positional arguments passed in as ``elements`` must be strings
+    or unicode objects.  These will be joined by slashes and appended
+    to the generated model URL.  Each of the elements passed in is
+    URL-quoted before being appended; if any element is unicode, it
+    will be converted to a UTF-8 bytestring before being URL-quoted.
+
+    .. warning:: if no ``elements`` arguments are specified, the model
+                 URL will end with a trailing slash.  If any
+                 ``elements`` are used, the generated URL will *not*
+                 end in a trailing slash.
+
+    If a keyword argument ``query`` is present, it will be used to
+    compose a query string that will be tacked on to the end of the
+    URL.  The value of ``query`` must be a sequence of two-tuples *or*
+    a data structure with an ``.items()`` method that returns a
+    sequence of two-tuples (presumably a dictionary).  This data
+    structure will be turned into a query string per the documentation
+    of the ``repoze.bfg.url.urlencode`` function.  After the query data
+    is turned into a query string, a leading ``?`` is prepended, and
+    the resulting string is appended to the generated URL.
+
+    .. note:: Python data structures that are passed as ``query``
+              which are sequences or dictionaries are turned into a
+              string under the same rules as when run through
+              urllib.urlencode with the ``doseq`` argument equal to
+              ``True``.  This means that sequences can be passed as
+              values, and a k=v pair will be placed into the query
+              string for each value.
+    """
+
+    qs = ''
+    if 'query' in kw:
+        qs = '?' + urlencode(kw['query'], doseq=True)
+        
+    rpath = []
+    for location in lineage(model):
+        name = location.__name__
+        if name:
+            rpath.append(_urlsegment(name))
+    prefix = '/'.join(reversed(rpath))
+    suffix = '/'.join([_urlsegment(s) for s in elements])
+    path = '/'.join([prefix, suffix])
+    if not path.startswith('/'):
+        path = '/' + path
+    app_url = request.application_url # never ends in a slash
+    return app_url + path + qs
+
+def urlencode(query, doseq=False):
+    """
+    A wrapper around Python's stdlib `urllib.urlencode function
+    <http://docs.python.org/library/urllib.html>`_ which
+    accepts unicode keys and values within the ``query``
+    dict/sequence; all keys and values are first converted to UTF-8
+    before being used to compose the query string.  The behavior of
+    the function is otherwise the same as the stdlib version.
+
+    The value of ``query`` must be a sequence of two-tuples
+    representing key/value pairs *or* an object (often a dictionary)
+    with an ``.items()`` method that returns a sequence of two-tuples
+    representing key/value pairs.  ``doseq`` controls what happens
+    when a sequence is presented as one of the values.  See the Python
+    stdlib documentation for more information.
+    """
+    if hasattr(query, 'items'):
+        # dictionary
+        query = query.items()
+    # presumed to be a sequence of two-tuples
+    newquery = []
+    for k, v in query:
+        if k.__class__ is unicode:
+            k = k.encode('utf-8')
+
+        if isinstance(v, (tuple, list)):
+            L = []
+            for x in v:
+                if x.__class__ is unicode:
+                    x = x.encode('utf-8')
+                L.append(x)
+            v = L
+        elif v.__class__ is unicode:
+            v = v.encode('utf-8')
+        newquery.append((k, v))
+
+    return urllib.urlencode(newquery, doseq=doseq)
+
+_segment_cache = {}
+
+def _urlsegment(s):
+    """ The bit of this code that deals with ``_segment_cache`` is an
+    optimization: we cache all the computation of URL path segments in
+    this module-scope dictionary with the original string (or unicode
+    value) as the key, so we can look it up later without needing to
+    reencode or re-url-quote it """
+    result = _segment_cache.get(s)
+    if result is None:
+        if s.__class__ is unicode: # isinstance slightly slower (~15%)
+            result = _url_quote(s.encode('utf-8'))
+        else:
+            result = _url_quote(s)
+        # we don't need a lock to mutate _segment_cache, as the below
+        # will generate exactly one Python bytecode (STORE_SUBSCR)
+        _segment_cache[s] = result
+    return result
+
+
+always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+               'abcdefghijklmnopqrstuvwxyz'
+               '0123456789' '_.-')
+_safemaps = {}
+_must_quote = {}
+
+def _url_quote(s, safe = '/'):
+    """quote('abc def') -> 'abc%20def'
+
+    Faster version of Python stdlib urllib.quote.  See
+    http://bugs.python.org/issue1285086 for more information.
+
+    Each part of a URL, e.g. the path info, the query, etc., has a
+    different set of reserved characters that must be quoted.
+
+    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
+    the following reserved characters.
+
+    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
+                  "$" | ","
+
+    Each of these characters is reserved in some component of a URL,
+    but not necessarily in all of them.
+
+    By default, the quote function is intended for quoting the path
+    section of a URL.  Thus, it will not encode '/'.  This character
+    is reserved, but in typical usage the quote function is being
+    called on a path where the existing slash characters are used as
+    reserved characters.
+    """
+    cachekey = (safe, always_safe)
+    try:
+        safe_map = _safemaps[cachekey]
+        if not _must_quote[cachekey].search(s):
+            return s
+    except KeyError:
+        safe += always_safe
+        _must_quote[cachekey] = re.compile(r'[^%s]' % safe)
+        safe_map = {}
+        for i in range(256):
+            c = chr(i)
+            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
+        _safemaps[cachekey] = safe_map
+    res = map(safe_map.__getitem__, s)
+    return ''.join(res)
+
-- 
cgit v1.2.3