summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Chris McDonough <chrism@agendaless.com>, 2009-09-23 10:24:42 +0000
committer: Chris McDonough <chrism@agendaless.com>, 2009-09-23 10:24:42 +0000
commit: eb9fbf5f24b5e41cadd1eac8ca970ba819ecb6a5 (patch)
tree: 26afc97a9ddc53feca88bfac957bb376357edc6b
parent: 02e1fe45e4bb2d7473296d2b8c2114680d9c75c2 (diff)
download: pyramid-eb9fbf5f24b5e41cadd1eac8ca970ba819ecb6a5.tar.gz
          pyramid-eb9fbf5f24b5e41cadd1eac8ca970ba819ecb6a5.tar.bz2
          pyramid-eb9fbf5f24b5e41cadd1eac8ca970ba819ecb6a5.zip
Features
--------

- Speed up ``repoze.bfg.encode.urlencode`` (formerly
  ``repoze.bfg.url.urlencode``) slightly.

- Speed up ``repoze.bfg.traversal.model_path`` and
  ``repoze.bfg.traversal.model_path_tuple`` slightly.

Internal
--------

- Move ``repoze.bfg.traversal._url_quote`` into ``repoze.bfg.encode``
  as ``url_quote``.

Backwards Incompatibilities
---------------------------

- We previously had a Unicode-aware wrapper for the
  ``urllib.urlencode`` function named ``repoze.bfg.url.urlencode``
  which delegated to the stdlib function, but which marshalled all
  unicode values to utf-8 strings before calling the stdlib version.
  A newer replacement now lives in ``repoze.bfg.encode`` (old imports
  will still work). The replacement does not delegate to the stdlib.

  The replacement diverges from the stdlib implementation and the
  previous ``repoze.bfg.url`` implementation inasmuch as its
  ``doseq`` argument is a decoy: it always behaves in the
  ``doseq=True`` way (which is the only sane behavior) for speed
  purposes.

  The old import location (``repoze.bfg.url.urlencode``) still
  functions and has not been deprecated.
-rw-r--r-- CHANGES.txt 30
-rw-r--r-- repoze/bfg/encode.py 107
-rw-r--r-- repoze/bfg/tests/test_encode.py 61
-rw-r--r-- repoze/bfg/tests/test_url.py 28
-rw-r--r-- repoze/bfg/traversal.py 66
-rw-r--r-- repoze/bfg/url.py 47
-rw-r--r-- repoze/bfg/urldispatch.py 6
7 files changed, 217 insertions, 128 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index 42a87940c..8e8a901f1 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,36 @@ Features
- Speed up ``repoze.bfg.location.lineage`` slightly.
+- Speed up ``repoze.bfg.encode.urlencode`` (formerly
+ ``repoze.bfg.url.urlencode``) slightly.
+
+- Speed up ``repoze.bfg.traversal.model_path`` and
+ ``repoze.bfg.traversal.model_path_tuple`` slightly.
+
+Internal
+--------
+
+- Move ``repoze.bfg.traversal._url_quote`` into ``repoze.bfg.encode``
+ as ``url_quote``.
+
+Backwards Incompatibilities
+---------------------------
+
+- We previously had a Unicode-aware wrapper for the
+ ``urllib.urlencode`` function named ``repoze.bfg.url.urlencode``
+ which delegated to the stdlib function, but which marshalled all
+ unicode values to utf-8 strings before calling the stdlib version.
+ A newer replacement now lives in ``repoze.bfg.encode`` (old imports
+ will still work). The replacement does not delegate to the stdlib.
+
+ The replacement diverges from the stdlib implementation and the
+ previous ``repoze.bfg.url`` implementation inasmuch as its
+ ``doseq`` argument is a decoy (accepted but ignored): it always
+ behaves in the ``doseq=True`` way (which is the only sane
+ behavior) for speed purposes.
+
+ The old import location (``repoze.bfg.url.urlencode``) still
+ functions and has not been deprecated.
1.1a4 (2009-09-23)
==================
diff --git a/repoze/bfg/encode.py b/repoze/bfg/encode.py
new file mode 100644
index 000000000..127c405ed
--- /dev/null
+++ b/repoze/bfg/encode.py
@@ -0,0 +1,107 @@
+import re
+
+always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ 'abcdefghijklmnopqrstuvwxyz'
+ '0123456789' '_.-')
+_safemaps = {}
+_must_quote = {}
+
+def url_quote(s, safe=''):
+ """quote('abc def') -> 'abc%20def'
+
+ Faster version of Python stdlib urllib.quote which also quotes
+ the '/' character.
+
+ Each part of a URL, e.g. the path info, the query, etc., has a
+ different set of reserved characters that must be quoted.
+
+ RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
+ the following reserved characters.
+
+ reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
+ "$" | ","
+
+ Each of these characters is reserved in some component of a URL,
+ but not necessarily in all of them.
+
+ Unlike the default version of this function in the Python stdlib,
+ by default, the quote function is intended for quoting individual
+ path segments instead of an already composed path that might have
+ '/' characters in it. Thus, it *will* encode any '/' character it
+ finds in a string.
+ """
+ cachekey = (safe, always_safe)
+ try:
+ safe_map = _safemaps[cachekey]
+ if not _must_quote[cachekey].search(s):
+ return s
+ except KeyError:
+ safe += always_safe
+ _must_quote[cachekey] = re.compile(r'[^%s]' % safe)
+ safe_map = {}
+ for i in range(256):
+ c = chr(i)
+ safe_map[c] = (c in safe) and c or ('%%%02X' % i)
+ _safemaps[cachekey] = safe_map
+ res = map(safe_map.__getitem__, s)
+ return ''.join(res)
+
+def quote_plus(s, safe=''):
+ """ Version of stdlib quote_plus which uses faster url_quote """
+ if ' ' in s:
+ s = url_quote(s, safe + ' ')
+ return s.replace(' ', '+')
+ return url_quote(s, safe)
+
+def urlencode(query, doseq=True):
+ """
+ An alternate implementation of Python's stdlib `urllib.urlencode
+ function <http://docs.python.org/library/urllib.html>`_ which
+ accepts unicode keys and values within the ``query``
+ dict/sequence; all Unicode keys and values are first converted to
+ UTF-8 before being used to compose the query string.
+
+ The value of ``query`` must be a sequence of two-tuples
+ representing key/value pairs *or* an object (often a dictionary)
+ with an ``.items()`` method that returns a sequence of two-tuples
+ representing key/value pairs.
+
+ For minimal calling convention backwards compatibility, this
+ version of urlencode accepts *but ignores* a second argument
+ conventionally named ``doseq``. The Python stdlib version behaves
+ differently when ``doseq`` is False and when a sequence is
+ presented as one of the values. This version always behaves in
+ the ``doseq=True`` mode, no matter what the value of the second
+ argument.
+
+ See the Python stdlib documentation for ``urllib.urlencode`` for
+ more information.
+ """
+ try:
+ # presumed to be a dictionary
+ query = query.items()
+ except AttributeError:
+ pass
+
+ result = ''
+ prefix = ''
+
+ for (k, v) in query:
+ if k.__class__ is unicode:
+ k = k.encode('utf-8')
+ k = quote_plus(str(k))
+ if hasattr(v, '__iter__'):
+ for x in v:
+ if x.__class__ is unicode:
+ x = x.encode('utf-8')
+ x = quote_plus(str(x))
+ result += '%s%s=%s' % (prefix, k, x)
+ prefix = '&'
+ else:
+ if v.__class__ is unicode:
+ v = v.encode('utf-8')
+ v = quote_plus(str(v))
+ result += '%s%s=%s' % (prefix, k, v)
+ prefix = '&'
+
+ return result
diff --git a/repoze/bfg/tests/test_encode.py b/repoze/bfg/tests/test_encode.py
new file mode 100644
index 000000000..364247fb3
--- /dev/null
+++ b/repoze/bfg/tests/test_encode.py
@@ -0,0 +1,61 @@
+import unittest
+
+class UrlEncodeTests(unittest.TestCase):
+ def _callFUT(self, query, doseq=False):
+ from repoze.bfg.encode import urlencode
+ return urlencode(query, doseq)
+
+ def test_ascii_only(self):
+ result = self._callFUT([('a',1), ('b',2)])
+ self.assertEqual(result, 'a=1&b=2')
+
+ def test_unicode_key(self):
+ la = unicode('LaPe\xc3\xb1a', 'utf-8')
+ result = self._callFUT([(la, 1), ('b',2)])
+ self.assertEqual(result, 'LaPe%C3%B1a=1&b=2')
+
+ def test_unicode_val_single(self):
+ la = unicode('LaPe\xc3\xb1a', 'utf-8')
+ result = self._callFUT([('a', la), ('b',2)])
+ self.assertEqual(result, 'a=LaPe%C3%B1a&b=2')
+
+ def test_unicode_val_multiple(self):
+ la = [unicode('LaPe\xc3\xb1a', 'utf-8')] * 2
+ result = self._callFUT([('a', la), ('b',2)], doseq=True)
+ self.assertEqual(result, 'a=LaPe%C3%B1a&a=LaPe%C3%B1a&b=2')
+
+ def test_dict(self):
+ result = self._callFUT({'a':1})
+ self.assertEqual(result, 'a=1')
+
+class URLQuoteTests(unittest.TestCase):
+ def _callFUT(self, val, safe=''):
+ from repoze.bfg.encode import url_quote
+ return url_quote(val, safe)
+
+ def test_it_default(self):
+ la = 'La/Pe\xc3\xb1a'
+ result = self._callFUT(la)
+ self.assertEqual(result, 'La%2FPe%C3%B1a')
+
+ def test_it_with_safe(self):
+ la = 'La/Pe\xc3\xb1a'
+ result = self._callFUT(la, '/')
+ self.assertEqual(result, 'La/Pe%C3%B1a')
+
+class TestQuotePlus(unittest.TestCase):
+ def _callFUT(self, val, safe=''):
+ from repoze.bfg.encode import quote_plus
+ return quote_plus(val, safe)
+
+ def test_it_default(self):
+ la = 'La Pe\xc3\xb1a'
+ result = self._callFUT(la)
+ self.assertEqual(result, 'La+Pe%C3%B1a')
+
+ def test_it_with_safe(self):
+ la = 'La /Pe\xc3\xb1a'
+ result = self._callFUT(la, '/')
+ self.assertEqual(result, 'La+/Pe%C3%B1a')
+
+
diff --git a/repoze/bfg/tests/test_url.py b/repoze/bfg/tests/test_url.py
index 5833b8880..1199328e3 100644
--- a/repoze/bfg/tests/test_url.py
+++ b/repoze/bfg/tests/test_url.py
@@ -129,34 +129,6 @@ class ModelURLTests(unittest.TestCase):
result = self._callFUT(root, request)
self.assertEqual(result, 'http://example.com:5432/')
-class UrlEncodeTests(unittest.TestCase):
- def _callFUT(self, query, doseq=False):
- from repoze.bfg.url import urlencode
- return urlencode(query, doseq)
-
- def test_ascii_only(self):
- result = self._callFUT([('a',1), ('b',2)])
- self.assertEqual(result, 'a=1&b=2')
-
- def test_unicode_key(self):
- la = unicode('LaPe\xc3\xb1a', 'utf-8')
- result = self._callFUT([(la, 1), ('b',2)])
- self.assertEqual(result, 'LaPe%C3%B1a=1&b=2')
-
- def test_unicode_val_single(self):
- la = unicode('LaPe\xc3\xb1a', 'utf-8')
- result = self._callFUT([('a', la), ('b',2)])
- self.assertEqual(result, 'a=LaPe%C3%B1a&b=2')
-
- def test_unicode_val_multiple(self):
- la = [unicode('LaPe\xc3\xb1a', 'utf-8')] * 2
- result = self._callFUT([('a', la), ('b',2)], doseq=True)
- self.assertEqual(result, 'a=LaPe%C3%B1a&a=LaPe%C3%B1a&b=2')
-
- def test_dict(self):
- result = self._callFUT({'a':1})
- self.assertEqual(result, 'a=1')
-
class TestRouteUrl(unittest.TestCase):
def setUp(self):
cleanUp()
diff --git a/repoze/bfg/traversal.py b/repoze/bfg/traversal.py
index 108174924..b43bbc295 100644
--- a/repoze/bfg/traversal.py
+++ b/repoze/bfg/traversal.py
@@ -1,4 +1,3 @@
-import re
import urllib
from zope.component import queryMultiAdapter
@@ -16,6 +15,7 @@ from repoze.bfg.interfaces import ITraverserFactory
from repoze.bfg.interfaces import VH_ROOT_KEY
from repoze.bfg.location import lineage
+from repoze.bfg.encode import url_quote
def find_root(model):
""" Find the root node in the graph to which ``model``
@@ -137,8 +137,13 @@ def model_path(model, *elements):
will be prepended to the generated path rather than a
single leading '/' character.
"""
- path = _model_path_list(model, *elements)
- return path and '/'.join([quote_path_segment(x) for x in path]) or '/'
+ # joining strings is a bit expensive so we delegate to a function
+ # which caches the joined result for us
+ return _join_path_tuple(model_path_tuple(model, *elements))
+
+@lru_cache(1000)
+def _join_path_tuple(tuple):
+ return tuple and '/'.join([quote_path_segment(x) for x in tuple]) or '/'
def traverse(model, path):
"""Given a model object as ``model`` and a string or tuple
@@ -345,8 +350,8 @@ def model_path_tuple(model, *elements):
def _model_path_list(model, *elements):
""" Implementation detail shared by model_path and model_path_tuple """
- lpath = reversed(list(lineage(model)))
- path = [ location.__name__ or '' for location in lpath ]
+ path = [loc.__name__ or '' for loc in lineage(model)]
+ path.reverse()
path.extend(elements)
return path
@@ -485,9 +490,9 @@ def quote_path_segment(segment):
return _segment_cache[segment]
except KeyError:
 if segment.__class__ is unicode: # isinstance slightly slower (~15%)
- result = _url_quote(segment.encode('utf-8'))
+ result = url_quote(segment.encode('utf-8'))
else:
- result = _url_quote(segment)
+ result = url_quote(segment)
# we don't need a lock to mutate _segment_cache, as the below
# will generate exactly one Python bytecode (STORE_SUBSCR)
_segment_cache[segment] = result
@@ -643,48 +648,7 @@ class TraversalContextURL(object):
app_url = request.application_url # never ends in a slash
return app_url + path
-always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
- 'abcdefghijklmnopqrstuvwxyz'
- '0123456789' '_.-')
-_safemaps = {}
-_must_quote = {}
-
-def _url_quote(s, safe = ''):
- """quote('abc def') -> 'abc%20def'
-
- Faster version of Python stdlib urllib.quote which also quotes
- the '/' character.
-
- Each part of a URL, e.g. the path info, the query, etc., has a
- different set of reserved characters that must be quoted.
-
- RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
- the following reserved characters.
-
- reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
- "$" | ","
-
- Each of these characters is reserved in some component of a URL,
- but not necessarily in all of them.
+@lru_cache(1000)
+def _join_path_tuple(tuple):
+ return tuple and '/'.join([quote_path_segment(x) for x in tuple]) or '/'
- Unlike the default version of this function in the Python stdlib,
- by default, the quote function is intended for quoting individual
- path segments instead of an already composed path that might have
- '/' characters in it. Thus, it *will* encode any '/' character it
- finds in a string.
- """
- cachekey = (safe, always_safe)
- try:
- safe_map = _safemaps[cachekey]
- if not _must_quote[cachekey].search(s):
- return s
- except KeyError:
- safe += always_safe
- _must_quote[cachekey] = re.compile(r'[^%s]' % safe)
- safe_map = {}
- for i in range(256):
- c = chr(i)
- safe_map[c] = (c in safe) and c or ('%%%02X' % i)
- _safemaps[cachekey] = safe_map
- res = map(safe_map.__getitem__, s)
- return ''.join(res)
diff --git a/repoze/bfg/url.py b/repoze/bfg/url.py
index 5d1f08faa..1fd9bab1e 100644
--- a/repoze/bfg/url.py
+++ b/repoze/bfg/url.py
@@ -1,7 +1,6 @@
""" Utility functions for dealing with URLs in repoze.bfg """
import os
-import urllib
from zope.component import getUtility
from zope.component import queryMultiAdapter
@@ -9,6 +8,7 @@ from zope.component import queryMultiAdapter
from repoze.bfg.interfaces import IContextURL
from repoze.bfg.interfaces import IRoutesMapper
+from repoze.bfg.encode import urlencode
from repoze.bfg.path import caller_package
from repoze.bfg.static import StaticRootFactory
from repoze.bfg.traversal import TraversalContextURL
@@ -249,48 +249,3 @@ def static_url(path, request, **kw):
raise ValueError('No static URL definition matching %s' % path)
-def urlencode(query, doseq=False):
- """
- A wrapper around Python's stdlib `urllib.urlencode function
- <http://docs.python.org/library/urllib.html>`_ which accepts
- unicode keys and values within the ``query`` dict/sequence; all
- Unicode keys and values are first converted to UTF-8 before being
- used to compose the query string. The behavior of the function is
- otherwise the same as the stdlib version.
-
- The value of ``query`` must be a sequence of two-tuples
- representing key/value pairs *or* an object (often a dictionary)
- with an ``.items()`` method that returns a sequence of two-tuples
- representing key/value pairs. ``doseq`` controls what happens
- when a sequence is presented as one of the values. See the Python
- stdlib documentation for ``urllib.urlencode`` for more
- information.
- """
- if hasattr(query, 'items'):
- # presumed to be a dictionary
- query = query.items()
-
- newquery = []
- for k, v in query:
-
- if k.__class__ is unicode:
- k = k.encode('utf-8')
-
- try:
- v.__iter__
- except AttributeError:
- if v.__class__ is unicode:
- v = v.encode('utf-8')
- else:
- L = []
- for x in v:
- if x.__class__ is unicode:
- x = x.encode('utf-8')
- L.append(x)
- v = L
-
- newquery.append((k, v))
-
- return urllib.urlencode(newquery, doseq=doseq)
-
-
diff --git a/repoze/bfg/urldispatch.py b/repoze/bfg/urldispatch.py
index c1d1f71e4..58ea192c6 100644
--- a/repoze/bfg/urldispatch.py
+++ b/repoze/bfg/urldispatch.py
@@ -1,9 +1,9 @@
import re
from urllib import unquote
-from repoze.bfg.traversal import _url_quote
-from repoze.bfg.traversal import quote_path_segment
from repoze.bfg.traversal import traversal_path
+from repoze.bfg.traversal import quote_path_segment
+from repoze.bfg.encode import url_quote
_marker = object()
@@ -111,7 +111,7 @@ def _compile_route(route):
v = '/'.join([quote_path_segment(x) for x in v])
elif k != star:
try:
- v = _url_quote(v)
+ v = url_quote(v)
except TypeError:
pass
newdict[k] = v