diff options
| author | Chris McDonough <chrism@agendaless.com> | 2008-12-18 07:28:17 +0000 |
|---|---|---|
| committer | Chris McDonough <chrism@agendaless.com> | 2008-12-18 07:28:17 +0000 |
| commit | 38e09dc89a36f801f3892ae99fa784344ca8cfe8 (patch) | |
| tree | 78bddbcd10c684da7ad418037a9a1172aeb4e7da | |
| parent | 5b72fa1f90b297b2ace6b722482d2e1f046d60fe (diff) | |
| download | pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.tar.gz pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.tar.bz2 pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.zip | |
- Speed up ``traversal.model_url`` execution by using a custom URL
quoting function instead of Python's ``urllib.quote``, by caching
URL path segment quoting and encoding results, and by replacing
Python's ``urlparse.urljoin`` with simple string
concatenation.
| -rw-r--r-- | CHANGES.txt | 8 | ||||
| -rw-r--r-- | repoze/bfg/tests/test_traversal.py | 32 | ||||
| -rw-r--r-- | repoze/bfg/traversal.py | 73 |
3 files changed, 76 insertions, 37 deletions
diff --git a/CHANGES.txt b/CHANGES.txt index 9edbd433c..4cca670d3 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,11 @@ +Next release + + - Speed up ``traversal.model_url`` execution by using a custom url + quoting function instead of Python's ``urllib.quote``, by caching + URL path segment quoting and encoding results, and by disusing + Python's ``urlparse.urljoin`` in favor of a simple string + concatenation. + 0.5.5 (12/17/2008) Backwards Incompatibilities diff --git a/repoze/bfg/tests/test_traversal.py b/repoze/bfg/tests/test_traversal.py index 21e2b6180..70938cbf6 100644 --- a/repoze/bfg/tests/test_traversal.py +++ b/repoze/bfg/tests/test_traversal.py @@ -250,37 +250,15 @@ class ModelURLTests(unittest.TestCase): result, 'http://example.com:5432/foo%20/bar/baz/this/theotherthing/that') - def test_root_default_app_url_endswith_slash(self): + def test_root_default_app_url(self): root = DummyContext() root.__parent__ = None root.__name__ = None request = DummyRequest() - request.application_url = 'http://example.com:5432/' result = self._callFUT(root, request) self.assertEqual(result, 'http://example.com:5432/') - def test_root_default_app_url_endswith_nonslash(self): - root = DummyContext() - root.__parent__ = None - root.__name__ = None - request = DummyRequest() - request.application_url = 'http://example.com:5432' - result = self._callFUT(root, request) - self.assertEqual(result, 'http://example.com:5432/') - - def test_nonroot_default_app_url_endswith_slash(self): - root = DummyContext() - root.__parent__ = None - root.__name__ = None - other = DummyContext() - other.__parent__ = root - other.__name__ = 'nonroot object' - request = DummyRequest() - request.application_url = 'http://example.com:5432/' - result = self._callFUT(other, request) - self.assertEqual(result, 'http://example.com:5432/nonroot%20object/') - - def test_nonroot_default_app_url_endswith_nonslash(self): + def test_nonroot_default_app_url(self): root = DummyContext() root.__parent__ = 
None root.__name__ = None @@ -288,7 +266,6 @@ class ModelURLTests(unittest.TestCase): other.__parent__ = root other.__name__ = 'nonroot object' request = DummyRequest() - request.application_url = 'http://example.com:5432' result = self._callFUT(other, request) self.assertEqual(result, 'http://example.com:5432/nonroot%20object/') @@ -303,7 +280,6 @@ class ModelURLTests(unittest.TestCase): two.__parent__ = one two.__name__ = 'La Pe\xc3\xb1a' request = DummyRequest() - request.application_url = 'http://example.com:5432' result = self._callFUT(two, request) self.assertEqual(result, 'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a/') @@ -317,7 +293,6 @@ class ModelURLTests(unittest.TestCase): one.__parent__ = root one.__name__ = uc request = DummyRequest() - request.application_url = 'http://example.com:5432' result = self._callFUT(one, request, uc) self.assertEqual(result, 'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a') @@ -327,7 +302,6 @@ class ModelURLTests(unittest.TestCase): root.__parent__ = None root.__name__ = None request = DummyRequest() - request.application_url = 'http://example.com:5432' result = self._callFUT(root, request, 'a b c') self.assertEqual(result, 'http://example.com:5432/a%20b%20c') @@ -456,7 +430,7 @@ class DummyContext(object): return self.next class DummyRequest: - application_url = 'http://example.com:5432/' + application_url = 'http://example.com:5432' # app_url never ends with slash class DummySettings: def __init__(self, **kw): diff --git a/repoze/bfg/traversal.py b/repoze/bfg/traversal.py index 89495a223..37f9e0d2e 100644 --- a/repoze/bfg/traversal.py +++ b/repoze/bfg/traversal.py @@ -1,5 +1,5 @@ +import re import urllib -import urlparse from zope.component import queryUtility @@ -120,10 +120,22 @@ def find_interface(model, interface): if interface.providedBy(location): return location +_segment_cache = {} + def _urlsegment(s): - if isinstance(s, unicode): - s = s.encode('utf-8') - return urllib.quote(s) + """ The bit of 
this code that deals with ``_segment_cache`` is an + optimization: we cache all the computation of URL path segments in + this module-scope dictionary with the original string (or unicode + value) as the key, so we can look it up later without needing to + reencode or re-url-quote it """ + result = _segment_cache.get(s) + if result is None: + if isinstance(s, unicode): + result = _url_quote(s.encode('utf-8')) + else: + result = _url_quote(s) + _segment_cache[s] = result + return result def model_url(model, request, *elements): """ Return a string representing the absolute URL of the model @@ -154,10 +166,10 @@ def model_url(model, request, *elements): prefix = '/'.join(reversed(rpath)) suffix = '/'.join([_urlsegment(s) for s in elements]) path = '/'.join([prefix, suffix]) - app_url = request.application_url - if not app_url.endswith('/'): - app_url = app_url + '/' - return urlparse.urljoin(app_url, path) + if not path.startswith('/'): + path = '/' + path + app_url = request.application_url # never ends in a slash + return app_url + path def model_path(model, *elements): """ Return a string representing the absolute path of the model @@ -185,3 +197,48 @@ def model_path(model, *elements): path = '/'.join([path, suffix]) return path +always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' + 'abcdefghijklmnopqrstuvwxyz' + '0123456789' '_.-') +_safemaps = {} +_must_quote = {} + +def _url_quote(s, safe = '/'): + """quote('abc def') -> 'abc%20def' + + Faster version of Python stdlib urllib.quote. See + http://bugs.python.org/issue1285086 for more information. + + Each part of a URL, e.g. the path info, the query, etc., has a + different set of reserved characters that must be quoted. + + RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists + the following reserved characters. + + reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | + "$" | "," + + Each of these characters is reserved in some component of a URL, + but not necessarily in all of them. 
+ + By default, the quote function is intended for quoting the path + section of a URL. Thus, it will not encode '/'. This character + is reserved, but in typical usage the quote function is being + called on a path where the existing slash characters are used as + reserved characters. + """ + cachekey = (safe, always_safe) + try: + safe_map = _safemaps[cachekey] + if not _must_quote[cachekey].search(s): + return s + except KeyError: + safe += always_safe + _must_quote[cachekey] = re.compile(r'[^%s]' % safe) + safe_map = {} + for i in range(256): + c = chr(i) + safe_map[c] = (c in safe) and c or ('%%%02X' % i) + _safemaps[cachekey] = safe_map + res = map(safe_map.__getitem__, s) + return ''.join(res) |
