summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Chris McDonough <chrism@agendaless.com> 2008-12-18 07:28:17 +0000
committer: Chris McDonough <chrism@agendaless.com> 2008-12-18 07:28:17 +0000
commit: 38e09dc89a36f801f3892ae99fa784344ca8cfe8 (patch)
tree: 78bddbcd10c684da7ad418037a9a1172aeb4e7da
parent: 5b72fa1f90b297b2ace6b722482d2e1f046d60fe (diff)
download: pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.tar.gz
pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.tar.bz2
pyramid-38e09dc89a36f801f3892ae99fa784344ca8cfe8.zip
- Speed up ``traversal.model_url`` execution by using a custom URL
quoting function instead of Python's ``urllib.quote``, by caching URL path segment quoting and encoding results, and by replacing Python's ``urlparse.urljoin`` with simple string concatenation.
-rw-r--r-- CHANGES.txt 8
-rw-r--r-- repoze/bfg/tests/test_traversal.py 32
-rw-r--r-- repoze/bfg/traversal.py 73
3 files changed, 76 insertions, 37 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index 9edbd433c..4cca670d3 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,11 @@
+Next release
+
+ - Speed up ``traversal.model_url`` execution by using a custom url
+ quoting function instead of Python's ``urllib.quote``, by caching
+ URL path segment quoting and encoding results, and by disusing
+ Python's ``urlparse.urljoin`` in favor of a simple string
+ concatenation.
+
0.5.5 (12/17/2008)
Backwards Incompatibilities
diff --git a/repoze/bfg/tests/test_traversal.py b/repoze/bfg/tests/test_traversal.py
index 21e2b6180..70938cbf6 100644
--- a/repoze/bfg/tests/test_traversal.py
+++ b/repoze/bfg/tests/test_traversal.py
@@ -250,37 +250,15 @@ class ModelURLTests(unittest.TestCase):
result,
'http://example.com:5432/foo%20/bar/baz/this/theotherthing/that')
- def test_root_default_app_url_endswith_slash(self):
+ def test_root_default_app_url(self):
root = DummyContext()
root.__parent__ = None
root.__name__ = None
request = DummyRequest()
- request.application_url = 'http://example.com:5432/'
result = self._callFUT(root, request)
self.assertEqual(result, 'http://example.com:5432/')
- def test_root_default_app_url_endswith_nonslash(self):
- root = DummyContext()
- root.__parent__ = None
- root.__name__ = None
- request = DummyRequest()
- request.application_url = 'http://example.com:5432'
- result = self._callFUT(root, request)
- self.assertEqual(result, 'http://example.com:5432/')
-
- def test_nonroot_default_app_url_endswith_slash(self):
- root = DummyContext()
- root.__parent__ = None
- root.__name__ = None
- other = DummyContext()
- other.__parent__ = root
- other.__name__ = 'nonroot object'
- request = DummyRequest()
- request.application_url = 'http://example.com:5432/'
- result = self._callFUT(other, request)
- self.assertEqual(result, 'http://example.com:5432/nonroot%20object/')
-
- def test_nonroot_default_app_url_endswith_nonslash(self):
+ def test_nonroot_default_app_url(self):
root = DummyContext()
root.__parent__ = None
root.__name__ = None
@@ -288,7 +266,6 @@ class ModelURLTests(unittest.TestCase):
other.__parent__ = root
other.__name__ = 'nonroot object'
request = DummyRequest()
- request.application_url = 'http://example.com:5432'
result = self._callFUT(other, request)
self.assertEqual(result, 'http://example.com:5432/nonroot%20object/')
@@ -303,7 +280,6 @@ class ModelURLTests(unittest.TestCase):
two.__parent__ = one
two.__name__ = 'La Pe\xc3\xb1a'
request = DummyRequest()
- request.application_url = 'http://example.com:5432'
result = self._callFUT(two, request)
self.assertEqual(result,
'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a/')
@@ -317,7 +293,6 @@ class ModelURLTests(unittest.TestCase):
one.__parent__ = root
one.__name__ = uc
request = DummyRequest()
- request.application_url = 'http://example.com:5432'
result = self._callFUT(one, request, uc)
self.assertEqual(result,
'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a')
@@ -327,7 +302,6 @@ class ModelURLTests(unittest.TestCase):
root.__parent__ = None
root.__name__ = None
request = DummyRequest()
- request.application_url = 'http://example.com:5432'
result = self._callFUT(root, request, 'a b c')
self.assertEqual(result, 'http://example.com:5432/a%20b%20c')
@@ -456,7 +430,7 @@ class DummyContext(object):
return self.next
class DummyRequest:
- application_url = 'http://example.com:5432/'
+ application_url = 'http://example.com:5432' # app_url never ends with slash
class DummySettings:
def __init__(self, **kw):
diff --git a/repoze/bfg/traversal.py b/repoze/bfg/traversal.py
index 89495a223..37f9e0d2e 100644
--- a/repoze/bfg/traversal.py
+++ b/repoze/bfg/traversal.py
@@ -1,5 +1,5 @@
+import re
import urllib
-import urlparse
from zope.component import queryUtility
@@ -120,10 +120,22 @@ def find_interface(model, interface):
if interface.providedBy(location):
return location
+_segment_cache = {}
+
def _urlsegment(s):
- if isinstance(s, unicode):
- s = s.encode('utf-8')
- return urllib.quote(s)
+ """ The bit of this code that deals with ``_segment_cache`` is an
+ optimization: we cache all the computation of URL path segments in
+ this module-scope dictionary with the original string (or unicode
+ value) as the key, so we can look it up later without needing to
+ reencode or re-url-quote it """
+ result = _segment_cache.get(s)
+ if result is None:
+ if isinstance(s, unicode):
+ result = _url_quote(s.encode('utf-8'))
+ else:
+ result = _url_quote(s)
+ _segment_cache[s] = result
+ return result
def model_url(model, request, *elements):
""" Return a string representing the absolute URL of the model
@@ -154,10 +166,10 @@ def model_url(model, request, *elements):
prefix = '/'.join(reversed(rpath))
suffix = '/'.join([_urlsegment(s) for s in elements])
path = '/'.join([prefix, suffix])
- app_url = request.application_url
- if not app_url.endswith('/'):
- app_url = app_url + '/'
- return urlparse.urljoin(app_url, path)
+ if not path.startswith('/'):
+ path = '/' + path
+ app_url = request.application_url # never ends in a slash
+ return app_url + path
def model_path(model, *elements):
""" Return a string representing the absolute path of the model
@@ -185,3 +197,48 @@ def model_path(model, *elements):
path = '/'.join([path, suffix])
return path
+always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ 'abcdefghijklmnopqrstuvwxyz'
+ '0123456789' '_.-')
+_safemaps = {}
+_must_quote = {}
+
+def _url_quote(s, safe = '/'):
+ """quote('abc def') -> 'abc%20def'
+
+ Faster version of Python stdlib urllib.quote. See
+ http://bugs.python.org/issue1285086 for more information.
+
+ Each part of a URL, e.g. the path info, the query, etc., has a
+ different set of reserved characters that must be quoted.
+
+ RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
+ the following reserved characters.
+
+ reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
+ "$" | ","
+
+ Each of these characters is reserved in some component of a URL,
+ but not necessarily in all of them.
+
+ By default, the quote function is intended for quoting the path
+ section of a URL. Thus, it will not encode '/'. This character
+ is reserved, but in typical usage the quote function is being
+ called on a path where the existing slash characters are used as
+ reserved characters.
+ """
+ cachekey = (safe, always_safe)
+ try:
+ safe_map = _safemaps[cachekey]
+ if not _must_quote[cachekey].search(s):
+ return s
+ except KeyError:
+ safe += always_safe
+ _must_quote[cachekey] = re.compile(r'[^%s]' % safe)
+ safe_map = {}
+ for i in range(256):
+ c = chr(i)
+ safe_map[c] = (c in safe) and c or ('%%%02X' % i)
+ _safemaps[cachekey] = safe_map
+ res = map(safe_map.__getitem__, s)
+ return ''.join(res)