From 7ae0c2522bbb2b026bc0370dbb1b1608f146137d Mon Sep 17 00:00:00 2001 From: Chris McDonough Date: Mon, 5 Jan 2009 23:36:27 +0000 Subject: New Modules - A new module ``repoze.bfg.url`` has been added. It contains the ``model_url`` API (moved from ``repoze.bfg.traversal``) and an implementation of ``urlencode`` (like Python's ``urllib.urlencode``) which can handle Unicode keys and values in parameters to the ``query`` argument. Deprecations - The ``model_url`` function has been moved from ``repoze.bfg.traversal`` into ``repoze.bfg.url``. It can still be imported from ``repoze.bfg.traversal`` but an import from ``repoze.bfg.traversal`` will emit a DeprecationWarning. Features - The ``repoze.bfg.url.model_url`` API (nee' ``repoze.bfg.traversal.model_url``) now accepts and honors a keyword argument named ``query``. The value of this argument will be used to compose a query string, which will be attached to the generated URL before it is returned. See the API docs (in the docs directory or `on the web `_) for more information. --- CHANGES.txt | 29 ++++++- docs/api/traversal.rst | 4 +- docs/api/url.rst | 11 +++ docs/index.rst | 1 + repoze/bfg/tests/test_traversal.py | 80 ----------------- repoze/bfg/tests/test_url.py | 134 ++++++++++++++++++++++++++++ repoze/bfg/traversal.py | 145 +++++++------------------------ repoze/bfg/url.py | 174 +++++++++++++++++++++++++++++++++++++ 8 files changed, 381 insertions(+), 197 deletions(-) create mode 100644 docs/api/url.rst create mode 100644 repoze/bfg/tests/test_url.py create mode 100644 repoze/bfg/url.py diff --git a/CHANGES.txt b/CHANGES.txt index e63cb025e..12fe3d911 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,31 @@ -0.6 (12/26/2008) +Next Release + + New Modules + + - A new module ``repoze.bfg.url`` has been added. 
It contains the + ``model_url`` API (moved from ``repoze.bfg.traversal``) and an + implementation of ``urlencode`` (like Python's + ``urllib.urlencode``) which can handle Unicode keys and values in + parameters to the ``query`` argument. + + Deprecations + + - The ``model_url`` function has been moved from + ``repoze.bfg.traversal`` into ``repoze.bfg.url``. It can still + be imported from ``repoze.bfg.traversal`` but an import from + ``repoze.bfg.traversal`` will emit a DeprecationWarning. + + Features + + - The ``repoze.bfg.url.model_url`` API (nee' + ``repoze.bfg.traversal.model_url``) now accepts and honors a + keyword argument named ``query``. The value of this argument + will be used to compose a query string, which will be attached to + the generated URL before it is returned. See the API docs (in + the docs directory or `on the web + `_) for more information. + + 0.6 (12/26/2008) Backwards Incompatibilities diff --git a/docs/api/traversal.rst b/docs/api/traversal.rst index c9fab7d98..8195ddec4 100644 --- a/docs/api/traversal.rst +++ b/docs/api/traversal.rst @@ -11,8 +11,8 @@ .. autofunction:: find_root - .. autofunction:: model_url - .. autofunction:: model_path +.. note:: A function named ``model_url`` used to be present in this + module. It was moved to :ref:`url_module` in version 0.6.1. diff --git a/docs/api/url.rst b/docs/api/url.rst new file mode 100644 index 000000000..d984dcc07 --- /dev/null +++ b/docs/api/url.rst @@ -0,0 +1,11 @@ +.. _url_module: + +:mod:`repoze.bfg.url` +--------------------- + +.. automodule:: repoze.bfg.url + + .. autofunction:: model_url + + .. autofunction:: urlencode + diff --git a/docs/index.rst b/docs/index.rst index 0c8fd295e..bd17236aa 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -76,6 +76,7 @@ Per-module :mod:`repoze.bfg` API documentation. 
api/testing api/traversal api/location + api/url api/urldispatch api/view api/wsgi diff --git a/repoze/bfg/tests/test_traversal.py b/repoze/bfg/tests/test_traversal.py index 70938cbf6..dcb47e6ed 100644 --- a/repoze/bfg/tests/test_traversal.py +++ b/repoze/bfg/tests/test_traversal.py @@ -225,86 +225,6 @@ class FindInterfaceTests(unittest.TestCase): result = self._callFUT(baz, IFoo) self.assertEqual(result.__name__, 'root') -class ModelURLTests(unittest.TestCase): - def _callFUT(self, model, request, *elements): - from repoze.bfg.traversal import model_url - return model_url(model, request, *elements) - - def test_extra_args(self): - baz = DummyContext() - bar = DummyContext(baz) - foo = DummyContext(bar) - root = DummyContext(foo) - root.__parent__ = None - root.__name__ = None - foo.__parent__ = root - foo.__name__ = 'foo ' - bar.__parent__ = foo - bar.__name__ = 'bar' - baz.__parent__ = bar - baz.__name__ = 'baz' - request = DummyRequest() - result = self._callFUT(baz, request, 'this/theotherthing', 'that') - - self.assertEqual( - result, - 'http://example.com:5432/foo%20/bar/baz/this/theotherthing/that') - - def test_root_default_app_url(self): - root = DummyContext() - root.__parent__ = None - root.__name__ = None - request = DummyRequest() - result = self._callFUT(root, request) - self.assertEqual(result, 'http://example.com:5432/') - - def test_nonroot_default_app_url(self): - root = DummyContext() - root.__parent__ = None - root.__name__ = None - other = DummyContext() - other.__parent__ = root - other.__name__ = 'nonroot object' - request = DummyRequest() - result = self._callFUT(other, request) - self.assertEqual(result, 'http://example.com:5432/nonroot%20object/') - - def test_unicode_mixed_with_bytes_in_model_names(self): - root = DummyContext() - root.__parent__ = None - root.__name__ = None - one = DummyContext() - one.__parent__ = root - one.__name__ = unicode('La Pe\xc3\xb1a', 'utf-8') - two = DummyContext() - two.__parent__ = one - two.__name__ = 'La 
Pe\xc3\xb1a' - request = DummyRequest() - result = self._callFUT(two, request) - self.assertEqual(result, - 'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a/') - - def test_unicode_in_element_names(self): - uc = unicode('La Pe\xc3\xb1a', 'utf-8') - root = DummyContext() - root.__parent__ = None - root.__name__ = None - one = DummyContext() - one.__parent__ = root - one.__name__ = uc - request = DummyRequest() - result = self._callFUT(one, request, uc) - self.assertEqual(result, - 'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a') - - def test_element_names_url_quoted(self): - root = DummyContext() - root.__parent__ = None - root.__name__ = None - request = DummyRequest() - result = self._callFUT(root, request, 'a b c') - self.assertEqual(result, 'http://example.com:5432/a%20b%20c') - class FindRootTests(unittest.TestCase): def _callFUT(self, context): from repoze.bfg.traversal import find_root diff --git a/repoze/bfg/tests/test_url.py b/repoze/bfg/tests/test_url.py new file mode 100644 index 000000000..2a47068cd --- /dev/null +++ b/repoze/bfg/tests/test_url.py @@ -0,0 +1,134 @@ +import unittest + +class ModelURLTests(unittest.TestCase): + def _callFUT(self, model, request, *elements, **kw): + from repoze.bfg.url import model_url + return model_url(model, request, *elements, **kw) + + def test_extra_args(self): + baz = DummyContext() + bar = DummyContext(baz) + foo = DummyContext(bar) + root = DummyContext(foo) + root.__parent__ = None + root.__name__ = None + foo.__parent__ = root + foo.__name__ = 'foo ' + bar.__parent__ = foo + bar.__name__ = 'bar' + baz.__parent__ = bar + baz.__name__ = 'baz' + request = DummyRequest() + result = self._callFUT(baz, request, 'this/theotherthing', 'that') + + self.assertEqual( + result, + 'http://example.com:5432/foo%20/bar/baz/this/theotherthing/that') + + def test_root_default_app_url(self): + root = DummyContext() + root.__parent__ = None + root.__name__ = None + request = DummyRequest() + result = 
self._callFUT(root, request) + self.assertEqual(result, 'http://example.com:5432/') + + def test_nonroot_default_app_url(self): + root = DummyContext() + root.__parent__ = None + root.__name__ = None + other = DummyContext() + other.__parent__ = root + other.__name__ = 'nonroot object' + request = DummyRequest() + result = self._callFUT(other, request) + self.assertEqual(result, 'http://example.com:5432/nonroot%20object/') + + def test_unicode_mixed_with_bytes_in_model_names(self): + root = DummyContext() + root.__parent__ = None + root.__name__ = None + one = DummyContext() + one.__parent__ = root + one.__name__ = unicode('La Pe\xc3\xb1a', 'utf-8') + two = DummyContext() + two.__parent__ = one + two.__name__ = 'La Pe\xc3\xb1a' + request = DummyRequest() + result = self._callFUT(two, request) + self.assertEqual(result, + 'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a/') + + def test_unicode_in_element_names(self): + uc = unicode('La Pe\xc3\xb1a', 'utf-8') + root = DummyContext() + root.__parent__ = None + root.__name__ = None + one = DummyContext() + one.__parent__ = root + one.__name__ = uc + request = DummyRequest() + result = self._callFUT(one, request, uc) + self.assertEqual(result, + 'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a') + + def test_element_names_url_quoted(self): + root = DummyContext() + root.__parent__ = None + root.__name__ = None + request = DummyRequest() + result = self._callFUT(root, request, 'a b c') + self.assertEqual(result, 'http://example.com:5432/a%20b%20c') + + def test_with_query(self): + root = DummyContext() + root.__parent__ = None + root.__name__ = None + request = DummyRequest() + result = self._callFUT(root, request, 'a', query=[('a', 1), ('b', 2)]) + self.assertEqual(result, 'http://example.com:5432/a?a=1&b=2') + + +class UrlEncodeTests(unittest.TestCase): + def _callFUT(self, query, doseq=False): + from repoze.bfg.url import urlencode + return urlencode(query, doseq) + + def test_ascii_only(self): + result = 
self._callFUT([('a',1), ('b',2)]) + self.assertEqual(result, 'a=1&b=2') + + def test_unicode_key(self): + la = unicode('LaPe\xc3\xb1a', 'utf-8') + result = self._callFUT([(la, 1), ('b',2)]) + self.assertEqual(result, 'LaPe%C3%B1a=1&b=2') + + def test_unicode_val_single(self): + la = unicode('LaPe\xc3\xb1a', 'utf-8') + result = self._callFUT([('a', la), ('b',2)]) + self.assertEqual(result, 'a=LaPe%C3%B1a&b=2') + + def test_unicode_val_multiple(self): + la = [unicode('LaPe\xc3\xb1a', 'utf-8')] * 2 + result = self._callFUT([('a', la), ('b',2)], doseq=True) + self.assertEqual(result, 'a=LaPe%C3%B1a&a=LaPe%C3%B1a&b=2') + + def test_dict(self): + result = self._callFUT({'a':1}) + self.assertEqual(result, 'a=1') + +class DummyContext(object): + def __init__(self, next=None): + self.next = next + + def __getitem__(self, name): + if self.next is None: + raise KeyError, name + return self.next + +class DummyRequest: + application_url = 'http://example.com:5432' # app_url never ends with slash + +class DummySettings: + def __init__(self, **kw): + self.__dict__.update(kw) diff --git a/repoze/bfg/traversal.py b/repoze/bfg/traversal.py index 3c6756de1..e2e31fdfa 100644 --- a/repoze/bfg/traversal.py +++ b/repoze/bfg/traversal.py @@ -2,17 +2,26 @@ import re import urllib from zope.component import queryUtility +from zope.deferredimport import deprecated from zope.interface import classProvides from zope.interface import implements from repoze.bfg.location import LocationProxy from repoze.bfg.location import lineage +from repoze.bfg.url import urlencode + from repoze.bfg.interfaces import ILocation from repoze.bfg.interfaces import ITraverser from repoze.bfg.interfaces import ITraverserFactory from repoze.bfg.interfaces import ISettings +deprecated( + "('from repoze.bfg.traversal import model_url' is now " + "deprecated; instead use 'from repoze.bfg.url import model_url')", + model_url = "repoze.bfg.url:model_url", + ) + def split_path(path): while path.startswith('/'): path = 
path[1:] @@ -112,83 +121,36 @@ def find_model(model, path): return ob def find_interface(model, interface): - """ Return the first object found which provides the interface + """ + Return the first object found which provides the interface ``interface`` in the parent chain of ``model`` or ``None`` if no object providing ``interface`` can be found in the parent chain. - The ``model`` passed in should be :term:`location`-aware.""" + The ``model`` passed in should be :term:`location`-aware. + """ for location in lineage(model): if interface.providedBy(location): return location -_segment_cache = {} - -def _urlsegment(s): - """ The bit of this code that deals with ``_segment_cache`` is an - optimization: we cache all the computation of URL path segments in - this module-scope dictionary with the original string (or unicode - value) as the key, so we can look it up later without needing to - reencode or re-url-quote it """ - result = _segment_cache.get(s) - if result is None: - if s.__class__ is unicode: # isinstance slighly slower (~15%) - result = _url_quote(s.encode('utf-8')) - else: - result = _url_quote(s) - # we don't need a lock to mutate _segment_cache, as the below - # will generate exactly one Python bytecode (STORE_SUBSCR) - _segment_cache[s] = result - return result - -def model_url(model, request, *elements): - """ Return a string representing the absolute URL of the model - object based on the ``wsgi.url_scheme``, ``HTTP_HOST`` or - ``SERVER_NAME`` in the request, plus any ``SCRIPT_NAME``. If any - model in the lineage has a unicode name, it will be converted to - UTF-8 before being attached to the URL. Empty names in the model - graph are ignored. - - Any positional arguments passed in as ``elements`` must be strings - or unicode objects. These will be joined by slashes and appended - to the model URL. 
Each of the elements passed in is URL-quoted - before being appended; if any element is unicode, it will - converted to a UTF-8 bytestring before being URL-quoted. - - If ``elements`` are not used, the model URL will end with a - trailing slash. If ``elements`` are used, the generated URL will - *not* end in trailing a slash. - - The ``model`` passed in must be :term:`location`-aware. The - overall result of this function is always a string (never - unicode). """ - rpath = [] - for location in lineage(model): - name = location.__name__ - if name: - rpath.append(_urlsegment(name)) - prefix = '/'.join(reversed(rpath)) - suffix = '/'.join([_urlsegment(s) for s in elements]) - path = '/'.join([prefix, suffix]) - if not path.startswith('/'): - path = '/' + path - app_url = request.application_url # never ends in a slash - return app_url + path - def model_path(model, *elements): """ Return a string representing the absolute path of the model - object based on its position in the model graph, e.g - ``/foo/bar``. Any positional arguments passed in as ``elements`` - will be joined by slashes and appended to the generated path. The - ``model`` passed in must be :term:`location`-aware. - - Note that this function is not equivalent to ``model_url``: - individual segments in the path are not quoted in any way. Thus, - to ensure that this function generates paths that can later be - resolved to models via ``find_model``, you should ensure that your - model names do not contain any slashes. - - Note also that the path returned may be a Unicode string if any - model name in the model graph is Unicode; otherwise it will be a - byte-string.""" + object based on its position in the model graph, e.g ``/foo/bar``. + This function is the inverse of ``find_model``: it can be used to + generate paths that can later be resolved via ``find_model``. Any + positional arguments passed in as ``elements`` will be joined by + slashes and appended to the generated path. 
The ``model`` passed + in must be :term:`location`-aware. + + Note that the way this function treats path segments is not + equivalent to how the ``repoze.bfg.url.model_url`` API treats path + segments: individual segments that make up the path are not quoted + in any way. Thus, to ensure that this function generates paths + that can later be resolved to models via ``find_model``, you + should ensure that your model __name__ attributes do not contain + any forward slashes. + + The path returned may be a unicode object if any model name in the + model graph is a unicode object; otherwise it will be a string. + """ rpath = [] for location in lineage(model): if location.__name__: @@ -199,48 +161,3 @@ def model_path(model, *elements): path = '/'.join([path, suffix]) return path -always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' - 'abcdefghijklmnopqrstuvwxyz' - '0123456789' '_.-') -_safemaps = {} -_must_quote = {} - -def _url_quote(s, safe = '/'): - """quote('abc def') -> 'abc%20def' - - Faster version of Python stdlib urllib.quote. See - http://bugs.python.org/issue1285086 for more information. - - Each part of a URL, e.g. the path info, the query, etc., has a - different set of reserved characters that must be quoted. - - RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists - the following reserved characters. - - reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | - "$" | "," - - Each of these characters is reserved in some component of a URL, - but not necessarily in all of them. - - By default, the quote function is intended for quoting the path - section of a URL. Thus, it will not encode '/'. This character - is reserved, but in typical usage the quote function is being - called on a path where the existing slash characters are used as - reserved characters. 
- """ - cachekey = (safe, always_safe) - try: - safe_map = _safemaps[cachekey] - if not _must_quote[cachekey].search(s): - return s - except KeyError: - safe += always_safe - _must_quote[cachekey] = re.compile(r'[^%s]' % safe) - safe_map = {} - for i in range(256): - c = chr(i) - safe_map[c] = (c in safe) and c or ('%%%02X' % i) - _safemaps[cachekey] = safe_map - res = map(safe_map.__getitem__, s) - return ''.join(res) diff --git a/repoze/bfg/url.py b/repoze/bfg/url.py new file mode 100644 index 000000000..4c1545de5 --- /dev/null +++ b/repoze/bfg/url.py @@ -0,0 +1,174 @@ +""" Utility functions for dealing with URLs in repoze.bfg """ + +import re +import urllib + +from repoze.bfg.location import lineage + +def model_url(model, request, *elements, **kw): + """ + Generate a string representing the absolute URL of the model + object based on the ``wsgi.url_scheme``, ``HTTP_HOST`` or + ``SERVER_NAME`` in the request, plus any ``SCRIPT_NAME``. If a + ``query`` keyword argument is provided, a query string based on + its value will be composed and appended to the generated URL + string (see details below). The overall result of this function + is always a string (never unicode). The ``model`` passed in must + be :term:`location`-aware. + + .. note:: If any model in the lineage has a unicode name, it will + be converted to UTF-8 before being attached to the URL. + When composing the path based on the model lineage, + empty names in the model graph are ignored. + + Any positional arguments passed in as ``elements`` must be strings + or unicode objects. These will be joined by slashes and appended + to the generated model URL. Each of the elements passed in is + URL-quoted before being appended; if any element is unicode, it + will converted to a UTF-8 bytestring before being URL-quoted. + + .. warning:: if no ``elements`` arguments are specified, the model + URL will end with a trailing slash. 
If any + ``elements`` are used, the generated URL will *not* + end in a trailing slash. + + If a keyword argument ``query`` is present, it will be used to + compose a query string that will be tacked on to the end of the + URL. The value of ``query`` must be a sequence of two-tuples *or* + a data structure with an ``.items()`` method that returns a + sequence of two-tuples (presumably a dictionary). This data + structure will be turned into a query string per the documentation + of the ``repoze.bfg.url.urlencode`` function. After the query data is + turned into a query string, a leading ``?`` is prepended, and the + resulting string is appended to the generated URL. + + .. note:: Python data structures that are passed as ``query`` + which are sequences or dictionaries are turned into a + string under the same rules as when run through + urllib.urlencode with the ``doseq`` argument equal to + ``True``. This means that sequences can be passed as + values, and a k=v pair will be placed into the query + string for each value. + """ + + qs = '' + if 'query' in kw: + qs = '?' + urlencode(kw['query'], doseq=True) + + rpath = [] + for location in lineage(model): + name = location.__name__ + if name: + rpath.append(_urlsegment(name)) + prefix = '/'.join(reversed(rpath)) + suffix = '/'.join([_urlsegment(s) for s in elements]) + path = '/'.join([prefix, suffix]) + if not path.startswith('/'): + path = '/' + path + app_url = request.application_url # never ends in a slash + return app_url + path + qs + +def urlencode(query, doseq=False): + """ + A wrapper around Python's stdlib `urllib.urlencode function + `_ which + accepts unicode keys and values within the ``query`` + dict/sequence; all keys and values are first converted to UTF-8 + before being used to compose the query string. The behavior of + the function is otherwise the same as the stdlib version. 
+ + The value of ``query`` must be a sequence of two-tuples + representing key/value pairs *or* an object (often a dictionary) + with an ``.items()`` method that returns a sequence of two-tuples + representing key/value pairs. ``doseq`` controls what happens + when a sequence is presented as one of the values. See the Python + stdlib documentation for more information. + """ + if hasattr(query, 'items'): + # dictionary + query = query.items() + # presumed to be a sequence of two-tuples + newquery = [] + for k, v in query: + if k.__class__ is unicode: + k = k.encode('utf-8') + + if isinstance(v, (tuple, list)): + L = [] + for x in v: + if x.__class__ is unicode: + x = x.encode('utf-8') + L.append(x) + v = L + elif v.__class__ is unicode: + v = v.encode('utf-8') + newquery.append((k, v)) + + return urllib.urlencode(newquery, doseq=doseq) + +_segment_cache = {} + +def _urlsegment(s): + """ The bit of this code that deals with ``_segment_cache`` is an + optimization: we cache all the computation of URL path segments in + this module-scope dictionary with the original string (or unicode + value) as the key, so we can look it up later without needing to + reencode or re-url-quote it """ + result = _segment_cache.get(s) + if result is None: + if s.__class__ is unicode: # isinstance slightly slower (~15%) + result = _url_quote(s.encode('utf-8')) + else: + result = _url_quote(s) + # we don't need a lock to mutate _segment_cache, as the below + # will generate exactly one Python bytecode (STORE_SUBSCR) + _segment_cache[s] = result + return result + + +always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' + 'abcdefghijklmnopqrstuvwxyz' + '0123456789' '_.-') +_safemaps = {} +_must_quote = {} + +def _url_quote(s, safe = '/'): + """quote('abc def') -> 'abc%20def' + + Faster version of Python stdlib urllib.quote. See + http://bugs.python.org/issue1285086 for more information. + + Each part of a URL, e.g. 
the path info, the query, etc., has a + different set of reserved characters that must be quoted. + + RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists + the following reserved characters. + + reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | + "$" | "," + + Each of these characters is reserved in some component of a URL, + but not necessarily in all of them. + + By default, the quote function is intended for quoting the path + section of a URL. Thus, it will not encode '/'. This character + is reserved, but in typical usage the quote function is being + called on a path where the existing slash characters are used as + reserved characters. + """ + cachekey = (safe, always_safe) + try: + safe_map = _safemaps[cachekey] + if not _must_quote[cachekey].search(s): + return s + except KeyError: + safe += always_safe + _must_quote[cachekey] = re.compile(r'[^%s]' % safe) + safe_map = {} + for i in range(256): + c = chr(i) + safe_map[c] = (c in safe) and c or ('%%%02X' % i) + _safemaps[cachekey] = safe_map + res = map(safe_map.__getitem__, s) + return ''.join(res) + -- cgit v1.2.3