From c8a91c6fa1eb4daa6be6dc7098518345bc6aa9bb Mon Sep 17 00:00:00 2001 From: Chris McDonough Date: Sun, 14 Dec 2008 04:00:07 +0000 Subject: Backwards Incompatibilities - URL-quote "extra" element names passed in as ``**elements`` to the ``traversal.model_url`` API. If any of these names is a Unicode string, encode it to UTF-8 before URL-quoting. This is a slight backwards incompatibility that will impact you if you were already UTF-8 encoding or URL-quoting the values you passed in as ``elements`` to this API. Bugfixes - UTF-8 encode each segment in the model path used to generate a URL before url-quoting it within the ``traversal.model_url`` API. This is a bugfix, as Unicode cannot always be successfully URL-quoted. --- CHANGES.txt | 18 ++++++++++ repoze/bfg/tests/test_traversal.py | 39 ++++++++++++++++++++ repoze/bfg/traversal.py | 74 ++++++++++++++++++++++++++------------ 3 files changed, 109 insertions(+), 22 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 8d1b6d59d..beba77193 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,23 @@ Next release + Backwards Incompatibilities + + - URL-quote "extra" element names passed in as ``**elements`` to the + ``traversal.model_url`` API. If any of these names is a Unicode + string, encode it to UTF-8 before URL-quoting. This is a slight + backwards incompatibility that will impact you if you were already + UTF-8 encoding or URL-quoting the values you passed in as + ``elements`` to this API. + + Bugfixes + + - UTF-8 encode each segment in the model path used to generate a URL + before url-quoting it within the ``traversal.model_url`` API. + This is a bugfix, as Unicode cannot always be successfully + URL-quoted. + + Features + - Make it possible to run unit tests using a buildout-generated Python "interpreter". 
diff --git a/repoze/bfg/tests/test_traversal.py b/repoze/bfg/tests/test_traversal.py index 2c02d7d63..32be1ebbb 100644 --- a/repoze/bfg/tests/test_traversal.py +++ b/repoze/bfg/tests/test_traversal.py @@ -256,6 +256,45 @@ class ModelURLTests(unittest.TestCase): result = self._callFUT(other, request) self.assertEqual(result, 'http://example.com:5432/nonroot%20object/') + def test_unicode_mixed_with_bytes_in_model_names(self): + root = DummyContext() + root.__parent__ = None + root.__name__ = None + one = DummyContext() + one.__parent__ = root + one.__name__ = unicode('La Pe\xc3\xb1a', 'utf-8') + two = DummyContext() + two.__parent__ = one + two.__name__ = 'La Pe\xc3\xb1a' + request = DummyRequest() + request.application_url = 'http://example.com:5432' + result = self._callFUT(two, request) + self.assertEqual(result, + 'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a/') + + def test_unicode_in_element_names(self): + uc = unicode('La Pe\xc3\xb1a', 'utf-8') + root = DummyContext() + root.__parent__ = None + root.__name__ = None + one = DummyContext() + one.__parent__ = root + one.__name__ = uc + request = DummyRequest() + request.application_url = 'http://example.com:5432' + result = self._callFUT(one, request, uc) + self.assertEqual(result, + 'http://example.com:5432/La%20Pe%C3%B1a/La%20Pe%C3%B1a') + + def test_element_names_url_quoted(self): + root = DummyContext() + root.__parent__ = None + root.__name__ = None + request = DummyRequest() + request.application_url = 'http://example.com:5432' + result = self._callFUT(root, request, 'a b c') + self.assertEqual(result, 'http://example.com:5432/a%20b%20c') + class FindRootTests(unittest.TestCase): def _callFUT(self, context): from repoze.bfg.traversal import find_root diff --git a/repoze/bfg/traversal.py b/repoze/bfg/traversal.py index 61b66c040..78b7b2b60 100644 --- a/repoze/bfg/traversal.py +++ b/repoze/bfg/traversal.py @@ -67,7 +67,7 @@ class ModelGraphTraverser(object): def find_root(model): """ Find the root 
node in the graph to which ``model`` belongs. Note that ``model`` should be :term:`location`-aware. - Note that the root node is available in the request object by + Note that the root node is available in the request object by accessing the ``request.root`` attribute. """ for location in lineage(model): @@ -78,14 +78,15 @@ def find_root(model): def find_model(model, path): """ Given a model object and a string representing a path - reference (a set of names delimited by forward-slashes), return an - context in this application's model graph at the specified path. - If the path starts with a slash, the path is considered absolute - and the graph traversal will start at the root object. If the - path does not start with a slash, the path is considered relative - and graph traversal will begin at the model object supplied to the - function. In either case, if the path cannot be resolved, a - KeyError will be thrown. Note that the model passed in should be + reference (a set of names delimited by forward-slashes, such as + the return value of ``model_path``), return a context in this + application's model graph at the specified path. If the path + starts with a slash, the path is considered absolute and the graph + traversal will start at the root object. If the path does not + start with a slash, the path is considered relative and graph + traversal will begin at the model object supplied to the function. + In either case, if the path cannot be resolved, a KeyError will be + thrown. 
Note that the model passed in should be :term:`location`-aware.""" if path.startswith('/'): @@ -105,24 +106,43 @@ def find_interface(model, interface): if interface.providedBy(location): return location +def _urlsegment(s): + if isinstance(s, unicode): + s = s.encode('utf-8') + return urllib.quote(s) + def model_url(model, request, *elements): - """ Return the absolute URL of the model object based on the - ``wsgi.url_scheme``, ``HTTP_HOST`` or ``SERVER_NAME`` in the - request, plus any ``SCRIPT_NAME``. The model URL will end with a - trailing slash. Any positional arguments passed in as - ``elements`` will be joined by slashes and appended to the model - URL. The passed in elements are *not* URL-quoted. The ``model`` - passed in must be :term:`location`-aware.""" + """ Return a string representing the absolute URL of the model + object based on the ``wsgi.url_scheme``, ``HTTP_HOST`` or + ``SERVER_NAME`` in the request, plus any ``SCRIPT_NAME``. If any + model in the lineage has a unicode name, it will be converted to + UTF-8 before being attached to the URL. Empty names in the model + graph are ignored. + + Any positional arguments passed in as ``elements`` must be strings + or unicode objects. These will be joined by slashes and appended + to the model URL. Each of the elements passed in is URL-quoted + before being appended; if any element is unicode, it will be + converted to a UTF-8 bytestring before being URL-quoted. + + If ``elements`` are not used, the model URL will end with a + trailing slash. If ``elements`` are used, the generated URL will + *not* end in a trailing slash. + + The ``model`` passed in must be :term:`location`-aware. The + overall result of this function is always a string (never + unicode). 
""" rpath = [] for location in lineage(model): - if location.__name__: - rpath.append(urllib.quote(location.__name__)) + name = location.__name__ + if name: + rpath.append(_urlsegment(name)) prefix = '/'.join(reversed(rpath)) - suffix = '/'.join(elements) - path = '/'.join([prefix, suffix]) # always have trailing slash + suffix = '/'.join([_urlsegment(s) for s in elements]) + path = '/'.join([prefix, suffix]) app_url = request.application_url if not app_url.endswith('/'): - app_url = app_url+'/' + app_url = app_url + '/' return urlparse.urljoin(app_url, path) def model_path(model, *elements): @@ -130,7 +150,17 @@ def model_path(model, *elements): object based on its position in the model graph, e.g ``/foo/bar``. Any positional arguments passed in as ``elements`` will be joined by slashes and appended to the generated path. The - ``model`` passed in must be :term:`location`-aware.""" + ``model`` passed in must be :term:`location`-aware. + + Note that this function is not equivalent to ``model_url``: + individual segments in the path are not quoted in any way. Thus, + to ensure that this function generates paths that can later be + resolved to models via ``find_model``, you should ensure that your + model names do not contain any slashes. + + Note also that the path returned may be a Unicode string if any + model name in the model graph is Unicode; otherwise it will be a + byte-string.""" rpath = [] for location in lineage(model): if location.__name__: -- cgit v1.2.3