From a66593d25e77f1a0e749f5590b45498bbaa66755 Mon Sep 17 00:00:00 2001 From: Chris McDonough Date: Thu, 18 Nov 2010 16:56:05 -0500 Subject: - Fix apparent failures when calling ``pyramid.traversal.find_model(root, path)`` or ``pyramid.traversal.traverse(path)`` when ``path`` is (erroneously) a Unicode object. The user is meant to pass these APIs a string object, never a Unicode object. In practice, however, users indeed pass Unicode. Because the string that is passed must be ASCII encodeable, now, if they pass a Unicode object, its data is eagerly converted to an ASCII string rather than being passed along to downstream code as a convenience to the user and to prevent puzzling second-order failures from cropping up (all failures will occur within ``pyramid.traversal.traverse`` rather than later down the line as the result of calling ``traversal_path``). --- CHANGES.txt | 12 ++++++++++++ pyramid/tests/test_traversal.py | 26 ++++++++++++++++++++++++++ pyramid/traversal.py | 13 ++++++++++++- 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 14b2c569d..0720034b2 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -28,6 +28,18 @@ Bug Fixes - The ``pyramid_alchemy`` paster template had a typo, preventing an import from working. +- Fix apparent failures when calling ``pyramid.traversal.find_model(root, + path)`` or ``pyramid.traversal.traverse(path)`` when ``path`` is + (erroneously) a Unicode object. The user is meant to pass these APIs a + string object, never a Unicode object. In practice, however, users indeed + pass Unicode. Because the string that is passed must be ASCII encodeable, + now, if they pass a Unicode object, its data is eagerly converted to an + ASCII string rather than being passed along to downstream code as a + convenience to the user and to prevent puzzling second-order failures from + cropping up (all failures will occur within ``pyramid.traversal.traverse`` + rather than later down the line as the result of calling + ``traversal_path``). + Backwards Incompatibilities --------------------------- diff --git a/pyramid/tests/test_traversal.py b/pyramid/tests/test_traversal.py index 2deb5982c..3245d6302 100644 --- a/pyramid/tests/test_traversal.py +++ b/pyramid/tests/test_traversal.py @@ -522,6 +522,32 @@ class FindModelTests(unittest.TestCase): self.assertEqual(root.wascontext, True) self.assertEqual(root.request.environ['PATH_INFO'], '/') + def test_absolute_unicode_found(self): + # test for bug wiggy found in wild, traceback stack: + # root = u'/%E6%B5%81%E8%A1%8C%E8%B6%8B%E5%8A%BF' + # wiggy's code: section=find_model(page, root) + # find_model L76: D = traverse(model, path) + # traverse L291: return traverser(request) + # __call__ line 568: vpath_tuple = traversal_path(vpath) + # lru_cached line 91: f(*arg) + # traversal_path line 443: path.encode('ascii') + # UnicodeEncodeError: 'ascii' codec can't encode characters in + # position 1-12: ordinal not in range(128) + # + # solution: encode string to ascii in pyramid.traversal.traverse + # before passing it along to webob as path_info + from pyramid.traversal import ModelGraphTraverser + unprintable = DummyContext() + root = DummyContext(unprintable) + unprintable.__parent__ = root + unprintable.__name__ = unicode( + '/\xe6\xb5\x81\xe8\xa1\x8c\xe8\xb6\x8b\xe5\x8a\xbf', 'utf-8') + root.__parent__ = None + root.__name__ = None + traverser = ModelGraphTraverser + self._registerTraverser(traverser) + result = self._callFUT(root, u'/%E6%B5%81%E8%A1%8C%E8%B6%8B%E5%8A%BF') + self.assertEqual(result, unprintable) class ModelPathTests(unittest.TestCase): def _callFUT(self, model, *elements): diff --git a/pyramid/traversal.py b/pyramid/traversal.py index e928c33f7..fb73ad906 100644 --- a/pyramid/traversal.py +++ b/pyramid/traversal.py @@ -228,7 +228,7 @@ def traverse(model, path): object supplied to the function as the ``model`` argument. If an empty string is passed as ``path``, the ``model`` passed in will be returned. Model path strings must be escaped in the following - manner: each Unicode path segment must be encoded as UTF-8 and as + manner: each Unicode path segment must be encoded as UTF-8 and each path segment must escaped via Python's :mod:`urllib.quote`. For example, ``/path/to%20the/La%20Pe%C3%B1a`` (absolute) or ``to%20the/La%20Pe%C3%B1a`` (relative). The @@ -272,6 +272,17 @@ def traverse(model, path): else: path = '' + # The user is supposed to pass us a string object, never Unicode. In + # practice, however, users indeed pass Unicode to this API. If they do + # pass a Unicode object, its data *must* be entirely encodeable to ASCII, + # so we encode it here as a convenience to the user and to prevent + # second-order failures from cropping up (all failures will occur at this + # step rather than later down the line as the result of calling + # ``traversal_path``). + + if isinstance(path, unicode): + path = path.encode('ascii') + if path and path[0] == '/': model = find_root(model) -- cgit v1.2.3