From f8414776aac292c178f96ea07fce634ec3af9f7a Mon Sep 17 00:00:00 2001 From: Chris McDonough Date: Sun, 25 Sep 2011 01:17:17 -0400 Subject: add path_traversal_info method to match reality of latin-1 encoded envvars, fix traversal tests under 3.2 --- pyramid/compat.py | 19 ++++++++- pyramid/config/__init__.py | 2 +- pyramid/config/testing.py | 4 +- pyramid/config/util.py | 4 +- pyramid/static.py | 8 ++-- pyramid/tests/test_traversal.py | 83 ++++++++++++++++++++++++++------------- pyramid/tests/test_urldispatch.py | 21 ++++++---- pyramid/traversal.py | 74 ++++++++++++++++++++-------------- pyramid/urldispatch.py | 9 ++--- 9 files changed, 143 insertions(+), 81 deletions(-) diff --git a/pyramid/compat.py b/pyramid/compat.py index 1936d58ac..e1f5f16a8 100644 --- a/pyramid/compat.py +++ b/pyramid/compat.py @@ -49,6 +49,17 @@ def bytes_(s, encoding='latin-1', errors='strict'): return s.encode(encoding, errors) return s +if PY3: # pragma: no cover + def ascii_native_(s): + if isinstance(s, text_type): + s = s.encode('ascii') + return str(s, 'ascii', 'strict') +else: + def ascii_native_(s): + if isinstance(s, text_type): + s = s.encode('ascii') + return str(s) + if PY3: # pragma: no cover def native_(s, encoding='latin-1', errors='strict'): if isinstance(s, text_type): @@ -60,7 +71,7 @@ else: return s.encode(encoding, errors) return str(s) -try: # pragma: no cover +if PY3: # pragma: no cover from urllib import parse urlparse = parse from urllib.parse import quote as url_quote @@ -69,7 +80,8 @@ try: # pragma: no cover from urllib.parse import urlencode as url_encode from urllib.request import urlopen as url_open url_unquote_text = url_unquote -except ImportError: + url_unquote_native = url_unquote +else: import urlparse from urllib import quote as url_quote from urllib import quote_plus as url_quote_plus @@ -79,6 +91,9 @@ except ImportError: def url_unquote_text(v, encoding='utf-8', errors='replace'): v = url_unquote(v) return v.decode(encoding, errors) + def 
url_unquote_native(v, encoding='utf-8', errors='replace'): + return native_(url_unquote_text(v, encoding, errors)) + if PY3: # pragma: no cover import builtins diff --git a/pyramid/config/__init__.py b/pyramid/config/__init__.py index 02a78445e..3b5d88d5f 100644 --- a/pyramid/config/__init__.py +++ b/pyramid/config/__init__.py @@ -660,7 +660,7 @@ class Configurator( c, action_wrap = c if action_wrap: c = action_method(c) - if PY3: + if PY3: # pragma: no cover m = types.MethodType(c, self) else: m = types.MethodType(c, self, self.__class__) diff --git a/pyramid/config/testing.py b/pyramid/config/testing.py index 0f709f634..36729acdf 100644 --- a/pyramid/config/testing.py +++ b/pyramid/config/testing.py @@ -6,7 +6,7 @@ from pyramid.interfaces import IAuthenticationPolicy from pyramid.interfaces import IRendererFactory from pyramid.renderers import RendererHelper -from pyramid.traversal import traversal_path +from pyramid.traversal import traversal_path_info from pyramid.config.util import action_method @@ -66,7 +66,7 @@ class TestingConfiguratorMixin(object): def __call__(self, request): path = request.environ['PATH_INFO'] ob = resources[path] - traversed = traversal_path(path) + traversed = traversal_path_info(path) return {'context':ob, 'view_name':'','subpath':(), 'traversed':traversed, 'virtual_root':ob, 'virtual_root_path':(), 'root':ob} diff --git a/pyramid/config/util.py b/pyramid/config/util.py index 498fe7629..919862a9d 100644 --- a/pyramid/config/util.py +++ b/pyramid/config/util.py @@ -6,7 +6,7 @@ from pyramid.compat import bytes_ from pyramid.compat import is_nonstr_iter from pyramid.exceptions import ConfigurationError from pyramid.traversal import find_interface -from pyramid.traversal import traversal_path +from pyramid.traversal import traversal_path_info from hashlib import md5 @@ -241,7 +241,7 @@ def make_predicates(xhr=None, request_method=None, path_info=None, return True m = context['match'] tvalue = tgenerate(m) - m['traverse'] = 
traversal_path(tvalue) + m['traverse'] = traversal_path_info(tvalue) return True # This isn't actually a predicate, it's just a infodict # modifier that injects ``traverse`` into the matchdict. As a diff --git a/pyramid/static.py b/pyramid/static.py index edb20b54f..79384da09 100644 --- a/pyramid/static.py +++ b/pyramid/static.py @@ -20,7 +20,7 @@ from pyramid.httpexceptions import HTTPNotFound from pyramid.httpexceptions import HTTPMovedPermanently from pyramid.path import caller_package from pyramid.response import Response -from pyramid.traversal import traversal_path +from pyramid.traversal import traversal_path_info slash = text_('/') @@ -112,8 +112,8 @@ class static_view(object): ``PATH_INFO`` when calling the underlying WSGI application which actually serves the static files. If it is ``True``, the static application will consider ``request.subpath`` as ``PATH_INFO`` input. If it is ``False``, - the static application will consider request.path_info as ``PATH_INFO`` - input. By default, this is ``False``. + the static application will consider request.environ[``PATH_INFO``] as + ``PATH_INFO`` input. By default, this is ``False``. .. 
note:: If the ``root_dir`` is relative to a :term:`package`, or is a :term:`asset specification` the :app:`Pyramid` @@ -142,7 +142,7 @@ class static_view(object): if self.use_subpath: path_tuple = request.subpath else: - path_tuple = traversal_path(request.path_info) + path_tuple = traversal_path_info(request.environ['PATH_INFO']) path = _secure_path(path_tuple) diff --git a/pyramid/tests/test_traversal.py b/pyramid/tests/test_traversal.py index 2f1ffcf5a..72192b23b 100644 --- a/pyramid/tests/test_traversal.py +++ b/pyramid/tests/test_traversal.py @@ -11,6 +11,39 @@ class TraversalPathTests(unittest.TestCase): from pyramid.traversal import traversal_path return traversal_path(path) + def test_utf8(self): + la = b'La Pe\xc3\xb1a' + encoded = url_quote(la) + decoded = text_(la, 'utf-8') + path = '/'.join([encoded, encoded]) + result = self._callFUT(path) + self.assertEqual(result, (decoded, decoded)) + + def test_utf16(self): + from pyramid.exceptions import URLDecodeError + la = text_(b'La Pe\xc3\xb1a', 'utf-8').encode('utf-16') + encoded = url_quote(la) + path = '/'.join([encoded, encoded]) + self.assertRaises(URLDecodeError, self._callFUT, path) + + def test_unicode_highorder_chars(self): + path = text_('/%E6%B5%81%E8%A1%8C%E8%B6%8B%E5%8A%BF') + self.assertEqual(self._callFUT(path), + (text_('\u6d41\u884c\u8d8b\u52bf', 'unicode_escape'),)) + + def test_element_urllquoted(self): + self.assertEqual(self._callFUT('/foo/space%20thing/bar'), + (text_('foo'), text_('space thing'), text_('bar'))) + + def test_unicode_undecodeable_to_ascii(self): + path = text_(b'/La Pe\xc3\xb1a', 'utf-8') + self.assertRaises(UnicodeEncodeError, self._callFUT, path) + +class TraversalPathInfoTests(unittest.TestCase): + def _callFUT(self, path): + from pyramid.traversal import traversal_path_info + return traversal_path_info(path) + def test_path_startswith_endswith(self): self.assertEqual(self._callFUT('/foo/'), (text_('foo'),)) @@ -27,10 +60,6 @@ class 
TraversalPathTests(unittest.TestCase): def test_twodots_at_start(self): self.assertEqual(self._callFUT('../../bar'), (text_('bar'),)) - def test_element_urllquoted(self): - self.assertEqual(self._callFUT('/foo/space%20thing/bar'), - (text_('foo'), text_('space thing'), text_('bar'))) - def test_segments_are_unicode(self): result = self._callFUT('/foo/bar') self.assertEqual(type(result[0]), text_type) @@ -42,32 +71,21 @@ class TraversalPathTests(unittest.TestCase): self.assertEqual(result1, (text_('foo'), text_('bar'))) self.assertEqual(result2, (text_('foo'), text_('bar'))) - def test_utf8(self): - la = b'La Pe\xc3\xb1a' - encoded = url_quote(la) - decoded = text_(la, 'utf-8') - path = '/'.join([encoded, encoded]) - self.assertEqual(self._callFUT(path), (decoded, decoded)) - - def test_utf16(self): - from pyramid.exceptions import URLDecodeError - la = text_(b'La Pe\xc3\xb1a', 'utf-8').encode('utf-16') - encoded = url_quote(la) - path = '/'.join([encoded, encoded]) - self.assertRaises(URLDecodeError, self._callFUT, path) - - def test_unicode_highorder_chars(self): - path = text_('/%E6%B5%81%E8%A1%8C%E8%B6%8B%E5%8A%BF') - self.assertEqual(self._callFUT(path), - (text_('\u6d41\u884c\u8d8b\u52bf', 'unicode_escape'),)) - def test_unicode_simple(self): path = text_('/abc') self.assertEqual(self._callFUT(path), (text_('abc'),)) - def test_unicode_undecodeable_to_ascii(self): - path = text_(b'/La Pe\xc3\xb1a', 'utf-8') - self.assertRaises(UnicodeEncodeError, self._callFUT, path) + def test_highorder(self): + la = b'La Pe\xc3\xb1a' + latin1 = native_(la) + result = self._callFUT(latin1) + self.assertEqual(result, (text_(la, 'utf-8'),)) + + def test_highorder_undecodeable(self): + from pyramid.exceptions import URLDecodeError + la = text_(b'La Pe\xc3\xb1a', 'utf-8') + notlatin1 = native_(la) + self.assertRaises(URLDecodeError, self._callFUT, notlatin1) class ResourceTreeTraverserTests(unittest.TestCase): def setUp(self): @@ -278,22 +296,24 @@ class 
ResourceTreeTraverserTests(unittest.TestCase): self.assertEqual(result['virtual_root_path'], ()) def test_non_utf8_path_segment_unicode_path_segments_fails(self): + from pyramid.exceptions import URLDecodeError foo = DummyContext() root = DummyContext(foo) policy = self._makeOne(root) segment = native_(text_(b'LaPe\xc3\xb1a', 'utf-8'), 'utf-16') environ = self._getEnviron(PATH_INFO='/%s' % segment) request = DummyRequest(environ) - self.assertRaises(UnicodeEncodeError, policy, request) + self.assertRaises(URLDecodeError, policy, request) def test_non_utf8_path_segment_settings_unicode_path_segments_fails(self): + from pyramid.exceptions import URLDecodeError foo = DummyContext() root = DummyContext(foo) policy = self._makeOne(root) segment = native_(text_(b'LaPe\xc3\xb1a', 'utf-8'), 'utf-16') environ = self._getEnviron(PATH_INFO='/%s' % segment) request = DummyRequest(environ) - self.assertRaises(UnicodeEncodeError, policy, request) + self.assertRaises(URLDecodeError, policy, request) def test_withroute_nothingfancy(self): resource = DummyContext() @@ -1049,6 +1069,13 @@ class TraverseTests(unittest.TestCase): self._callFUT(resource, '') self.assertEqual(resource.request.environ['PATH_INFO'], '') + def test_self_unicode_found(self): + resource = DummyContext() + traverser = make_traverser({'context':resource, 'view_name':''}) + self._registerTraverser(traverser) + self._callFUT(resource, text_('')) + self.assertEqual(resource.request.environ['PATH_INFO'], '') + def test_self_tuple_found(self): resource = DummyContext() traverser = make_traverser({'context':resource, 'view_name':''}) diff --git a/pyramid/tests/test_urldispatch.py b/pyramid/tests/test_urldispatch.py index 2f6182a66..be823b045 100644 --- a/pyramid/tests/test_urldispatch.py +++ b/pyramid/tests/test_urldispatch.py @@ -1,6 +1,7 @@ import unittest from pyramid import testing from pyramid.compat import text_ +from pyramid.compat import native_ class TestRoute(unittest.TestCase): def _getTargetClass(self): 
@@ -294,7 +295,8 @@ class TestCompileRoute(unittest.TestCase): def test_url_decode_error(self): from pyramid.exceptions import URLDecodeError matcher, generator = self._callFUT('/:foo') - self.assertRaises(URLDecodeError, matcher, '/%FF%FE%8B%00') + self.assertRaises(URLDecodeError, matcher, + native_(b'/\xff\xfe\x8b\x00')) def test_custom_regex(self): matcher, generator = self._callFUT('foo/{baz}/biz/{buz:[^/\.]+}.{bar}') @@ -364,9 +366,12 @@ class TestCompileRouteFunctional(unittest.TestCase): self.matches('zzz/{x}*traverse', '/zzz/abc/def/g', {'x':'abc', 'traverse':('def', 'g')}) self.matches('*traverse', '/zzz/abc', {'traverse':('zzz', 'abc')}) - self.matches('*traverse', '/zzz/%20abc', {'traverse':('zzz', ' abc')}) - self.matches('{x}', '/La%20Pe%C3%B1a', {'x':text_(b'La Pe\xf1a')}) - self.matches('*traverse', '/La%20Pe%C3%B1a/x', + self.matches('*traverse', '/zzz/ abc', {'traverse':('zzz', ' abc')}) + #'/La%20Pe%C3%B1a' + self.matches('{x}', native_(b'/La Pe\xc3\xb1a'), + {'x':text_(b'La Pe\xf1a')}) + # '/La%20Pe%C3%B1a/x' + self.matches('*traverse', native_(b'/La Pe\xc3\xb1a/x'), {'traverse':(text_(b'La Pe\xf1a'), 'x')}) self.matches('/foo/{id}.html', '/foo/bar.html', {'id':'bar'}) self.matches('/{num:[0-9]+}/*traverse', '/555/abc/def', @@ -387,10 +392,12 @@ class TestCompileRouteFunctional(unittest.TestCase): self.matches('zzz/:x*traverse', '/zzz/abc/def/g', {'x':'abc', 'traverse':('def', 'g')}) self.matches('*traverse', '/zzz/abc', {'traverse':('zzz', 'abc')}) - self.matches('*traverse', '/zzz/%20abc', {'traverse':('zzz', ' abc')}) - self.matches(':x', '/La%20Pe%C3%B1a', + self.matches('*traverse', '/zzz/ abc', {'traverse':('zzz', ' abc')}) + #'/La%20Pe%C3%B1a' + self.matches(':x', native_(b'/La Pe\xc3\xb1a'), {'x':text_(b'La Pe\xf1a')}) - self.matches('*traverse', '/La%20Pe%C3%B1a/x', + # '/La%20Pe%C3%B1a/x' + self.matches('*traverse', native_(b'/La Pe\xc3\xb1a/x'), {'traverse':(text_(b'La Pe\xf1a'), 'x')}) self.matches('/foo/:id.html', '/foo/bar.html', 
{'id':'bar'}) self.matches('/foo/:id_html', '/foo/bar_html', {'id_html':'bar_html'}) diff --git a/pyramid/traversal.py b/pyramid/traversal.py index 4d6a18b0d..49f9be55f 100644 --- a/pyramid/traversal.py +++ b/pyramid/traversal.py @@ -13,9 +13,11 @@ from pyramid.interfaces import VH_ROOT_KEY from pyramid.compat import PY3 from pyramid.compat import native_ from pyramid.compat import text_ +from pyramid.compat import bytes_ +from pyramid.compat import ascii_native_ from pyramid.compat import text_type from pyramid.compat import binary_type -from pyramid.compat import url_unquote_text +from pyramid.compat import url_unquote_native from pyramid.compat import is_nonstr_iter from pyramid.encode import url_quote from pyramid.exceptions import URLDecodeError @@ -66,6 +68,10 @@ def find_resource(resource, path): :func:`pyramid.traversal.resource_path` function generates strings which follow these rules (albeit only absolute ones). + Rules for passing *text* (Unicode) as the ``path`` argument are the same + as those for a string. In particular, the text may not have any nonascii + characters in it. + Rules for passing a *tuple* as the ``path`` argument: if the first element in the path tuple is the empty string (for example ``('', 'a', 'b', 'c')``, the path is considered absolute and the resource tree @@ -85,6 +91,8 @@ def find_resource(resource, path): be imported as :func:`pyramid.traversal.find_model`, although doing so will emit a deprecation warning. """ + if isinstance(path, text_type): + path = ascii_native_(path) D = traverse(resource, path) view_name = D['view_name'] context = D['context'] @@ -299,8 +307,7 @@ def traverse(resource, path): # step rather than later down the line as the result of calling # ``traversal_path``). 
- if isinstance(path, text_type): - path = native_(path, 'ascii') + path = ascii_native_(path) if path and path[0] == '/': resource = find_root(resource) @@ -410,26 +417,35 @@ def virtual_root(resource, request): urlgenerator = TraversalContextURL(resource, request) return urlgenerator.virtual_root() -@lru_cache(1000) def traversal_path(path): - """ Given a ``PATH_INFO`` string (slash-separated path segments), - return a tuple representing that path which can be used to - traverse a resource tree. - - The ``PATH_INFO`` is split on slashes, creating a list of - segments. Each segment is URL-unquoted, and subsequently decoded - into Unicode. Each segment is assumed to be encoded using the - UTF-8 encoding (or a subset, such as ASCII); a - :exc:`pyramid.exceptions.URLDecodeError` is raised if a segment - cannot be decoded. If a segment name is empty or if it is ``.``, - it is ignored. If a segment name is ``..``, the previous segment - is deleted, and the ``..`` is ignored. + """ Variant of :func:`pyramid.traversal.traversal_path_info` suitable for + decoding paths that are URL-encoded.""" + path = ascii_native_(path) + path = url_unquote_native(path, 'latin-1', 'strict') + return traversal_path_info(path) - If this function is passed a Unicode object instead of a string, - that Unicode object *must* directly encodeable to ASCII. For - example, u'/foo' will work but u'/<unprintable unicode>' (a - Unicode object with characters that cannot be encoded to ascii) - will not. +@lru_cache(1000) +def traversal_path_info(path): + """ Given a ``PATH_INFO`` environ value (slash-separated path segments), + return a tuple representing that path which can be used to traverse a + resource tree. + + ``PATH_INFO`` is assumed to already be URL-decoded. It is encoded to + bytes using the Latin-1 encoding; the resulting set of bytes is + subsequently decoded to text using the UTF-8 encoding; a + :exc:`pyramid.exceptions.URLDecodeError` is raised if the URL cannot be + decoded. 
+ + The ``PATH_INFO`` is split on slashes, creating a list of segments. Each + segment is subsequently decoded into Unicode. If a segment name is empty or + if it is ``.``, it is ignored. If a segment name is ``..``, the previous + segment is deleted, and the ``..`` is ignored. + + If this function is passed a Unicode object instead of a string, that + Unicode object *must* be directly encodable to ASCII. For example, u'/foo' + will work but u'/<unprintable unicode>' (a Unicode object with characters + that cannot be encoded to ascii) will not. A :exc:`UnicodeError` will be + raised if the Unicode cannot be encoded directly to ASCII. Examples: @@ -478,15 +494,13 @@ def traversal_path(path): their own traversal machinery, as opposed to users writing applications in :app:`Pyramid`. """ - if isinstance(path, text_type): - path.encode('ascii') # check for asci-only-ness + try: + path = bytes_(path, 'latin-1').decode('utf-8') + except UnicodeDecodeError as e: + raise URLDecodeError(e.encoding, e.object, e.start, e.end, e.reason) path = path.strip('/') clean = [] for segment in path.split('/'): - try: - segment = url_unquote_text(segment, 'utf-8', 'strict') - except UnicodeDecodeError as e: - raise URLDecodeError(e.encoding, e.object, e.start, e.end, e.reason) if not segment or segment == '.': continue elif segment == '..': @@ -604,7 +618,7 @@ class ResourceTreeTraverser(object): subpath = matchdict.get('subpath', ()) if not is_nonstr_iter(subpath): # this is not a *subpath stararg (just a {subpath}) - subpath = traversal_path(subpath) + subpath = traversal_path_info(subpath) else: # this request did not match a route @@ -616,7 +630,7 @@ class ResourceTreeTraverser(object): if VH_ROOT_KEY in environ: vroot_path = environ[VH_ROOT_KEY] - vroot_tuple = traversal_path(vroot_path) + vroot_tuple = traversal_path_info(vroot_path) vpath = vroot_path + path vroot_idx = len(vroot_tuple) -1 else: @@ -637,7 +651,7 @@ class ResourceTreeTraverser(object): # and this hurts readability; apologies i = 0 
view_selector = self.VIEW_SELECTOR - vpath_tuple = traversal_path(vpath) + vpath_tuple = traversal_path_info(vpath) for segment in vpath_tuple: if segment[:2] == view_selector: return {'context':ob, diff --git a/pyramid/urldispatch.py b/pyramid/urldispatch.py index 54b52336a..6fe49f442 100644 --- a/pyramid/urldispatch.py +++ b/pyramid/urldispatch.py @@ -4,14 +4,14 @@ from zope.interface import implementer from pyramid.interfaces import IRoutesMapper from pyramid.interfaces import IRoute -from pyramid.compat import url_unquote_text from pyramid.compat import native_ +from pyramid.compat import bytes_ from pyramid.compat import text_type from pyramid.compat import string_types from pyramid.compat import is_nonstr_iter from pyramid.compat import url_quote from pyramid.exceptions import URLDecodeError -from pyramid.traversal import traversal_path +from pyramid.traversal import traversal_path_info from pyramid.traversal import quote_path_segment _marker = object() @@ -139,11 +139,10 @@ def _compile_route(route): d = {} for k, v in m.groupdict().items(): if k == star: - d[k] = traversal_path(v) + d[k] = traversal_path_info(v) else: try: - val = url_unquote_text( - v, encoding='utf-8', errors='strict') + val = bytes_(v).decode('utf-8', 'strict') d[k] = val except UnicodeDecodeError as e: raise URLDecodeError( -- cgit v1.2.3