diff options
| author | Chris McDonough <chrism@plope.com> | 2012-01-05 02:41:32 -0500 |
|---|---|---|
| committer | Chris McDonough <chrism@plope.com> | 2012-01-05 02:41:32 -0500 |
| commit | a511b1423334f855e996bb06714b36aa86f861e9 (patch) | |
| tree | 62e4bcabdb465b28545110e0ac02b5b3ec55a364 | |
| parent | f2ef9a3026723cabbfaeffd128d7b7a874f74002 (diff) | |
| download | pyramid-a511b1423334f855e996bb06714b36aa86f861e9.tar.gz pyramid-a511b1423334f855e996bb06714b36aa86f861e9.tar.bz2 pyramid-a511b1423334f855e996bb06714b36aa86f861e9.zip | |
fix urldispatch matching and generation to cope with various inputs
| -rw-r--r-- | docs/narr/urldispatch.rst | 87 | ||||
| -rw-r--r-- | pyramid/tests/test_urldispatch.py | 59 | ||||
| -rw-r--r-- | pyramid/urldispatch.py | 99 |
3 files changed, 200 insertions, 45 deletions
diff --git a/docs/narr/urldispatch.rst b/docs/narr/urldispatch.rst index 35613ea1b..7e485f8ae 100644 --- a/docs/narr/urldispatch.rst +++ b/docs/narr/urldispatch.rst @@ -235,7 +235,7 @@ When matching the following URL: .. code-block:: text - foo/La%20Pe%C3%B1a + http://example.com/foo/La%20Pe%C3%B1a The matchdict will look like so (the value is URL-decoded / UTF-8 decoded): @@ -243,6 +243,50 @@ The matchdict will look like so (the value is URL-decoded / UTF-8 decoded): {'bar':u'La Pe\xf1a'} +Literal strings in the path segment should represent the *decoded* value of +the ``PATH_INFO`` provided to Pyramid. You don't want to use a URL-encoded +value or a bytestring representing the literal's UTF-8 in the pattern. For +example, rather than this: + +.. code-block:: text + + /Foo%20Bar/{baz} + +You'll want to use something like this: + +.. code-block:: text + + /Foo Bar/{baz} + +For patterns that contain "high-order" characters in their literals, you'll +want to use a Unicode value as the pattern as opposed to any URL-encoded or +UTF-8-encoded value. For example, you might be tempted to use a bytestring +pattern like this: + +.. code-block:: text + + /La Pe\xc3\xb1a/{x} + +But that probably won't match as you expect it to. You'll want to use a +Unicode value as the pattern rather than raw bytestring escapes. You +can use a high-order Unicode value as the pattern by using `Python source +file encoding <http://www.python.org/dev/peps/pep-0263/>`_ plus the "real" +character in the Unicode pattern in the source, like so: + +.. code-block:: text + + /La Peña/{x} + +Or you can ignore source file encoding and use equivalent Unicode escape +characters in the pattern. + +.. code-block:: text + + /La Pe\xf1a/{x} + +Dynamic segment names cannot contain high-order characters, so this applies +only to literals in the pattern. + If the pattern has a ``*`` in it, the name which follows it is considered a "remainder match". A remainder match *must* come at the end of the pattern. 
Unlike segment replacement markers, it does not need to be preceded by a @@ -612,7 +656,6 @@ Use the :meth:`pyramid.request.Request.route_url` method to generate URLs based on route patterns. For example, if you've configured a route with the ``name`` "foo" and the ``pattern`` "{a}/{b}/{c}", you might do this. -.. ignore-next-block .. code-block:: python :linenos: @@ -620,7 +663,45 @@ based on route patterns. For example, if you've configured a route with the This would return something like the string ``http://example.com/1/2/3`` (at least if the current protocol and hostname implied ``http://example.com``). -See the :meth:`~pyramid.request.Request.route_url` API documentation for more + +To get only the *path* of a route, use the +:meth:`pyramid.request.Request.route_path` API instead of +:meth:`~pyramid.request.Request.route_url`. + +.. code-block:: python + + url = request.route_path('foo', a='1', b='2', c='3') + +This will return the string ``/1/2/3`` rather than a full URL. + +Note that URLs and paths generated by ``route_path`` and ``route_url`` are +always URL-quoted string types (which contain no non-ASCII characters). +Therefore, if you've added a route like so: + +.. code-block:: python + + config.add_route('la', u'/La Peña/{city}') + +And you later generate a URL using ``route_path`` or ``route_url`` like so: + +.. code-block:: python + + url = request.route_path('la', city=u'Québec') + +You will wind up with the path encoded to UTF-8 and URL quoted like so: + +.. code-block:: python + + /La%20Pe%C3%B1a/Qu%C3%A9bec + +.. note:: + + Generating URL-quoted URLs and paths is new as of Pyramid 1.3 (and Pyramid + 1.2 after 1.2.6). Previous versions generated unquoted URLs and paths + (which was broken). + +See the :meth:`~pyramid.request.Request.route_url` and +:meth:`~pyramid.request.Request.route_path` API documentation for more information. .. 
index:: diff --git a/pyramid/tests/test_urldispatch.py b/pyramid/tests/test_urldispatch.py index be823b045..856bdcb78 100644 --- a/pyramid/tests/test_urldispatch.py +++ b/pyramid/tests/test_urldispatch.py @@ -292,12 +292,6 @@ class TestCompileRoute(unittest.TestCase): self.assertEqual(matcher('foo/baz/biz/buz/bar'), None) self.assertEqual(generator({'baz':1, 'buz':2}), '/foo/1/biz/2/bar') - def test_url_decode_error(self): - from pyramid.exceptions import URLDecodeError - matcher, generator = self._callFUT('/:foo') - self.assertRaises(URLDecodeError, matcher, - native_(b'/\xff\xfe\x8b\x00')) - def test_custom_regex(self): matcher, generator = self._callFUT('foo/{baz}/biz/{buz:[^/\.]+}.{bar}') self.assertEqual(matcher('/foo/baz/biz/buz.bar'), @@ -328,7 +322,8 @@ class TestCompileRoute(unittest.TestCase): self.assertEqual(generator({'buz':2001}), '/2001') def test_custom_regex_with_embedded_squigglies3(self): - matcher, generator = self._callFUT('/{buz:(\d{2}|\d{4})-[a-zA-Z]{3,4}-\d{2}}') + matcher, generator = self._callFUT( + '/{buz:(\d{2}|\d{4})-[a-zA-Z]{3,4}-\d{2}}') self.assertEqual(matcher('/2001-Nov-15'), {'buz':'2001-Nov-15'}) self.assertEqual(matcher('/99-June-10'), {'buz':'99-June-10'}) self.assertEqual(matcher('/2-Nov-15'), None) @@ -337,6 +332,39 @@ class TestCompileRoute(unittest.TestCase): self.assertEqual(generator({'buz':'2001-Nov-15'}), '/2001-Nov-15') self.assertEqual(generator({'buz':'99-June-10'}), '/99-June-10') + def test_pattern_with_high_order_literal(self): + pattern = text_(b'/La Pe\xc3\xb1a/{x}', 'utf-8') + matcher, generator = self._callFUT(pattern) + self.assertEqual(matcher(text_(b'/La Pe\xc3\xb1a/x', 'utf-8')), + {'x':'x'}) + self.assertEqual(generator({'x':'1'}), '/La%20Pe%C3%B1a/1') + + def test_pattern_generate_with_high_order_dynamic(self): + pattern = '/{x}' + _, generator = self._callFUT(pattern) + self.assertEqual( + generator({'x':text_(b'La Pe\xc3\xb1a', 'utf-8')}), + '/La%20Pe%C3%B1a') + + def test_docs_sample_generate(self): 
+ # sample from urldispatch.rst + pattern = text_(b'/La Pe\xc3\xb1a/{city}', 'utf-8') + _, generator = self._callFUT(pattern) + self.assertEqual( + generator({'city':text_(b'Qu\xc3\xa9bec', 'utf-8')}), + '/La%20Pe%C3%B1a/Qu%C3%A9bec') + + def test_generate_with_mixedtype_values(self): + pattern = '/{city}/{state}' + _, generator = self._callFUT(pattern) + result = generator( + {'city': text_(b'Qu\xc3\xa9bec', 'utf-8'), + 'state': b'La Pe\xc3\xb1a'} + ) + self.assertEqual(result, '/Qu%C3%A9bec/La%20Pe%C3%B1a') + # should be a native string + self.assertEqual(type(result), str) + class TestCompileRouteFunctional(unittest.TestCase): def matches(self, pattern, path, expected): from pyramid.urldispatch import _compile_route @@ -368,11 +396,11 @@ class TestCompileRouteFunctional(unittest.TestCase): self.matches('*traverse', '/zzz/abc', {'traverse':('zzz', 'abc')}) self.matches('*traverse', '/zzz/ abc', {'traverse':('zzz', ' abc')}) #'/La%20Pe%C3%B1a' - self.matches('{x}', native_(b'/La Pe\xc3\xb1a'), - {'x':text_(b'La Pe\xf1a')}) + self.matches('{x}', text_(b'/La Pe\xc3\xb1a', 'utf-8'), + {'x':text_(b'La Pe\xc3\xb1a', 'utf-8')}) # '/La%20Pe%C3%B1a/x' - self.matches('*traverse', native_(b'/La Pe\xc3\xb1a/x'), - {'traverse':(text_(b'La Pe\xf1a'), 'x')}) + self.matches('*traverse', text_(b'/La Pe\xc3\xb1a/x'), + {'traverse':(text_(b'La Pe\xc3\xb1a'), 'x')}) self.matches('/foo/{id}.html', '/foo/bar.html', {'id':'bar'}) self.matches('/{num:[0-9]+}/*traverse', '/555/abc/def', {'num':'555', 'traverse':('abc', 'def')}) @@ -394,11 +422,12 @@ class TestCompileRouteFunctional(unittest.TestCase): self.matches('*traverse', '/zzz/abc', {'traverse':('zzz', 'abc')}) self.matches('*traverse', '/zzz/ abc', {'traverse':('zzz', ' abc')}) #'/La%20Pe%C3%B1a' - self.matches(':x', native_(b'/La Pe\xc3\xb1a'), - {'x':text_(b'La Pe\xf1a')}) + # pattern, path, expected + self.matches(':x', text_(b'/La Pe\xc3\xb1a', 'utf-8'), + {'x':text_(b'La Pe\xc3\xb1a', 'utf-8')}) # '/La%20Pe%C3%B1a/x' - 
self.matches('*traverse', native_(b'/La Pe\xc3\xb1a/x'), - {'traverse':(text_(b'La Pe\xf1a'), 'x')}) + self.matches('*traverse', text_(b'/La Pe\xc3\xb1a/x', 'utf-8'), + {'traverse':(text_(b'La Pe\xc3\xb1a', 'utf-8'), 'x')}) self.matches('/foo/:id.html', '/foo/bar.html', {'id':'bar'}) self.matches('/foo/:id_html', '/foo/bar_html', {'id_html':'bar_html'}) self.matches('zzz/:_', '/zzz/abc', {'_':'abc'}) diff --git a/pyramid/urldispatch.py b/pyramid/urldispatch.py index 73875b675..cb0e57c4d 100644 --- a/pyramid/urldispatch.py +++ b/pyramid/urldispatch.py @@ -7,10 +7,12 @@ from pyramid.interfaces import ( ) from pyramid.compat import ( + PY3, native_, - bytes_, + text_, text_type, string_types, + binary_type, is_nonstr_iter, url_quote, ) @@ -103,72 +105,115 @@ def update_pattern(matchobj): return '{%s}' % name[1:] def _compile_route(route): + # This function really wants to consume Unicode patterns natively, but if + # someone passes us a bytestring, we allow it by converting it to Unicode + # using the ASCII decoding. We decode it using ASCII because we don't + # want to accept bytestrings with high-order characters in them here as + # we have no idea what the encoding represents. 
+ if route.__class__ is not text_type: + route = text_(route, 'ascii') + if old_route_re.search(route) and not route_re.search(route): route = old_route_re.sub(update_pattern, route) if not route.startswith('/'): route = '/' + route - star = None + remainder = None if star_at_end.search(route): - route, star = route.rsplit('*', 1) + route, remainder = route.rsplit('*', 1) pat = route_re.split(route) + + # every element in "pat" will be Unicode (regardless of whether the + # route_re regex pattern is itself Unicode or str) pat.reverse() rpat = [] gen = [] prefix = pat.pop() # invar: always at least one element (route='/'+route) - rpat.append(re.escape(prefix)) - gen.append(prefix) + + # We want to generate URL-encoded URLs, so we url-quote the prefix, being + # careful not to quote any embedded slashes. We have to replace '%' with + # '%%' afterwards, as the strings that go into "gen" are used as string + # replacement targets. + gen.append(quote_path_segment(prefix, safe='/').replace('%', '%%')) # native + rpat.append(re.escape(prefix)) # unicode while pat: - name = pat.pop() + name = pat.pop() # unicode name = name[1:-1] if ':' in name: name, reg = name.split(':') else: reg = '[^/]+' - gen.append('%%(%s)s' % name) - name = '(?P<%s>%s)' % (name, reg) + gen.append('%%(%s)s' % native_(name)) # native + name = '(?P<%s>%s)' % (name, reg) # unicode rpat.append(name) - s = pat.pop() + s = pat.pop() # unicode if s: - rpat.append(re.escape(s)) - gen.append(s) + rpat.append(re.escape(s)) # unicode + # We want to generate URL-encoded URLs, so we url-quote this + # literal in the pattern, being careful not to quote the embedded + # slashes. We have to replace '%' with '%%' afterwards, as the + # strings that go into "gen" are used as string replacement + # targets. What is appended to gen is a native string. 
+ gen.append(quote_path_segment(s, safe='/').replace('%', '%%')) - if star: - rpat.append('(?P<%s>.*?)' % star) - gen.append('%%(%s)s' % star) + if remainder: + rpat.append('(?P<%s>.*?)' % remainder) # unicode + gen.append('%%(%s)s' % native_(remainder)) # native - pattern = ''.join(rpat) + '$' + pattern = ''.join(rpat) + '$' # unicode match = re.compile(pattern).match def matcher(path): + # This function really wants to consume Unicode paths natively, + # but if someone passes us a bytestring, we allow it by converting it + # to Unicode using the ASCII decoding. We decode it using ASCII + # because we don't want to accept bytestrings with high-order + # characters in them here as we have no idea what the encoding + # represents. + if path.__class__ is not text_type: + path = text_(path, 'ascii') m = match(path) if m is None: - return m + return None d = {} for k, v in m.groupdict().items(): - if k == star: - d[k] = split_path_info(v) + # k and v will be Unicode; Python 2.6.4 and lower doesn't accept unicode + # kwargs as **kw, so we explicitly cast the keys to native + # strings in case someone wants to pass the result as **kw + nk = native_(k, 'ascii') + if k == remainder: + d[nk] = split_path_info(v) else: - d[k] = v + d[nk] = v return d - gen = ''.join(gen) def generator(dict): newdict = {} for k, v in dict.items(): - if v.__class__ is text_type: - v = native_(v, 'utf-8') - if k == star and is_nonstr_iter(v): - v = '/'.join([quote_path_segment(x) for x in v]) - elif k != star: + if PY3: + if v.__class__ is binary_type: + # url_quote below needs a native string, not bytes on Py3 + v = v.decode('utf-8') + else: + if v.__class__ is text_type: + # url_quote below needs bytes, not unicode on Py2 + v = v.encode('utf-8') + if k == remainder and is_nonstr_iter(v): + v = '/'.join([quote_path_segment(x) for x in v]) # native + elif k != remainder: if v.__class__ not in string_types: v = str(v) - v = url_quote(v, safe='') + # v may be bytes (py2) or native string (py3) + v = 
url_quote(v, safe='') # defaults to utf8 encoding on py3 + + # at this point, the value will be a native string newdict[k] = v - return gen % newdict + + result = gen % newdict # native string result + return result return matcher, generator |
