summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris McDonough <chrism@plope.com>2012-01-05 02:41:32 -0500
committerChris McDonough <chrism@plope.com>2012-01-05 02:41:32 -0500
commita511b1423334f855e996bb06714b36aa86f861e9 (patch)
tree62e4bcabdb465b28545110e0ac02b5b3ec55a364
parentf2ef9a3026723cabbfaeffd128d7b7a874f74002 (diff)
downloadpyramid-a511b1423334f855e996bb06714b36aa86f861e9.tar.gz
pyramid-a511b1423334f855e996bb06714b36aa86f861e9.tar.bz2
pyramid-a511b1423334f855e996bb06714b36aa86f861e9.zip
fix urldispatch matching and generation to cope with various inputs
-rw-r--r--docs/narr/urldispatch.rst87
-rw-r--r--pyramid/tests/test_urldispatch.py59
-rw-r--r--pyramid/urldispatch.py99
3 files changed, 200 insertions, 45 deletions
diff --git a/docs/narr/urldispatch.rst b/docs/narr/urldispatch.rst
index 35613ea1b..7e485f8ae 100644
--- a/docs/narr/urldispatch.rst
+++ b/docs/narr/urldispatch.rst
@@ -235,7 +235,7 @@ When matching the following URL:
.. code-block:: text
- foo/La%20Pe%C3%B1a
+ http://example.com/foo/La%20Pe%C3%B1a
The matchdict will look like so (the value is URL-decoded / UTF-8 decoded):
@@ -243,6 +243,50 @@ The matchdict will look like so (the value is URL-decoded / UTF-8 decoded):
{'bar':u'La Pe\xf1a'}
+Literal strings in the path segment should represent the *decoded* value of
+the ``PATH_INFO`` provided to Pyramid. You don't want to use a URL-encoded
+value or a bytestring representing the literal's UTF-8 in the pattern. For
+example, rather than this:
+
+.. code-block:: text
+
+ /Foo%20Bar/{baz}
+
+You'll want to use something like this:
+
+.. code-block:: text
+
+ /Foo Bar/{baz}
+
+For patterns that contain "high-order" characters in their literals, you'll
+want to use a Unicode value as the pattern as opposed to any URL-encoded or
+UTF-8-encoded value. For example, you might be tempted to use a bytestring
+pattern like this:
+
+.. code-block:: text
+
+ /La Pe\xc3\xb1a/{x}
+
+But that probably won't match as you expect it to. You'll want to use a
+Unicode value as the pattern rather than raw bytestring escapes. You
+can use a high-order Unicode value as the pattern by using `Python source
+file encoding <http://www.python.org/dev/peps/pep-0263/>`_ plus the "real"
+character in the Unicode pattern in the source, like so:
+
+.. code-block:: text
+
+ /La Peña/{x}
+
+Or you can ignore source file encoding and use the equivalent Unicode escape
+sequences in the pattern, like so:
+
+.. code-block:: text
+
+ /La Pe\xf1a/{x}
+
+Dynamic segment names cannot contain high-order characters, so this applies
+only to literals in the pattern.
+
If the pattern has a ``*`` in it, the name which follows it is considered a
"remainder match". A remainder match *must* come at the end of the pattern.
Unlike segment replacement markers, it does not need to be preceded by a
@@ -612,7 +656,6 @@ Use the :meth:`pyramid.request.Request.route_url` method to generate URLs
based on route patterns. For example, if you've configured a route with the
``name`` "foo" and the ``pattern`` "{a}/{b}/{c}", you might do this.
-.. ignore-next-block
.. code-block:: python
:linenos:
@@ -620,7 +663,45 @@ based on route patterns. For example, if you've configured a route with the
This would return something like the string ``http://example.com/1/2/3`` (at
least if the current protocol and hostname implied ``http://example.com``).
-See the :meth:`~pyramid.request.Request.route_url` API documentation for more
+
+To get only the *path* of a route, use the
+:meth:`pyramid.request.Request.route_path` API instead of
+:meth:`~pyramid.request.Request.route_url`.
+
+.. code-block:: python
+
+ url = request.route_path('foo', a='1', b='2', c='3')
+
+This will return the string ``/1/2/3`` rather than a full URL.
+
+Note that URLs and paths generated by ``route_path`` and ``route_url`` are
+always URL-quoted string types (which contain no non-ASCII characters).
+Therefore, if you've added a route like so:
+
+.. code-block:: python
+
+ config.add_route('la', u'/La Peña/{city}')
+
+And you later generate a URL using ``route_path`` or ``route_url`` like so:
+
+.. code-block:: python
+
+ url = request.route_path('la', city=u'Québec')
+
+You will wind up with the path encoded to UTF-8 and URL quoted like so:
+
+.. code-block:: python
+
+ /La%20Pe%C3%B1a/Qu%C3%A9bec
+
+.. note::
+
+ Generating URL-quoted URLs and paths is new as of Pyramid 1.3 (and Pyramid
+ 1.2 after 1.2.6). Previous versions generated unquoted URLs and paths
+ (which was broken).
+
+See the :meth:`~pyramid.request.Request.route_url` and
+:meth:`~pyramid.request.Request.route_path` API documentation for more
information.
.. index::
diff --git a/pyramid/tests/test_urldispatch.py b/pyramid/tests/test_urldispatch.py
index be823b045..856bdcb78 100644
--- a/pyramid/tests/test_urldispatch.py
+++ b/pyramid/tests/test_urldispatch.py
@@ -292,12 +292,6 @@ class TestCompileRoute(unittest.TestCase):
self.assertEqual(matcher('foo/baz/biz/buz/bar'), None)
self.assertEqual(generator({'baz':1, 'buz':2}), '/foo/1/biz/2/bar')
- def test_url_decode_error(self):
- from pyramid.exceptions import URLDecodeError
- matcher, generator = self._callFUT('/:foo')
- self.assertRaises(URLDecodeError, matcher,
- native_(b'/\xff\xfe\x8b\x00'))
-
def test_custom_regex(self):
matcher, generator = self._callFUT('foo/{baz}/biz/{buz:[^/\.]+}.{bar}')
self.assertEqual(matcher('/foo/baz/biz/buz.bar'),
@@ -328,7 +322,8 @@ class TestCompileRoute(unittest.TestCase):
self.assertEqual(generator({'buz':2001}), '/2001')
def test_custom_regex_with_embedded_squigglies3(self):
- matcher, generator = self._callFUT('/{buz:(\d{2}|\d{4})-[a-zA-Z]{3,4}-\d{2}}')
+ matcher, generator = self._callFUT(
+ '/{buz:(\d{2}|\d{4})-[a-zA-Z]{3,4}-\d{2}}')
self.assertEqual(matcher('/2001-Nov-15'), {'buz':'2001-Nov-15'})
self.assertEqual(matcher('/99-June-10'), {'buz':'99-June-10'})
self.assertEqual(matcher('/2-Nov-15'), None)
@@ -337,6 +332,39 @@ class TestCompileRoute(unittest.TestCase):
self.assertEqual(generator({'buz':'2001-Nov-15'}), '/2001-Nov-15')
self.assertEqual(generator({'buz':'99-June-10'}), '/99-June-10')
+ def test_pattern_with_high_order_literal(self):
+ pattern = text_(b'/La Pe\xc3\xb1a/{x}', 'utf-8')
+ matcher, generator = self._callFUT(pattern)
+ self.assertEqual(matcher(text_(b'/La Pe\xc3\xb1a/x', 'utf-8')),
+ {'x':'x'})
+ self.assertEqual(generator({'x':'1'}), '/La%20Pe%C3%B1a/1')
+
+ def test_pattern_generate_with_high_order_dynamic(self):
+ pattern = '/{x}'
+ _, generator = self._callFUT(pattern)
+ self.assertEqual(
+ generator({'x':text_(b'La Pe\xc3\xb1a', 'utf-8')}),
+ '/La%20Pe%C3%B1a')
+
+ def test_docs_sample_generate(self):
+ # sample from urldispatch.rst
+ pattern = text_(b'/La Pe\xc3\xb1a/{city}', 'utf-8')
+ _, generator = self._callFUT(pattern)
+ self.assertEqual(
+ generator({'city':text_(b'Qu\xc3\xa9bec', 'utf-8')}),
+ '/La%20Pe%C3%B1a/Qu%C3%A9bec')
+
+ def test_generate_with_mixedtype_values(self):
+ pattern = '/{city}/{state}'
+ _, generator = self._callFUT(pattern)
+ result = generator(
+ {'city': text_(b'Qu\xc3\xa9bec', 'utf-8'),
+ 'state': b'La Pe\xc3\xb1a'}
+ )
+ self.assertEqual(result, '/Qu%C3%A9bec/La%20Pe%C3%B1a')
+ # should be a native string
+ self.assertEqual(type(result), str)
+
class TestCompileRouteFunctional(unittest.TestCase):
def matches(self, pattern, path, expected):
from pyramid.urldispatch import _compile_route
@@ -368,11 +396,11 @@ class TestCompileRouteFunctional(unittest.TestCase):
self.matches('*traverse', '/zzz/abc', {'traverse':('zzz', 'abc')})
self.matches('*traverse', '/zzz/ abc', {'traverse':('zzz', ' abc')})
#'/La%20Pe%C3%B1a'
- self.matches('{x}', native_(b'/La Pe\xc3\xb1a'),
- {'x':text_(b'La Pe\xf1a')})
+ self.matches('{x}', text_(b'/La Pe\xc3\xb1a', 'utf-8'),
+ {'x':text_(b'La Pe\xc3\xb1a', 'utf-8')})
# '/La%20Pe%C3%B1a/x'
- self.matches('*traverse', native_(b'/La Pe\xc3\xb1a/x'),
- {'traverse':(text_(b'La Pe\xf1a'), 'x')})
+ self.matches('*traverse', text_(b'/La Pe\xc3\xb1a/x'),
+ {'traverse':(text_(b'La Pe\xc3\xb1a'), 'x')})
self.matches('/foo/{id}.html', '/foo/bar.html', {'id':'bar'})
self.matches('/{num:[0-9]+}/*traverse', '/555/abc/def',
{'num':'555', 'traverse':('abc', 'def')})
@@ -394,11 +422,12 @@ class TestCompileRouteFunctional(unittest.TestCase):
self.matches('*traverse', '/zzz/abc', {'traverse':('zzz', 'abc')})
self.matches('*traverse', '/zzz/ abc', {'traverse':('zzz', ' abc')})
#'/La%20Pe%C3%B1a'
- self.matches(':x', native_(b'/La Pe\xc3\xb1a'),
- {'x':text_(b'La Pe\xf1a')})
+ # pattern, path, expected
+ self.matches(':x', text_(b'/La Pe\xc3\xb1a', 'utf-8'),
+ {'x':text_(b'La Pe\xc3\xb1a', 'utf-8')})
# '/La%20Pe%C3%B1a/x'
- self.matches('*traverse', native_(b'/La Pe\xc3\xb1a/x'),
- {'traverse':(text_(b'La Pe\xf1a'), 'x')})
+ self.matches('*traverse', text_(b'/La Pe\xc3\xb1a/x', 'utf-8'),
+ {'traverse':(text_(b'La Pe\xc3\xb1a', 'utf-8'), 'x')})
self.matches('/foo/:id.html', '/foo/bar.html', {'id':'bar'})
self.matches('/foo/:id_html', '/foo/bar_html', {'id_html':'bar_html'})
self.matches('zzz/:_', '/zzz/abc', {'_':'abc'})
diff --git a/pyramid/urldispatch.py b/pyramid/urldispatch.py
index 73875b675..cb0e57c4d 100644
--- a/pyramid/urldispatch.py
+++ b/pyramid/urldispatch.py
@@ -7,10 +7,12 @@ from pyramid.interfaces import (
)
from pyramid.compat import (
+ PY3,
native_,
- bytes_,
+ text_,
text_type,
string_types,
+ binary_type,
is_nonstr_iter,
url_quote,
)
@@ -103,72 +105,115 @@ def update_pattern(matchobj):
return '{%s}' % name[1:]
def _compile_route(route):
+ # This function really wants to consume Unicode patterns natively, but if
+ # someone passes us a bytestring, we allow it by converting it to Unicode
+ # using the ASCII decoding. We decode it using ASCII because we don't
+ # want to accept bytestrings with high-order characters in them here as
+ # we have no idea what the encoding represents.
+ if route.__class__ is not text_type:
+ route = text_(route, 'ascii')
+
if old_route_re.search(route) and not route_re.search(route):
route = old_route_re.sub(update_pattern, route)
if not route.startswith('/'):
route = '/' + route
- star = None
+ remainder = None
if star_at_end.search(route):
- route, star = route.rsplit('*', 1)
+ route, remainder = route.rsplit('*', 1)
pat = route_re.split(route)
+
+ # every element in "pat" will be Unicode (regardless of whether the
+ # route_re regex pattern is itself Unicode or str)
pat.reverse()
rpat = []
gen = []
prefix = pat.pop() # invar: always at least one element (route='/'+route)
- rpat.append(re.escape(prefix))
- gen.append(prefix)
+
+ # We want to generate URL-encoded URLs, so we url-quote the prefix, being
+ # careful not to quote any embedded slashes. We have to replace '%' with
+ # '%%' afterwards, as the strings that go into "gen" are used as string
+ # replacement targets.
+ gen.append(quote_path_segment(prefix, safe='/').replace('%', '%%')) # native
+ rpat.append(re.escape(prefix)) # unicode
while pat:
- name = pat.pop()
+ name = pat.pop() # unicode
name = name[1:-1]
if ':' in name:
name, reg = name.split(':')
else:
reg = '[^/]+'
- gen.append('%%(%s)s' % name)
- name = '(?P<%s>%s)' % (name, reg)
+ gen.append('%%(%s)s' % native_(name)) # native
+ name = '(?P<%s>%s)' % (name, reg) # unicode
rpat.append(name)
- s = pat.pop()
+ s = pat.pop() # unicode
if s:
- rpat.append(re.escape(s))
- gen.append(s)
+ rpat.append(re.escape(s)) # unicode
+ # We want to generate URL-encoded URLs, so we url-quote this
+ # literal in the pattern, being careful not to quote the embedded
+ # slashes. We have to replace '%' with '%%' afterwards, as the
+ # strings that go into "gen" are used as string replacement
+ # targets. What is appended to gen is a native string.
+ gen.append(quote_path_segment(s, safe='/').replace('%', '%%'))
- if star:
- rpat.append('(?P<%s>.*?)' % star)
- gen.append('%%(%s)s' % star)
+ if remainder:
+ rpat.append('(?P<%s>.*?)' % remainder) # unicode
+ gen.append('%%(%s)s' % native_(remainder)) # native
- pattern = ''.join(rpat) + '$'
+ pattern = ''.join(rpat) + '$' # unicode
match = re.compile(pattern).match
def matcher(path):
+ # This function really wants to consume Unicode patterns natively,
+ # but if someone passes us a bytestring, we allow it by converting it
+ # to Unicode using the ASCII decoding. We decode it using ASCII
+ # because we don't want to accept bytestrings with high-order
+ # characters in them here as we have no idea what the encoding
+ # represents.
+ if path.__class__ is not text_type:
+ path = text_(path, 'ascii')
m = match(path)
if m is None:
- return m
+ return None
d = {}
for k, v in m.groupdict().items():
- if k == star:
- d[k] = split_path_info(v)
+ # k and v will be Unicode. Python 2.6.4 and lower doesn't accept
+ # unicode kwargs as **kw, so we explicitly cast the keys to native
+ # strings in case someone wants to pass the result as **kw
+ nk = native_(k, 'ascii')
+ if k == remainder:
+ d[nk] = split_path_info(v)
else:
- d[k] = v
+ d[nk] = v
return d
-
gen = ''.join(gen)
def generator(dict):
newdict = {}
for k, v in dict.items():
- if v.__class__ is text_type:
- v = native_(v, 'utf-8')
- if k == star and is_nonstr_iter(v):
- v = '/'.join([quote_path_segment(x) for x in v])
- elif k != star:
+ if PY3:
+ if v.__class__ is binary_type:
+ # url_quote below needs a native string, not bytes on Py3
+ v = v.decode('utf-8')
+ else:
+ if v.__class__ is text_type:
+ # url_quote below needs bytes, not unicode on Py2
+ v = v.encode('utf-8')
+ if k == remainder and is_nonstr_iter(v):
+ v = '/'.join([quote_path_segment(x) for x in v]) # native
+ elif k != remainder:
if v.__class__ not in string_types:
v = str(v)
- v = url_quote(v, safe='')
+ # v may be bytes (py2) or native string (py3)
+ v = url_quote(v, safe='') # defaults to utf8 encoding on py3
+
+ # at this point, the value will be a native string
newdict[k] = v
- return gen % newdict
+
+ result = gen % newdict # native string result
+ return result
return matcher, generator