summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris McDonough <chrism@plope.com>2012-01-04 01:16:50 -0500
committerChris McDonough <chrism@plope.com>2012-01-04 01:16:50 -0500
commit5c43d5725f7d3266f56ec58b412f797a82078aa0 (patch)
treeab8fc6fe930f897ad772d10c42fb89f9717334de
parent52a948e65ef923340a9c172fe6d258f2832f8799 (diff)
downloadpyramid-5c43d5725f7d3266f56ec58b412f797a82078aa0.tar.gz
pyramid-5c43d5725f7d3266f56ec58b412f797a82078aa0.tar.bz2
pyramid-5c43d5725f7d3266f56ec58b412f797a82078aa0.zip
untested work
-rw-r--r--pyramid/compat.py18
-rw-r--r--pyramid/traversal.py84
2 files changed, 71 insertions, 31 deletions
diff --git a/pyramid/compat.py b/pyramid/compat.py
index 12b8c9f37..7376278ac 100644
--- a/pyramid/compat.py
+++ b/pyramid/compat.py
@@ -213,3 +213,21 @@ except ImportError: # pragma: no cover
import json
+if PY3: # pragma: no cover
+ # see PEP 3333 for why we encode WSGI PATH_INFO to latin-1 before
+ # decoding it to utf-8
+ def decode_path_info(path):
+ return path.encode('latin-1').decode('utf-8')
+else:
+ def decode_path_info(path):
+ return path.decode('utf-8')
+
+if PY3: # pragma: no cover
+ # see PEP 3333 for why we decode the path to latin-1
+ from urllib.parse import unquote_to_bytes
+ def unquote_bytes_to_wsgi(bytestring):
+ return unquote_to_bytes(bytestring).decode('latin-1')
+else:
+ from urlparse import unquote as unquote_to_bytes
+ def unquote_bytes_to_wsgi(bytestring):
+ return unquote_to_bytes(bytestring)
diff --git a/pyramid/traversal.py b/pyramid/traversal.py
index cd624fd30..3489dade0 100644
--- a/pyramid/traversal.py
+++ b/pyramid/traversal.py
@@ -16,12 +16,12 @@ from pyramid.compat import (
PY3,
native_,
text_,
- bytes_,
ascii_native_,
text_type,
binary_type,
- url_unquote_native,
is_nonstr_iter,
+ decode_path_info,
+ unquote_bytes_to_wsgi,
)
from pyramid.encode import url_quote
@@ -429,33 +429,44 @@ def virtual_root(resource, request):
def traversal_path(path):
""" Variant of :func:`pyramid.traversal.traversal_path_info` suitable for
- decoding paths that are URL-encoded."""
- path = ascii_native_(path)
- path = url_unquote_native(path, 'latin-1', 'strict')
+ decoding paths that are URL-encoded.
+
+ If this function is passed a Unicode object instead of a sequence of
+ bytes as ``path``, that Unicode object *must* be directly encodable to
+ ASCII. For example, u'/foo' will work but u'/<unprintable unicode>' (a
+ Unicode object with characters that cannot be encoded to ascii) will
+ not. A :exc:`UnicodeEncodeError` will be raised if the Unicode cannot be
+ encoded directly to ASCII.
+ """
+ # we unquote this path exactly like a PEP 3333 server would
+ if isinstance(path, text_type):
+ path = path.encode('ascii')
+ path = unquote_bytes_to_wsgi(path) # result will be a native string
return traversal_path_info(path)
-@lru_cache(1000)
def traversal_path_info(path):
- """ Given a ``PATH_INFO`` environ value (slash-separated path segments),
- return a tuple representing that path which can be used to traverse a
- resource tree.
-
- ``PATH_INFO`` is assumed to already be URL-decoded. It is encoded to
- bytes using the Latin-1 encoding; the resulting set of bytes is
- subsequently decoded to text using the UTF-8 encoding; a
- :exc:`pyramid.exc.URLDecodeError` is raised if a the URL cannot be
- decoded.
-
- The ``PATH_INFO`` is split on slashes, creating a list of segments. Each
- segment subsequently decoded into Unicode. If a segment name is empty or
- if it is ``.``, it is ignored. If a segment name is ``..``, the previous
- segment is deleted, and the ``..`` is ignored.
-
- If this function is passed a Unicode object instead of a string, that
- Unicode object *must* directly encodeable to ASCII. For example, u'/foo'
- will work but u'/<unprintable unicode>' (a Unicode object with characters
- that cannot be encoded to ascii) will not. A :exc:`UnicodeError` will be
- raised if the Unicode cannot be encoded directly to ASCII.
+ """ Given ``path``, return a tuple representing that path which can be
+ used to traverse a resource tree. ``path`` is assumed to be an
+ already-URL-decoded ``str`` type as if it had come to us from an upstream
+ WSGI server as the ``PATH_INFO`` environment variable.
+
+ The ``path`` is first decoded from its WSGI representation to Unicode;
+ it is decoded differently depending on platform:
+
+ - On Python 2, ``path`` is decoded to Unicode from bytes using the UTF-8
+ decoding directly; a :exc:`pyramid.exc.URLDecodeError` is raised if the
+ URL cannot be decoded.
+
+ - On Python 3, as per the WSGI spec, ``path`` is first encoded to bytes
+ using the Latin-1 encoding; the resulting set of bytes is subsequently
+ decoded to text using the UTF-8 encoding; a
+ :exc:`pyramid.exc.URLDecodeError` is raised if the URL cannot be
+ decoded.
+
+ The ``path`` is split on slashes, creating a list of segments. If a
+ segment name is empty or if it is ``.``, it is ignored. If a segment
+ name is ``..``, the previous segment is deleted, and the ``..`` is
+ ignored.
Examples:
@@ -504,9 +515,15 @@ def traversal_path_info(path):
applications in :app:`Pyramid`.
"""
try:
- path = bytes_(path, 'latin-1').decode('utf-8')
+ path = decode_path_info(path)
except UnicodeDecodeError as e:
raise URLDecodeError(e.encoding, e.object, e.start, e.end, e.reason)
+ return split_path_info(path)
+
+@lru_cache(1000)
+def split_path_info(path):
+ # suitable for splitting an already-unquoted-already-decoded path_info
+ # string
path = path.strip('/')
clean = []
for segment in path.split('/'):
@@ -622,23 +639,28 @@ class ResourceTreeTraverser(object):
path = matchdict.get('traverse', '/') or '/'
if is_nonstr_iter(path):
# this is a *traverse stararg (not a {traverse})
- path = '/'.join([quote_path_segment(x) for x in path]) or '/'
+ # routing has already decoded these elements, so we just
+ # need to join them
+ path = '/'.join(path) or '/'
subpath = matchdict.get('subpath', ())
if not is_nonstr_iter(subpath):
# this is not a *subpath stararg (just a {subpath})
- subpath = traversal_path_info(subpath)
+ # routing has already decoded this string, so we just need
+ # to split it
+ subpath = split_path_info(subpath)
else:
# this request did not match a route
subpath = ()
try:
- path = environ['PATH_INFO'] or '/'
+ path = decode_path_info(environ['PATH_INFO'] or '/')
except KeyError:
path = '/'
if VH_ROOT_KEY in environ:
- vroot_path = environ[VH_ROOT_KEY]
+ # HTTP_X_VHM_ROOT
+ vroot_path = decode_path_info(environ[VH_ROOT_KEY])
vroot_tuple = traversal_path_info(vroot_path)
vpath = vroot_path + path
vroot_idx = len(vroot_tuple) -1