summaryrefslogtreecommitdiff
path: root/repoze/bfg/encode.py
diff options
context:
space:
mode:
authorChris McDonough <chrism@agendaless.com>2009-09-23 10:24:42 +0000
committerChris McDonough <chrism@agendaless.com>2009-09-23 10:24:42 +0000
commiteb9fbf5f24b5e41cadd1eac8ca970ba819ecb6a5 (patch)
tree26afc97a9ddc53feca88bfac957bb376357edc6b /repoze/bfg/encode.py
parent02e1fe45e4bb2d7473296d2b8c2114680d9c75c2 (diff)
downloadpyramid-eb9fbf5f24b5e41cadd1eac8ca970ba819ecb6a5.tar.gz
pyramid-eb9fbf5f24b5e41cadd1eac8ca970ba819ecb6a5.tar.bz2
pyramid-eb9fbf5f24b5e41cadd1eac8ca970ba819ecb6a5.zip
Features
--------

- Speed up ``repoze.bfg.encode.urlencode`` (née ``repoze.bfg.url.urlencode``) slightly.

- Speed up ``repoze.bfg.traversal.model_path`` and ``repoze.bfg.traversal.model_path_tuple`` slightly.

Internal
--------

- Move ``repoze.bfg.traversal._url_quote`` into ``repoze.bfg.encode`` as ``url_quote``.

Backwards Incompatibilities
---------------------------

- We previously had a Unicode-aware wrapper for the ``urllib.urlencode`` function named ``repoze.bfg.url.urlencode`` which delegated to the stdlib function, but which marshalled all unicode values to utf-8 strings before calling the stdlib version. A newer replacement now lives in ``repoze.bfg.encode`` (old imports will still work). The replacement does not delegate to the stdlib. The replacement diverges from the stdlib implementation and the previous ``repoze.bfg.url.urlencode`` implementation inasmuch as its ``doseq`` argument is a decoy: it always behaves in the ``doseq=True`` way (which is the only sane behavior) for speed purposes. The old import location (``repoze.bfg.url.urlencode``) still functions and has not been deprecated.
Diffstat (limited to 'repoze/bfg/encode.py')
-rw-r--r--repoze/bfg/encode.py107
1 files changed, 107 insertions, 0 deletions
diff --git a/repoze/bfg/encode.py b/repoze/bfg/encode.py
new file mode 100644
index 000000000..127c405ed
--- /dev/null
+++ b/repoze/bfg/encode.py
@@ -0,0 +1,107 @@
+import re
+
# Characters that never need quoting, regardless of the caller's ``safe``.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Per-(safe, always_safe) caches: character -> replacement text, and the
# compiled "contains something that must be quoted" pattern.
_safemaps = {}
_must_quote = {}


def url_quote(s, safe=''):
    """quote('abc def') -> 'abc%20def'

    Faster version of Python stdlib urllib.quote which also quotes
    the '/' character.

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    Unlike the default version of this function in the Python stdlib,
    by default, the quote function is intended for quoting individual
    path segments instead of an already composed path that might have
    '/' characters in it.  Thus, it *will* encode any '/' character it
    finds in a string.

    ``safe`` is a string of additional characters to leave unquoted on
    top of ``always_safe``.  The lookup table and the compiled pattern
    built for each distinct ``safe`` value are cached at module level.

    The lookup table only covers character codes 0-255; any other
    character in ``s`` that needs quoting raises ``KeyError``.
    """
    cachekey = (safe, always_safe)
    try:
        safe_map = _safemaps[cachekey]
        must_quote = _must_quote[cachekey]
    except KeyError:
        safe_chars = safe + always_safe
        # Escape the set before embedding it in a character class so that
        # '-', ']', '^' and '\' in ``safe`` are taken literally instead of
        # forming ranges or terminating the class.
        must_quote = re.compile('[^%s]' % re.escape(safe_chars))
        safe_map = {}
        for i in range(256):
            c = chr(i)
            if c in safe_chars:
                safe_map[c] = c
            else:
                safe_map[c] = '%%%02X' % i
        # Publish the pattern first: a hit in ``_safemaps`` implies the
        # matching ``_must_quote`` entry exists.
        _must_quote[cachekey] = must_quote
        _safemaps[cachekey] = safe_map
    # Fast path: nothing needs quoting, hand the input back untouched.
    if not must_quote.search(s):
        return s
    return ''.join(map(safe_map.__getitem__, s))
+
def quote_plus(s, safe=''):
    """ Variant of stdlib quote_plus built on the faster url_quote.

    Spaces in ``s`` are emitted as ``+``; everything else is quoted by
    ``url_quote`` with the given ``safe`` characters.
    """
    if ' ' not in s:
        return url_quote(s, safe)
    # Let spaces pass through url_quote untouched, then turn them into '+'.
    quoted = url_quote(s, safe + ' ')
    return quoted.replace(' ', '+')
+
def _utf8_quote_plus(value):
    """Return ``value`` plus-quoted, encoding unicode to UTF-8 first."""
    # isinstance (rather than an exact class check) so that subclasses of
    # the Python 2 ``unicode`` builtin are encoded too instead of being
    # passed to ``str()``.
    if isinstance(value, unicode):
        value = value.encode('utf-8')
    return quote_plus(str(value))


def urlencode(query, doseq=True):
    """
    An alternate implementation of Python's stdlib `urllib.urlencode
    function <http://docs.python.org/library/urllib.html>`_ which
    accepts unicode keys and values within the ``query``
    dict/sequence; all Unicode keys and values are first converted to
    UTF-8 before being used to compose the query string.

    The value of ``query`` must be a sequence of two-tuples
    representing key/value pairs *or* an object (often a dictionary)
    with an ``.items()`` method that returns a sequence of two-tuples
    representing key/value pairs.

    For minimal calling convention backwards compatibility, this
    version of urlencode accepts *but ignores* a second argument
    conventionally named ``doseq``.  The Python stdlib version behaves
    differently when ``doseq`` is False and when a sequence is
    presented as one of the values.  This version always behaves in
    the ``doseq=True`` mode, no matter what the value of the second
    argument.

    Any value that has an ``__iter__`` attribute is treated as a
    sequence and yields one ``key=item`` pair per item; every other
    value is passed through ``str()`` and yields a single pair.  The
    pairs are joined with ``&``; an empty ``query`` gives ``''``.

    See the Python stdlib documentation for ``urllib.urlencode`` for
    more information.
    """
    try:
        # presumed to be a dictionary
        query = query.items()
    except AttributeError:
        pass

    pairs = []
    for (k, v) in query:
        k = _utf8_quote_plus(k)
        if hasattr(v, '__iter__'):
            # Sequence value: repeat the key once per item (doseq=True).
            for x in v:
                pairs.append('%s=%s' % (k, _utf8_quote_plus(x)))
        else:
            pairs.append('%s=%s' % (k, _utf8_quote_plus(v)))

    return '&'.join(pairs)