diff options
| author | Chris McDonough <chrism@agendaless.com> | 2009-09-23 10:24:42 +0000 |
|---|---|---|
| committer | Chris McDonough <chrism@agendaless.com> | 2009-09-23 10:24:42 +0000 |
| commit | eb9fbf5f24b5e41cadd1eac8ca970ba819ecb6a5 (patch) | |
| tree | 26afc97a9ddc53feca88bfac957bb376357edc6b /repoze/bfg/encode.py | |
| parent | 02e1fe45e4bb2d7473296d2b8c2114680d9c75c2 (diff) | |
| download | pyramid-eb9fbf5f24b5e41cadd1eac8ca970ba819ecb6a5.tar.gz pyramid-eb9fbf5f24b5e41cadd1eac8ca970ba819ecb6a5.tar.bz2 pyramid-eb9fbf5f24b5e41cadd1eac8ca970ba819ecb6a5.zip | |
Features
--------
- Speed up ``repoze.bfg.encode.urlencode`` (née
``repoze.bfg.url.urlencode``) slightly.
- Speed up ``repoze.bfg.traversal.model_path`` and
``repoze.bfg.traversal.model_path_tuple`` slightly.
Internal
--------
- Move ``repoze.bfg.traversal._url_quote`` into ``repoze.bfg.encode``
as ``url_quote``.
Backwards Incompatibilities
---------------------------
- We previously had a Unicode-aware wrapper for the
``urllib.urlencode`` function named ``repoze.bfg.url.urlencode``
which delegated to the stdlib function, but which marshalled all
unicode values to utf-8 strings before calling the stdlib version.
A newer replacement now lives in ``repoze.bfg.encode`` (old imports
will still work). The replacement does not delegate to the stdlib.
The replacement diverges from the stdlib implementation and the
previous ``repoze.bfg.url.urlencode`` implementation in that its
``doseq`` argument is a decoy: it is accepted but ignored, and, for
speed, the function always behaves in the ``doseq=True`` way (which
is the only sane behavior).
The old import location (``repoze.bfg.url.urlencode``) still
functions and has not been deprecated.
Diffstat (limited to 'repoze/bfg/encode.py')
| -rw-r--r-- | repoze/bfg/encode.py | 107 |
1 files changed, 107 insertions, 0 deletions
diff --git a/repoze/bfg/encode.py b/repoze/bfg/encode.py new file mode 100644 index 000000000..127c405ed --- /dev/null +++ b/repoze/bfg/encode.py @@ -0,0 +1,107 @@ +import re + +always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' + 'abcdefghijklmnopqrstuvwxyz' + '0123456789' '_.-') +_safemaps = {} +_must_quote = {} + +def url_quote(s, safe=''): + """quote('abc def') -> 'abc%20def' + + Faster version of Python stdlib urllib.quote which also quotes + the '/' character. + + Each part of a URL, e.g. the path info, the query, etc., has a + different set of reserved characters that must be quoted. + + RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists + the following reserved characters. + + reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | + "$" | "," + + Each of these characters is reserved in some component of a URL, + but not necessarily in all of them. + + Unlike the default version of this function in the Python stdlib, + by default, the quote function is intended for quoting individual + path segments instead of an already composed path that might have + '/' characters in it. Thus, it *will* encode any '/' character it + finds in a string. 
+ """ + cachekey = (safe, always_safe) + try: + safe_map = _safemaps[cachekey] + if not _must_quote[cachekey].search(s): + return s + except KeyError: + safe += always_safe + _must_quote[cachekey] = re.compile(r'[^%s]' % safe) + safe_map = {} + for i in range(256): + c = chr(i) + safe_map[c] = (c in safe) and c or ('%%%02X' % i) + _safemaps[cachekey] = safe_map + res = map(safe_map.__getitem__, s) + return ''.join(res) + +def quote_plus(s, safe=''): + """ Version of stdlib quote_plus which uses faster url_quote """ + if ' ' in s: + s = url_quote(s, safe + ' ') + return s.replace(' ', '+') + return url_quote(s, safe) + +def urlencode(query, doseq=True): + """ + An alternate implementation of Python's stdlib `urllib.urlencode + function <http://docs.python.org/library/urllib.html>`_ which + accepts unicode keys and values within the ``query`` + dict/sequence; all Unicode keys and values are first converted to + UTF-8 before being used to compose the query string. + + The value of ``query`` must be a sequence of two-tuples + representing key/value pairs *or* an object (often a dictionary) + with an ``.items()`` method that returns a sequence of two-tuples + representing key/value pairs. + + For minimal calling convention backwards compatibility, this + version of urlencode accepts *but ignores* a second argument + conventionally named ``doseq``. The Python stdlib version behaves + differently when ``doseq`` is False and when a sequence is + presented as one of the values. This version always behaves in + the ``doseq=True`` mode, no matter what the value of the second + argument. + + See the Python stdlib documentation for ``urllib.urlencode`` for + more information. 
+ """ + try: + # presumed to be a dictionary + query = query.items() + except AttributeError: + pass + + result = '' + prefix = '' + + for (k, v) in query: + if k.__class__ is unicode: + k = k.encode('utf-8') + k = quote_plus(str(k)) + if hasattr(v, '__iter__'): + for x in v: + if x.__class__ is unicode: + x = x.encode('utf-8') + x = quote_plus(str(x)) + result += '%s%s=%s' % (prefix, k, x) + prefix = '&' + else: + if v.__class__ is unicode: + v = v.encode('utf-8') + v = quote_plus(str(v)) + result += '%s%s=%s' % (prefix, k, v) + prefix = '&' + + return result |
