summaryrefslogtreecommitdiff
path: root/repoze/bfg/encode.py
blob: 127c405ed1a2984e36409af3dfd6c41322eaec54 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import re

# Characters that are never percent-encoded, whatever ``safe`` the caller
# passes to ``url_quote``.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Caches filled lazily by ``url_quote``, both keyed by ``(safe, always_safe)``:
# ``_safemaps`` maps each of the 256 single-byte characters to its quoted
# form; ``_must_quote`` holds a compiled regex that matches any character
# which has to be quoted.
_safemaps = {}
_must_quote = {}

def url_quote(s, safe=''):
    """Percent-encode ``s`` for use as a single URL path segment.

    url_quote('abc def') -> 'abc%20def'

    This is a faster variant of the Python stdlib ``urllib.quote``.
    Unlike the stdlib function, whose default ``safe`` is '/', this
    one defaults to ``safe=''`` and therefore *does* encode any '/'
    found in ``s``: it is meant for quoting individual path segments,
    not an already composed path.

    Each part of a URL (path info, query, ...) has its own set of
    reserved characters.  RFC 2396 lists these as reserved:

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of them is reserved in some component of a URL, but not
    necessarily in all of them; pass the ones that must survive
    unquoted via ``safe``.

    ``s`` is the string to quote.  Only characters with ordinals 0-255
    have an entry in the lookup table; any other character raises
    ``KeyError``.

    ``safe`` is a string of extra characters to leave unquoted, in
    addition to the letters, digits and '_.-' in ``always_safe``.

    Returns ``s`` itself when nothing needs quoting, otherwise a new
    string in which every unsafe character is replaced by ``%XX``
    (two upper-case hex digits).
    """
    cachekey = (safe, always_safe)
    try:
        # Look up _safemaps first: it is stored last below, so once it is
        # present the matching _must_quote entry is present too.
        safe_map = _safemaps[cachekey]
        must_quote = _must_quote[cachekey]
    except KeyError:
        safe += always_safe
        # Escape the safe characters so that none of them is interpreted
        # as character-class syntax.  Unescaped, a ``safe`` of '/-' would
        # form the range '/-A' and let ':' or '?' slip through unquoted.
        must_quote = re.compile('[^%s]' % re.escape(safe))
        safe_map = {}
        for i in range(256):
            c = chr(i)
            if c in safe:
                safe_map[c] = c
            else:
                safe_map[c] = '%%%02X' % i
        _must_quote[cachekey] = must_quote
        _safemaps[cachekey] = safe_map
    # Fast path: nothing to quote, hand the input back untouched.
    if not must_quote.search(s):
        return s
    return ''.join(map(safe_map.__getitem__, s))

def quote_plus(s, safe=''):
    """ Variant of the stdlib quote_plus built on the faster url_quote:
    spaces become '+', everything else is quoted as url_quote would. """
    if ' ' not in s:
        return url_quote(s, safe)
    # Mark the space as safe so it survives quoting, then turn it into '+'.
    quoted = url_quote(s, safe + ' ')
    return quoted.replace(' ', '+')

def _quote_plus_utf8(value):
    """ Return ``value`` quoted with ``quote_plus``; unicode objects are
    encoded to UTF-8 first, anything else is passed through ``str()``. """
    if isinstance(value, unicode):
        value = value.encode('utf-8')
    return quote_plus(str(value))


def urlencode(query, doseq=True):
    """
    An alternate implementation of Python's stdlib `urllib.urlencode
    function <http://docs.python.org/library/urllib.html>`_ which
    accepts unicode keys and values within the ``query``
    dict/sequence; all Unicode keys and values are first converted to
    UTF-8 before being used to compose the query string.

    The value of ``query`` must be a sequence of two-tuples
    representing key/value pairs *or* an object (often a dictionary)
    with an ``.items()`` method that returns a sequence of two-tuples
    representing key/value pairs.

    A value that has an ``__iter__`` attribute is expanded into one
    ``key=element`` pair per element; an empty iterable contributes
    nothing to the result.  Any other value yields a single
    ``key=value`` pair.

    For minimal calling convention backwards compatibility, this
    version of urlencode accepts *but ignores* a second argument
    conventionally named ``doseq``.  The Python stdlib version behaves
    differently when ``doseq`` is False and when a sequence is
    presented as one of the values.  This version always behaves in
    the ``doseq=True`` mode, no matter what the value of the second
    argument.

    Returns the ``key=value`` pairs joined with ``&`` (an empty string
    when there are none).

    See the Python stdlib documentation for ``urllib.urlencode`` for
    more information.
    """
    try:
        # presumed to be a dictionary
        query = query.items()
    except AttributeError:
        pass

    # Collect the pairs and join once at the end: this avoids repeated
    # string concatenation and guarantees that '&' only ever appears
    # *between* pairs, even when an iterable value turns out to be empty.
    pairs = []

    for (k, v) in query:
        k = _quote_plus_utf8(k)
        if hasattr(v, '__iter__'):
            for x in v:
                pairs.append('%s=%s' % (k, _quote_plus_utf8(x)))
        else:
            pairs.append('%s=%s' % (k, _quote_plus_utf8(v)))

    return '&'.join(pairs)