1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
|
""" Utility functions for dealing with URLs in repoze.bfg """
import re
import urllib
from repoze.bfg.location import lineage
def model_url(model, request, *elements, **kw):
    """
    Return the absolute URL of ``model`` as seen through ``request``.

    The URL is assembled from three pieces, in this order:

    1. ``request.application_url`` (expected not to end in a slash);
    2. a path made of the ``__name__`` of every object in the
       :term:`location` lineage of ``model``, root first, followed by
       any ``elements``;
    3. an optional query string.

    ``model`` must be :term:`location`-aware so that its lineage can
    be walked.

    .. note:: Empty (falsy) names found in the lineage are skipped.
              Unicode names are encoded to UTF-8 before being
              URL-quoted.

    ``elements`` are extra path segments (strings or unicode objects).
    Each one is URL-quoted the same way as the model names and the
    results are joined with slashes after the model path.

    .. note:: A ``/`` inside a name or an element is left as-is by
              the quoting step, so it shows up as a path separator in
              the generated URL.

    .. warning:: When no ``elements`` are given, the generated URL
                 ends with a trailing slash.  When ``elements`` are
                 given, it does *not* end with a trailing slash.

    If the keyword argument ``query`` is supplied, it is passed to
    this module's ``urlencode`` function with ``doseq=True`` and the
    outcome, prefixed with ``?``, is appended to the URL.  ``query``
    may be a sequence of two-tuples or an object with an ``.items()``
    method returning such a sequence (for instance a dictionary).
    Because ``doseq`` is true, a value that is itself a sequence
    produces one ``k=v`` pair per item.  Keyword arguments other than
    ``query`` are ignored.

    The result is meant to be a plain string, not unicode.
    """
    # The query string is computed first so that a bad ``query`` value
    # fails before the lineage is walked.
    try:
        query = kw['query']
    except KeyError:
        query_string = ''
    else:
        query_string = '?' + urlencode(query, doseq=True)

    # lineage() yields the model first and the root last; gather the
    # quoted non-empty names, then flip them into root-first order.
    segments = [
        _urlsegment(name)
        for name in (node.__name__ for node in lineage(model))
        if name
    ]
    segments.reverse()

    model_path = '/'.join(segments)
    extra_path = '/'.join(map(_urlsegment, elements))

    # The separator is always emitted, which is what produces the
    # trailing slash when there are no extra elements.
    path = model_path + '/' + extra_path
    if path[:1] != '/':
        path = '/' + path

    return request.application_url + path + query_string
def urlencode(query, doseq=False):
    """
    Build a URL query string from ``query``, accepting unicode input.

    This is a thin layer over the stdlib ``urllib.urlencode``
    function (http://docs.python.org/library/urllib.html).  Before
    delegating, every key and value whose class is exactly
    ``unicode`` is encoded to a UTF-8 byte string.  Values that are
    tuples or lists are replaced by a new list in which each unicode
    item has been encoded the same way.  Everything else is handed to
    the stdlib function unchanged.

    ``query`` is either a sequence of ``(key, value)`` two-tuples or
    any object exposing an ``items`` attribute (typically a
    dictionary), in which case ``query.items()`` supplies the pairs.

    ``doseq`` is forwarded as-is to ``urllib.urlencode``; see the
    stdlib documentation for how it treats sequence values.
    """
    def _utf8(value):
        # Exact class comparison on purpose: instances of unicode
        # subclasses are passed through untouched, as are all
        # non-unicode objects.
        if value.__class__ is unicode:
            return value.encode('utf-8')
        return value

    pairs = query
    if hasattr(query, 'items'):
        # Mapping-like object: work on its (key, value) pairs.
        pairs = query.items()

    encoded = []
    for key, value in pairs:
        key = _utf8(key)
        if isinstance(value, (tuple, list)):
            value = [_utf8(item) for item in value]
        else:
            value = _utf8(value)
        encoded.append((key, value))

    return urllib.urlencode(encoded, doseq=doseq)
# Maps an original segment (string or unicode) to its URL-quoted form.
_segment_cache = {}


def _urlsegment(s):
    """ Return the URL-quoted form of the path segment ``s``.

    Unicode input (exact class ``unicode``) is encoded to UTF-8 first;
    anything else is handed to ``_url_quote`` unchanged.

    As an optimization, every computed result is remembered in the
    module-scope ``_segment_cache`` dictionary, keyed by the original
    value, so a segment seen before is neither re-encoded nor
    re-quoted.  Entries are never evicted. """
    try:
        return _segment_cache[s]
    except KeyError:
        pass

    raw = s
    if s.__class__ is unicode:  # exact-class test kept; reportedly faster than isinstance
        raw = s.encode('utf-8')
    quoted = _url_quote(raw)

    # No lock is taken here: the store below is a single dictionary
    # item assignment, which the original authors rely on being one
    # bytecode (STORE_SUBSCR).
    _segment_cache[s] = quoted
    return quoted
# Characters that are never percent-quoted, whatever ``safe`` is.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')

# Both caches are keyed by ``(safe, always_safe)``.
_safemaps = {}    # key -> {character: character or its '%XX' escape}
_must_quote = {}  # key -> compiled regex matching any character to quote


def _url_quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Faster version of Python stdlib urllib.quote.  See
    http://bugs.python.org/issue1285086 for more information.

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    ``s`` is the string to quote.  ``safe`` lists additional
    characters, beyond ``always_safe``, that must be left unquoted.
    The return value is ``s`` with every other character replaced by
    its ``%XX`` escape (two upper-case hex digits).

    For each distinct ``safe`` value, a 256-entry lookup table and a
    "does anything need quoting?" regular expression are built on the
    first call and cached at module scope.  Later calls return ``s``
    itself when the regular expression finds nothing to quote.  A
    character whose ordinal is outside ``range(256)`` is not in the
    table and raises ``KeyError``.
    """
    cachekey = (safe, always_safe)
    try:
        safe_map = _safemaps[cachekey]
        if not _must_quote[cachekey].search(s):
            # Fast path: nothing in ``s`` needs quoting.
            return s
    except KeyError:
        safe += always_safe
        # ``safe`` is caller-supplied text placed inside a character
        # class, so it has to be escaped.  Unescaped, a value such as
        # '+-' combined with the following 'A' into the range '+-A'
        # (wrongly treating '?', ':', '=' ... as safe on the fast
        # path), and a value such as 'a-' failed to compile at all.
        _must_quote[cachekey] = re.compile('[^%s]' % re.escape(safe))
        safe_map = {}
        for i in range(256):
            c = chr(i)
            if c in safe:
                safe_map[c] = c
            else:
                safe_map[c] = '%%%02X' % i
        # Stored last, so a key present here is also in _must_quote.
        _safemaps[cachekey] = safe_map
    return ''.join(map(safe_map.__getitem__, s))
|