diff options
| author | Chris McDonough <chrism@agendaless.com> | 2009-01-05 23:36:27 +0000 |
|---|---|---|
| committer | Chris McDonough <chrism@agendaless.com> | 2009-01-05 23:36:27 +0000 |
| commit | 7ae0c2522bbb2b026bc0370dbb1b1608f146137d (patch) | |
| tree | d46aa3e872eaf70d900af720b7458efd2ed0a189 /repoze/bfg/url.py | |
| parent | 96d8a517bf195a1ce0787e6ec16c3db82bef98f5 (diff) | |
| download | pyramid-7ae0c2522bbb2b026bc0370dbb1b1608f146137d.tar.gz pyramid-7ae0c2522bbb2b026bc0370dbb1b1608f146137d.tar.bz2 pyramid-7ae0c2522bbb2b026bc0370dbb1b1608f146137d.zip | |
New Modules
- A new module ``repoze.bfg.url`` has been added. It contains the
``model_url`` API (moved from ``repoze.bfg.traversal``) and an
implementation of ``urlencode`` (like Python's
``urllib.urlencode``) which can handle Unicode keys and values in
parameters to the ``query`` argument.
Deprecations
- The ``model_url`` function has been moved from
``repoze.bfg.traversal`` into ``repoze.bfg.url``. It can still
be imported from ``repoze.bfg.traversal`` but an import from
``repoze.bfg.traversal`` will emit a DeprecationWarning.
Features
- The ``repoze.bfg.url.model_url`` API (nee'
``repoze.bfg.traversal.model_url``) now accepts and honors a
keyword argument named ``query``. The value of this argument
will be used to compose a query string, which will be attached to
the generated URL before it is returned. See the API docs (in
the docs directory or `on the web
<http://static.repoze.org/bfgdocs>`_) for more information.
Diffstat (limited to 'repoze/bfg/url.py')
| -rw-r--r-- | repoze/bfg/url.py | 174 |
1 files changed, 174 insertions, 0 deletions
diff --git a/repoze/bfg/url.py b/repoze/bfg/url.py new file mode 100644 index 000000000..4c1545de5 --- /dev/null +++ b/repoze/bfg/url.py @@ -0,0 +1,174 @@ +""" Utility functions for dealing with URLs in repoze.bfg """ + +import re +import urllib + +from repoze.bfg.location import lineage + +def model_url(model, request, *elements, **kw): + """ + Generate a string representing the absolute URL of the model + object based on the ``wsgi.url_scheme``, ``HTTP_HOST`` or + ``SERVER_NAME`` in the request, plus any ``SCRIPT_NAME``. If a + ``query`` keyword argument is provided, a query string based on + its value will be composed and appended to the generated URL + string (see details below). The overall result of this function + is always a string (never unicode). The ``model`` passed in must + be :term:`location`-aware. + + .. note:: If any model in the lineage has a unicode name, it will + be converted to UTF-8 before being attached to the URL. + When composing the path based on the model lineage, + empty names in the model graph are ignored. + + Any positional arguments passed in as ``elements`` must be strings + or unicode objects. These will be joined by slashes and appended + to the generated model URL. Each of the elements passed in is + URL-quoted before being appended; if any element is unicode, it + will converted to a UTF-8 bytestring before being URL-quoted. + + .. warning:: if no ``elements`` arguments are specified, the model + URL will end with a trailing slash. If any + ``elements`` are used, the generated URL will *not* + end in trailing a slash. + + If a keyword argument ``query`` is present, it will used to + compose a query string that will be tacked on to the end of the + URL. The value of ``query`` must be a sequence of two-tuples *or* + a data structure with an ``.items()`` method that returns a + sequence of two-tuples (presumably a dictionary). This data + structure will be turned into a query string per the documentation + of ``repoze.url.urlencode`` function. After the query data is + turned into a query string, a leading ``?`` is prepended, and the + the resulting string is appended to the generated URL. + + .. note:: Python data structures that are passed as ``query`` + which are sequences or dictionaries are turned into a + string under the same rules as when run through + urllib.urlencode with the ``doseq`` argument equal to + ``True``. This means that sequences can be passed as + values, and a k=v pair will be placed into the query + string for each value. + """ + + qs = '' + if 'query' in kw: + qs = '?' + urlencode(kw['query'], doseq=True) + + rpath = [] + for location in lineage(model): + name = location.__name__ + if name: + rpath.append(_urlsegment(name)) + prefix = '/'.join(reversed(rpath)) + suffix = '/'.join([_urlsegment(s) for s in elements]) + path = '/'.join([prefix, suffix]) + if not path.startswith('/'): + path = '/' + path + app_url = request.application_url # never ends in a slash + return app_url + path + qs + +def urlencode(query, doseq=False): + """ + A wrapper around Python's stdlib `urllib.urlencode function + <http://docs.python.org/library/urllib.html>`_ which + accepts unicode keys and values within the ``query`` + dict/sequence; all keys and values are first converted to UTF-8 + before being used to compose the query string. The behavior of + the function is otherwise the same as the stdlib version. + + The value of ``query`` must be a sequence of two-tuples + representing key/value pairs *or* an object (often a dictionary) + with an ``.items()`` method that returns a sequence of two-tuples + representing key/value pairs. ``doseq`` controls what happens + when a sequence is presented as one of the values. See the Python + stdlib documentation for more information. + """ + if hasattr(query, 'items'): + # dictionary + query = query.items() + # presumed to be a sequence of two-tuples + newquery = [] + for k, v in query: + if k.__class__ is unicode: + k = k.encode('utf-8') + + if isinstance(v, (tuple, list)): + L = [] + for x in v: + if x.__class__ is unicode: + x = x.encode('utf-8') + L.append(x) + v = L + elif v.__class__ is unicode: + v = v.encode('utf-8') + newquery.append((k, v)) + + return urllib.urlencode(newquery, doseq=doseq) + +_segment_cache = {} + +def _urlsegment(s): + """ The bit of this code that deals with ``_segment_cache`` is an + optimization: we cache all the computation of URL path segments in + this module-scope dictionary with the original string (or unicode + value) as the key, so we can look it up later without needing to + reencode or re-url-quote it """ + result = _segment_cache.get(s) + if result is None: + if s.__class__ is unicode: # isinstance slighly slower (~15%) + result = _url_quote(s.encode('utf-8')) + else: + result = _url_quote(s) + # we don't need a lock to mutate _segment_cache, as the below + # will generate exactly one Python bytecode (STORE_SUBSCR) + _segment_cache[s] = result + return result + + +always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' + 'abcdefghijklmnopqrstuvwxyz' + '0123456789' '_.-') +_safemaps = {} +_must_quote = {} + +def _url_quote(s, safe = '/'): + """quote('abc def') -> 'abc%20def' + + Faster version of Python stdlib urllib.quote. See + http://bugs.python.org/issue1285086 for more information. + + Each part of a URL, e.g. the path info, the query, etc., has a + different set of reserved characters that must be quoted. + + RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists + the following reserved characters. + + reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | + "$" | "," + + Each of these characters is reserved in some component of a URL, + but not necessarily in all of them. + + By default, the quote function is intended for quoting the path + section of a URL. Thus, it will not encode '/'. This character + is reserved, but in typical usage the quote function is being + called on a path where the existing slash characters are used as + reserved characters. + """ + cachekey = (safe, always_safe) + try: + safe_map = _safemaps[cachekey] + if not _must_quote[cachekey].search(s): + return s + except KeyError: + safe += always_safe + _must_quote[cachekey] = re.compile(r'[^%s]' % safe) + safe_map = {} + for i in range(256): + c = chr(i) + safe_map[c] = (c in safe) and c or ('%%%02X' % i) + _safemaps[cachekey] = safe_map + res = map(safe_map.__getitem__, s) + return ''.join(res) + |
