summaryrefslogtreecommitdiff
path: root/repoze/bfg/url.py
diff options
context:
space:
mode:
authorChris McDonough <chrism@agendaless.com>2009-01-05 23:36:27 +0000
committerChris McDonough <chrism@agendaless.com>2009-01-05 23:36:27 +0000
commit7ae0c2522bbb2b026bc0370dbb1b1608f146137d (patch)
treed46aa3e872eaf70d900af720b7458efd2ed0a189 /repoze/bfg/url.py
parent96d8a517bf195a1ce0787e6ec16c3db82bef98f5 (diff)
downloadpyramid-7ae0c2522bbb2b026bc0370dbb1b1608f146137d.tar.gz
pyramid-7ae0c2522bbb2b026bc0370dbb1b1608f146137d.tar.bz2
pyramid-7ae0c2522bbb2b026bc0370dbb1b1608f146137d.zip
New Modules
- A new module ``repoze.bfg.url`` has been added. It contains the ``model_url`` API (moved from ``repoze.bfg.traversal``) and an implementation of ``urlencode`` (like Python's ``urllib.urlencode``) which can handle Unicode keys and values in parameters to the ``query`` argument. Deprecations - The ``model_url`` function has been moved from ``repoze.bfg.traversal`` into ``repoze.bfg.url``. It can still be imported from ``repoze.bfg.traversal`` but an import from ``repoze.bfg.traversal`` will emit a DeprecationWarning. Features - The ``repoze.bfg.url.model_url`` API (nee' ``repoze.bfg.traversal.model_url``) now accepts and honors a keyword argument named ``query``. The value of this argument will be used to compose a query string, which will be attached to the generated URL before it is returned. See the API docs (in the docs directory or `on the web <http://static.repoze.org/bfgdocs>`_) for more information.
Diffstat (limited to 'repoze/bfg/url.py')
-rw-r--r--repoze/bfg/url.py174
1 files changed, 174 insertions, 0 deletions
diff --git a/repoze/bfg/url.py b/repoze/bfg/url.py
new file mode 100644
index 000000000..4c1545de5
--- /dev/null
+++ b/repoze/bfg/url.py
@@ -0,0 +1,174 @@
+""" Utility functions for dealing with URLs in repoze.bfg """
+
+import re
+import urllib
+
+from repoze.bfg.location import lineage
+
def model_url(model, request, *elements, **kw):
    """
    Generate a string representing the absolute URL of the model
    object based on the ``wsgi.url_scheme``, ``HTTP_HOST`` or
    ``SERVER_NAME`` in the request, plus any ``SCRIPT_NAME`` (these
    are read indirectly, through ``request.application_url``).  If a
    non-empty ``query`` keyword argument is provided, a query string
    based on its value will be composed and appended to the generated
    URL string (see details below).  The overall result of this
    function is intended to always be a string (never unicode).  The
    ``model`` passed in must be :term:`location`-aware.

    .. note:: If any model in the lineage has a unicode name, it will
              be converted to UTF-8 before being attached to the URL.
              When composing the path based on the model lineage,
              empty names in the model graph are ignored.

    Any positional arguments passed in as ``elements`` must be strings
    or unicode objects.  These will be joined by slashes and appended
    to the generated model URL.  Each of the elements passed in is
    URL-quoted before being appended; if any element is unicode, it
    will be converted to a UTF-8 bytestring before being URL-quoted.

    .. warning:: if no ``elements`` arguments are specified, the model
                 URL will end with a trailing slash.  If any
                 ``elements`` are used, the generated URL will *not*
                 end in a trailing slash.

    If a keyword argument ``query`` is present and non-empty, it will
    be used to compose a query string that will be tacked on to the
    end of the URL.  The value of ``query`` must be a sequence of
    two-tuples *or* a data structure with an ``.items()`` method that
    returns a sequence of two-tuples (presumably a dictionary).  This
    data structure will be turned into a query string per the
    documentation of the ``repoze.bfg.url.urlencode`` function.  After
    the query data is turned into a query string, a leading ``?`` is
    prepended, and the resulting string is appended to the generated
    URL.  An empty ``query`` (``None``, ``{}``, ``[]``, ...) adds
    nothing to the URL -- in particular, no dangling ``?``.

    .. note:: Python data structures that are passed as ``query``
              which are sequences or dictionaries are turned into a
              string under the same rules as when run through
              urllib.urlencode with the ``doseq`` argument equal to
              ``True``.  This means that sequences can be passed as
              values, and a k=v pair will be placed into the query
              string for each value.

    Keyword arguments other than ``query`` are accepted but ignored.
    """
    # Only emit '?...' when there is something to encode; an empty
    # mapping/sequence (or None) would otherwise leave a bare '?'.
    query = kw.get('query')
    if query:
        qs = '?' + urlencode(query, doseq=True)
    else:
        qs = ''

    # lineage() walks from the model up towards the root, so the quoted
    # names are collected leaf-first and reversed when joined.
    rpath = []
    for location in lineage(model):
        name = location.__name__
        if name:
            rpath.append(_urlsegment(name))
    prefix = '/'.join(reversed(rpath))
    suffix = '/'.join([_urlsegment(s) for s in elements])
    # Joining prefix and (possibly empty) suffix with '/' is what
    # yields the trailing slash when no elements were passed.
    path = '/'.join([prefix, suffix])
    if not path.startswith('/'):
        path = '/' + path
    app_url = request.application_url # never ends in a slash
    return app_url + path + qs
+
# Resolve the stdlib implementation under whichever name this
# interpreter provides, so the wrapper works on Python 2 and Python 3.
try:
    from urllib import urlencode as _stdlib_urlencode
except ImportError:
    from urllib.parse import urlencode as _stdlib_urlencode

# ``unicode`` on Python 2, ``str`` on Python 3.
_text_type = type(u'')


def _utf8(value):
    """ Return ``value`` encoded as UTF-8 if it is a text (unicode)
    object, including instances of unicode subclasses; return any
    other object unchanged. """
    if isinstance(value, _text_type):
        return value.encode('utf-8')
    return value


def urlencode(query, doseq=False):
    """
    A wrapper around Python's stdlib `urllib.urlencode function
    <http://docs.python.org/library/urllib.html>`_ which
    accepts unicode keys and values within the ``query``
    dict/sequence; all unicode keys and values are first converted to
    UTF-8 before being used to compose the query string.  The behavior
    of the function is otherwise the same as the stdlib version.

    The value of ``query`` must be a sequence of two-tuples
    representing key/value pairs *or* an object (often a dictionary)
    with an ``.items()`` method that returns a sequence of two-tuples
    representing key/value pairs.  ``doseq`` controls what happens
    when a sequence is presented as one of the values.  See the Python
    stdlib documentation for more information.

    Values that are tuples or lists have each unicode member encoded
    individually; non-text keys and values (integers, for example) are
    handed to the stdlib function untouched.  The return value is the
    query string produced by the stdlib function (without a leading
    ``?``).
    """
    if hasattr(query, 'items'):
        # dictionary
        query = query.items()
    # presumed to be a sequence of two-tuples
    newquery = []
    for k, v in query:
        k = _utf8(k)
        if isinstance(v, (tuple, list)):
            v = [_utf8(x) for x in v]
        else:
            v = _utf8(v)
        newquery.append((k, v))

    return _stdlib_urlencode(newquery, doseq=doseq)
+
# Maps an original path segment (str or unicode) to its URL-quoted form.
# NOTE(review): entries are never evicted; if segments can come from
# arbitrary request data this grows without bound -- confirm that is
# acceptable for the deployments this targets.
_segment_cache = {}

def _urlsegment(s):
    """ Return the URL-quoted form of the path segment ``s``; unicode
    values (including instances of unicode subclasses) are encoded to
    UTF-8 before being quoted.

    The bit of this code that deals with ``_segment_cache`` is an
    optimization: we cache all the computation of URL path segments in
    this module-scope dictionary with the original string (or unicode
    value) as the key, so we can look it up later without needing to
    reencode or re-url-quote it """
    result = _segment_cache.get(s)
    if result is None:
        # This branch only runs on a cache miss, so isinstance (which
        # also recognizes unicode subclasses) costs next to nothing.
        if isinstance(s, unicode):
            result = _url_quote(s.encode('utf-8'))
        else:
            result = _url_quote(s)
        # we don't need a lock to mutate _segment_cache, as the below
        # will generate exactly one Python bytecode (STORE_SUBSCR)
        _segment_cache[s] = result
    return result
+
+
+always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ 'abcdefghijklmnopqrstuvwxyz'
+ '0123456789' '_.-')
+_safemaps = {}
+_must_quote = {}
+
+def _url_quote(s, safe = '/'):
+ """quote('abc def') -> 'abc%20def'
+
+ Faster version of Python stdlib urllib.quote. See
+ http://bugs.python.org/issue1285086 for more information.
+
+ Each part of a URL, e.g. the path info, the query, etc., has a
+ different set of reserved characters that must be quoted.
+
+ RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
+ the following reserved characters.
+
+ reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
+ "$" | ","
+
+ Each of these characters is reserved in some component of a URL,
+ but not necessarily in all of them.
+
+ By default, the quote function is intended for quoting the path
+ section of a URL. Thus, it will not encode '/'. This character
+ is reserved, but in typical usage the quote function is being
+ called on a path where the existing slash characters are used as
+ reserved characters.
+ """
+ cachekey = (safe, always_safe)
+ try:
+ safe_map = _safemaps[cachekey]
+ if not _must_quote[cachekey].search(s):
+ return s
+ except KeyError:
+ safe += always_safe
+ _must_quote[cachekey] = re.compile(r'[^%s]' % safe)
+ safe_map = {}
+ for i in range(256):
+ c = chr(i)
+ safe_map[c] = (c in safe) and c or ('%%%02X' % i)
+ _safemaps[cachekey] = safe_map
+ res = map(safe_map.__getitem__, s)
+ return ''.join(res)
+