From a97a702c5c3b1494028f32ffc837e35e3e8f606e Mon Sep 17 00:00:00 2001 From: Chris McDonough Date: Fri, 6 Feb 2009 03:53:10 +0000 Subject: Revert my decision to make ``model_path`` return a tuple; it now still returns a string, albeit a quoted one. An additional API (model_path_tuple) now also exists which can be used to get a model path as a tuple. - The ``repoze.bfg.traversal.model_path`` API now returns a *quoted* string rather than a string represented by a series of unquoted elements joined via ``/`` characters. Previously it returned a string or unicode object representing the model path, with each segment name in the path joined together via ``/`` characters, e.g. ``/foo /bar``. Now it returns a string, where each segment is a UTF-8 encoded and URL-quoted element e.g. ``/foo%20/bar``. This change was (as discussed briefly on the repoze-dev maillist) necessary to accommodate model objects which themselves have ``__name__`` attributes that contain the ``/`` character. For people that have no models that have high-order Unicode ``__name__`` attributes or ``__name__`` attributes with values that require URL-quoting within their model graphs, this won't cause any issue. 
However, if you have code that currently expects ``model_path`` to return an unquoted string, or you have an existing application with data generated via the old method, and you're too lazy to change anything, you may wish to replace the BFG-imported ``model_path`` in your code with this function (this is the code of the "old" ``model_path`` implementation):: from repoze.bfg.location import lineage def i_am_too_lazy_to_move_to_the_new_model_path(model, *elements): rpath = [] for location in lineage(model): if location.__name__: rpath.append(location.__name__) path = '/' + '/'.join(reversed(rpath)) if elements: suffix = '/'.join(elements) path = '/'.join([path, suffix]) return path - The ``repoze.bfg.traversal.find_model`` API no longer implicitly converts unicode representations of a full path passed to it as a Unicode object into a UTF-8 string. Callers should either use prequoted path strings returned by ``repoze.bfg.traversal.model_path``, or tuple values returned by the result of ``repoze.bfg.traversal.model_path_tuple`` or they should use the guidelines about passing a string ``path`` argument described in the ``find_model`` API documentation. - Each argument contained in ``elements`` passed to ``repoze.bfg.traversal.model_path`` will now have any ``/`` characters contained within quoted to ``%2F`` in the returned string. Previously, ``/`` characters in elements were left unquoted (a bug). - A ``repoze.bfg.traversal.model_path_tuple`` API was added. This API is an alternative to ``model_path`` (which returns a string); ``model_path_tuple`` returns a model path as a tuple (much like Zope's ``getPhysicalPath``). - A ``repoze.bfg.traversal.quote_path_segment`` API was added. This API will quote an individual path segment (string or unicode object). See the ``repoze.bfg.traversal`` API documentation for more information. 
--- repoze/bfg/url.py | 72 ++++--------------------------------------------------- 1 file changed, 4 insertions(+), 68 deletions(-) (limited to 'repoze/bfg/url.py') diff --git a/repoze/bfg/url.py b/repoze/bfg/url.py index 99b4cdabf..0c6031c26 100644 --- a/repoze/bfg/url.py +++ b/repoze/bfg/url.py @@ -1,11 +1,13 @@ """ Utility functions for dealing with URLs in repoze.bfg """ -import re import urllib from zope.component import queryMultiAdapter from repoze.bfg.interfaces import IContextURL +from repoze.bfg.traversal import TraversalContextURL +from repoze.bfg.traversal import quote_path_segment + def model_url(model, request, *elements, **kw): """ Generate a string representing the absolute URL of the model (or @@ -58,7 +60,6 @@ def model_url(model, request, *elements, **kw): context_url = queryMultiAdapter((model, request), IContextURL) if context_url is None: # b/w compat for unit tests - from repoze.bfg.traversal import TraversalContextURL context_url = TraversalContextURL(model, request) model_url = context_url() @@ -68,7 +69,7 @@ def model_url(model, request, *elements, **kw): qs = '' if elements: - suffix = '/'.join([_urlsegment(s) for s in elements]) + suffix = '/'.join([quote_path_segment(s) for s in elements]) else: suffix = '' @@ -112,69 +113,4 @@ def urlencode(query, doseq=False): return urllib.urlencode(newquery, doseq=doseq) -_segment_cache = {} - -def _urlsegment(s): - """ The bit of this code that deals with ``_segment_cache`` is an - optimization: we cache all the computation of URL path segments in - this module-scope dictionary with the original string (or unicode - value) as the key, so we can look it up later without needing to - reencode or re-url-quote it """ - result = _segment_cache.get(s) - if result is None: - if s.__class__ is unicode: # isinstance slighly slower (~15%) - result = _url_quote(s.encode('utf-8')) - else: - result = _url_quote(s) - # we don't need a lock to mutate _segment_cache, as the below - # will generate exactly one 
Python bytecode (STORE_SUBSCR) - _segment_cache[s] = result - return result - - -always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' - 'abcdefghijklmnopqrstuvwxyz' - '0123456789' '_.-') -_safemaps = {} -_must_quote = {} - -def _url_quote(s, safe = '/'): - """quote('abc def') -> 'abc%20def' - - Faster version of Python stdlib urllib.quote. See - http://bugs.python.org/issue1285086 for more information. - - Each part of a URL, e.g. the path info, the query, etc., has a - different set of reserved characters that must be quoted. - - RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists - the following reserved characters. - - reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | - "$" | "," - - Each of these characters is reserved in some component of a URL, - but not necessarily in all of them. - - By default, the quote function is intended for quoting the path - section of a URL. Thus, it will not encode '/'. This character - is reserved, but in typical usage the quote function is being - called on a path where the existing slash characters are used as - reserved characters. - """ - cachekey = (safe, always_safe) - try: - safe_map = _safemaps[cachekey] - if not _must_quote[cachekey].search(s): - return s - except KeyError: - safe += always_safe - _must_quote[cachekey] = re.compile(r'[^%s]' % safe) - safe_map = {} - for i in range(256): - c = chr(i) - safe_map[c] = (c in safe) and c or ('%%%02X' % i) - _safemaps[cachekey] = safe_map - res = map(safe_map.__getitem__, s) - return ''.join(res) -- cgit v1.2.3