From a97a702c5c3b1494028f32ffc837e35e3e8f606e Mon Sep 17 00:00:00 2001
From: Chris McDonough <chrism@agendaless.com>
Date: Fri, 6 Feb 2009 03:53:10 +0000
Subject: Revert my decision to make ``model_path`` return a tuple; it now
 still returns a string; albeit a quoted one.  An additional API
 (model_path_tuple) now also exists which can be used to get a model path as a
 tuple.

- The ``repoze.bfg.traversal.model_path`` API now returns a *quoted*
  string rather than a string represented by a series of unquoted
  elements joined via ``/`` characters.  Previously it returned a
  string or unicode object representing the model path, with each
  segment name in the path joined together via ``/`` characters,
  e.g. ``/foo /bar``.  Now it returns a string, where each segment is
  a UTF-8 encoded and URL-quoted element e.g. ``/foo%20/bar``.  This
  change was (as discussed briefly on the repoze-dev mailing list)
  necessary to accommodate model objects which themselves have
  ``__name__`` attributes that contain the ``/`` character.

  For people that have no models that have high-order Unicode
  ``__name__`` attributes or ``__name__`` attributes with values that
  require URL-quoting within their model graphs, this won't cause any
  issue.  However, if you have code that currently expects
  ``model_path`` to return an unquoted string, or you have an existing
  application with data generated via the old method, and you're too
  lazy to change anything, you may wish to replace the BFG-imported
  ``model_path`` in your code with this function (this is the code of
  the "old" ``model_path`` implementation)::

        from repoze.bfg.location import lineage

        def i_am_too_lazy_to_move_to_the_new_model_path(model, *elements):
            rpath = []
            for location in lineage(model):
                if location.__name__:
                    rpath.append(location.__name__)
            path = '/' + '/'.join(reversed(rpath))
            if elements:
                suffix = '/'.join(elements)
                path = '/'.join([path, suffix])
            return path

- The ``repoze.bfg.traversal.find_model`` API no longer implicitly
  converts unicode representations of a full path passed to it as a
  Unicode object into a UTF-8 string.  Callers should either use
  prequoted path strings returned by
  ``repoze.bfg.traversal.model_path``, or tuple values returned by the
  result of ``repoze.bfg.traversal.model_path_tuple`` or they should
  use the guidelines about passing a string ``path`` argument
  described in the ``find_model`` API documentation.

- Each argument contained in ``elements`` passed to
  ``repoze.bfg.traversal.model_path`` will now have any ``/``
  characters contained within quoted to ``%2F`` in the returned
  string.  Previously, ``/`` characters in elements were left unquoted
  (a bug).

- A ``repoze.bfg.traversal.model_path_tuple`` API was added.  This API
  is an alternative to ``model_path`` (which returns a string);
  ``model_path_tuple`` returns a model path as a tuple (much like
  Zope's ``getPhysicalPath``).

- A ``repoze.bfg.traversal.quote_path_segment`` API was added.  This
  API will quote an individual path segment (string or unicode
  object).  See the ``repoze.bfg.traversal`` API documentation for
  more information.
---
 repoze/bfg/url.py | 72 ++++---------------------------------------------------
 1 file changed, 4 insertions(+), 68 deletions(-)

(limited to 'repoze/bfg/url.py')

diff --git a/repoze/bfg/url.py b/repoze/bfg/url.py
index 99b4cdabf..0c6031c26 100644
--- a/repoze/bfg/url.py
+++ b/repoze/bfg/url.py
@@ -1,11 +1,13 @@
 """ Utility functions for dealing with URLs in repoze.bfg """
 
-import re
 import urllib
 
 from zope.component import queryMultiAdapter
 from repoze.bfg.interfaces import IContextURL
 
+from repoze.bfg.traversal import TraversalContextURL
+from repoze.bfg.traversal import quote_path_segment
+
 def model_url(model, request, *elements, **kw):
     """
     Generate a string representing the absolute URL of the model (or
@@ -58,7 +60,6 @@ def model_url(model, request, *elements, **kw):
     context_url = queryMultiAdapter((model, request), IContextURL)
     if context_url is None:
         # b/w compat for unit tests
-        from repoze.bfg.traversal import TraversalContextURL
         context_url = TraversalContextURL(model, request)
     model_url = context_url()
 
@@ -68,7 +69,7 @@ def model_url(model, request, *elements, **kw):
         qs = ''
 
     if elements:
-        suffix = '/'.join([_urlsegment(s) for s in elements])
+        suffix = '/'.join([quote_path_segment(s) for s in elements])
     else:
         suffix = ''
 
@@ -112,69 +113,4 @@ def urlencode(query, doseq=False):
 
     return urllib.urlencode(newquery, doseq=doseq)
 
-_segment_cache = {}
-
-def _urlsegment(s):
-    """ The bit of this code that deals with ``_segment_cache`` is an
-    optimization: we cache all the computation of URL path segments in
-    this module-scope dictionary with the original string (or unicode
-    value) as the key, so we can look it up later without needing to
-    reencode or re-url-quote it """
-    result = _segment_cache.get(s)
-    if result is None:
-        if s.__class__ is unicode: # isinstance slighly slower (~15%)
-            result = _url_quote(s.encode('utf-8'))
-        else:
-            result = _url_quote(s)
-        # we don't need a lock to mutate _segment_cache, as the below
-        # will generate exactly one Python bytecode (STORE_SUBSCR)
-        _segment_cache[s] = result
-    return result
-
-
-always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-               'abcdefghijklmnopqrstuvwxyz'
-               '0123456789' '_.-')
-_safemaps = {}
-_must_quote = {}
-
-def _url_quote(s, safe = '/'):
-    """quote('abc def') -> 'abc%20def'
-
-    Faster version of Python stdlib urllib.quote.  See
-    http://bugs.python.org/issue1285086 for more information.
-
-    Each part of a URL, e.g. the path info, the query, etc., has a
-    different set of reserved characters that must be quoted.
-
-    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
-    the following reserved characters.
-
-    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
-                  "$" | ","
-
-    Each of these characters is reserved in some component of a URL,
-    but not necessarily in all of them.
-
-    By default, the quote function is intended for quoting the path
-    section of a URL.  Thus, it will not encode '/'.  This character
-    is reserved, but in typical usage the quote function is being
-    called on a path where the existing slash characters are used as
-    reserved characters.
-    """
-    cachekey = (safe, always_safe)
-    try:
-        safe_map = _safemaps[cachekey]
-        if not _must_quote[cachekey].search(s):
-            return s
-    except KeyError:
-        safe += always_safe
-        _must_quote[cachekey] = re.compile(r'[^%s]' % safe)
-        safe_map = {}
-        for i in range(256):
-            c = chr(i)
-            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
-        _safemaps[cachekey] = safe_map
-    res = map(safe_map.__getitem__, s)
-    return ''.join(res)
 
-- 
cgit v1.2.3