summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGES.txt24
-rw-r--r--docs/narr/views.rst125
-rw-r--r--repoze/bfg/request.py19
-rw-r--r--repoze/bfg/tests/test_request.py49
4 files changed, 214 insertions, 3 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index 8763ce038..b8b6ca64c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -37,6 +37,30 @@ Features
``http://example.com/model/url``, the generated URL will be
``http://example.com/model/url#foo``).
+Backwards Incompatibilities
+---------------------------
+
+- The default request charset encoding is now ``utf-8``. As a result,
+ the request machinery will attempt to decode values from the utf-8
+ encoding to Unicode automatically when they are obtained via
+ ``request.params``, ``request.GET``, and ``request.POST``. The
+ previous behavior of BFG was to return a bytestring when a value was
+ accessed in this manner. This change will break form handling code
+ in apps that rely on values from those APIs being considered
+ bytestrings. If you are manually decoding values from form
+ submissions in your application, you'll either need to change the
+ code that does that to expect Unicode values from
+ ``request.params``, ``request.GET`` and ``request.POST``, or you'll
+ need to explicitly reenable the previous behavior. To reenable the
+ previous behavior, add the following to your application's
+ ``configure.zcml``::
+
+ <subscriber for="repoze.bfg.interfaces.INewRequest"
+ handler="repoze.bfg.request.make_request_ascii"/>
+
+ See also the documentation in the "Views" chapter of the BFG docs
+ entitled "Using Views to Handle Form Submissions (Unicode and
+ Character Set Issues)".
0.6.9 (2009-02-16)
==================
diff --git a/docs/narr/views.rst b/docs/narr/views.rst
index 3fbe8ef60..f52b0619b 100644
--- a/docs/narr/views.rst
+++ b/docs/narr/views.rst
@@ -576,5 +576,130 @@ these will be resolved by the static view as you would expect.
<http://pythonpaste.org/modules/urlparser.html>`_ for more
information about ``urlparser.StaticURLParser``.
+Using Views to Handle Form Submissions (Unicode and Character Set Issues)
+-------------------------------------------------------------------------
+
+Most web applications need to accept form submissions from web
+browsers and various other clients. In :mod:`repoze.bfg`, form
+submission handling logic is always part of a :term:`view`. For a
+general overview of how to handle form submission data using the
+:term:`WebOb` API, see `"Query and POST variables" within the WebOb
+documentation
+<http://pythonpaste.org/webob/reference.html#query-post-variables>`_.
+:mod:`repoze.bfg` defers to WebOb for its request and response
+implementations, and handling form submission data is a property of
+the request implementation. Understanding WebOb's request API is the
+key to understanding how to process form submission data.
+
+There are some defaults that you need to be aware of when trying to
+handle form submission data in a :mod:`repoze.bfg` view. Because
+having high-order (non-ASCII) characters in data contained within form
+submissions is exceedingly common, and because the UTF-8 encoding is
+the most common encoding used on the web for non-ASCII character data,
+and because working with and storing Unicode values is much saner than
+working with and storing bytestrings, :mod:`repoze.bfg` configures the
+:term:`WebOb` request machinery to attempt to decode form submission
+values into Unicode automatically from the UTF-8 character set
+implicitly. This implicit decoding happens when view code obtains
+form field values via the :term:`WebOb` ``request.params``,
+``request.GET``, or ``request.POST`` APIs.
+
+For example, let's assume that the following form page is served up to
+a browser client, and its ``action`` points at some :mod:`repoze.bfg`
+view code:
+
+.. code-block:: xml
+
+ <html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
+ </head>
+ <form method="POST" action="myview">
+ <div>
+ <input type="text" name="firstname"/>
+ </div>
+ <div>
+ <input type="text" name="lastname"/>
+ </div>
+ <input type="submit" value="Submit"/>
+ </form>
+ </html>
+
+The ``myview`` view code in the :mod:`repoze.bfg` application *must*
+expect that the values returned by ``request.params`` will be of type
+``unicode``, as opposed to type ``str``. The following will work to
+accept a form post from the above form:
+
+.. code-block:: python
+
+ def myview(context, request):
+ firstname = request.params['firstname']
+ lastname = request.params['lastname']
+
+But the following ``myview`` view code *may not* work, as it tries to
+decode already-decoded (``unicode``) values obtained from
+``request.params``:
+
+.. code-block:: python
+
+ def myview(context, request):
+ # the .decode('utf-8') will break below if there are any high-order
+ # characters in the firstname or lastname
+ firstname = request.params['firstname'].decode('utf-8')
+ lastname = request.params['lastname'].decode('utf-8')
+
+For implicit decoding to work reliably, you must ensure that every
+form you render that posts to a :mod:`repoze.bfg` view is rendered via
+a response that has a ``;charset=UTF-8`` in its ``Content-Type``
+header; or, as in the form above, with a ``meta http-equiv`` tag that
+implies that the charset is UTF-8 within the HTML ``head`` of the page
+containing the form. This must be done explicitly because all known
+browser clients assume that they should encode form data in the
+character set implied by ``Content-Type`` value of the response
+containing the form when subsequently submitting that form; there is
+no other generally accepted way to tell browser clients which charset
+to use to encode form data. If you do not specify an encoding
+explicitly, the browser client will choose to encode form data in its
+default character set before submitting it. The browser client may
+have a non-UTF-8 default encoding. If such a request is handled by
+your view code, and the form submission data is encoded in a non-UTF-8
+charset, the WebOb request code accessed within your view will
+eventually raise an error when it can't decode some high-order
+character encoded in another character set within the form data (e.g.
+when ``request.params['somename']`` is accessed).
+
+If you are using the ``webob.Response`` class to generate a response,
+or if you use the ``render_template*`` templating APIs, the UTF-8
+charset is set automatically as the default via the ``Content-Type``
+header. If you return a ``Content-Type`` header without an explicit
+charset, a WebOb response will add a ``;charset=utf-8`` trailer to the
+``Content-Type`` header value for you for response content types that
+are textual (e.g. ``text/html``, ``application/xml``, etc) as it is
+rendered. If you are using your own response object, you will need to
+ensure you do this yourself.
+
+To avoid implicit form submission value decoding, so that the values
+returned from ``request.params``, ``request.GET`` and ``request.POST``
+are returned as bytestrings rather than Unicode, add the following to
+your application's ``configure.zcml``::
+
+ <subscriber for="repoze.bfg.interfaces.INewRequest"
+ handler="repoze.bfg.request.make_request_ascii"/>
+
+You can then control form post data decoding "by hand" as necessary.
+For example, when this subscriber is active, the second example above
+will work unconditionally as long as you ensure that your forms are
+rendered in a response that has a ``;charset=utf-8`` stanza on its
+``Content-Type`` header.
+
+.. note:: The behavior that form values are decoded from UTF-8 to
+ Unicode implicitly was introduced in :mod:`repoze.bfg` 0.7.0.
+ Previous versions of :mod:`repoze.bfg` performed no implicit
+ decoding of form values (the default was to treat values as
+ bytestrings).
+
+.. note:: Only the *values* of request params obtained via
+ ``request.params``, ``request.GET`` or ``request.POST`` are decoded
+ to Unicode objects implicitly by :mod:`repoze.bfg`. The keys are
+ still of type ``str`` (they are not decoded).
diff --git a/repoze/bfg/request.py b/repoze/bfg/request.py
index 8d938e974..de5711e0a 100644
--- a/repoze/bfg/request.py
+++ b/repoze/bfg/request.py
@@ -3,6 +3,17 @@ from webob import Request as WebobRequest
import repoze.bfg.interfaces
+def make_request_ascii(event):
+ """ An event handler that sets the request charset to ``None``
+ (values stay undecoded bytestrings); used as an INewRequest subscriber
+ so code written before 0.7.0 can continue to work without a change"""
+ request = event.request
+ request.charset = None
+
+class Request(WebobRequest):
+ implements(repoze.bfg.interfaces.IRequest)
+ charset = 'utf-8'
+
# We use 'precooked' Request subclasses that correspond to HTTP
# request methods within ``router.py`` when constructing a request
# object rather than using ``alsoProvides`` to attach the proper
@@ -13,23 +24,25 @@ import repoze.bfg.interfaces
# ``HTTP_METHOD_FACTORIES`` lookup dict should be imported directly by
# user code.
-class Request(WebobRequest):
- implements(repoze.bfg.interfaces.IRequest)
-
class GETRequest(WebobRequest):
implements(repoze.bfg.interfaces.IGETRequest)
+ charset = 'utf-8'
class POSTRequest(WebobRequest):
implements(repoze.bfg.interfaces.IPOSTRequest)
+ charset = 'utf-8'
class PUTRequest(WebobRequest):
implements(repoze.bfg.interfaces.IPUTRequest)
+ charset = 'utf-8'
class DELETERequest(WebobRequest):
implements(repoze.bfg.interfaces.IDELETERequest)
+ charset = 'utf-8'
class HEADRequest(WebobRequest):
implements(repoze.bfg.interfaces.IHEADRequest)
+ charset = 'utf-8'
HTTP_METHOD_FACTORIES = {
'GET':GETRequest,
diff --git a/repoze/bfg/tests/test_request.py b/repoze/bfg/tests/test_request.py
new file mode 100644
index 000000000..f2124d4c9
--- /dev/null
+++ b/repoze/bfg/tests/test_request.py
@@ -0,0 +1,49 @@
+import unittest
+
+class TestMakeRequestASCII(unittest.TestCase):
+ def _callFUT(self, event):
+ from repoze.bfg.request import make_request_ascii
+ return make_request_ascii(event)
+
+ def test_it(self):
+ request = DummyRequest()
+ event = DummyNewRequestEvent(request)
+ self._callFUT(event)
+ self.assertEqual(request.charset, None)
+
+class TestSubclassedRequest(unittest.TestCase):
+ def _getTargetClass(self):
+ from repoze.bfg.request import Request
+ return Request
+
+ def _makeOne(self, environ):
+ request = self._getTargetClass()(environ)
+ return request
+
+ def test_params_decoded_from_utf_8_by_default(self):
+ environ = {
+ 'PATH_INFO':'/',
+ 'QUERY_STRING':'la=La%20Pe%C3%B1a'
+ }
+ request = self._makeOne(environ)
+ self.assertEqual(request.GET['la'], u'La Pe\xf1a')
+
+ def test_params_bystring_when_charset_None(self):
+ environ = {
+ 'PATH_INFO':'/',
+ 'QUERY_STRING':'la=La%20Pe%C3%B1a'
+ }
+ request = self._makeOne(environ)
+ request.charset = None
+ self.assertEqual(request.GET['la'], 'La Pe\xc3\xb1a')
+
+class DummyRequest:
+ pass
+
+class DummyNewRequestEvent:
+ def __init__(self, request):
+ self.request = request
+
+
+
+