diff options
| -rw-r--r-- | CHANGES.txt | 24 | ||||
| -rw-r--r-- | docs/narr/views.rst | 125 | ||||
| -rw-r--r-- | repoze/bfg/request.py | 19 | ||||
| -rw-r--r-- | repoze/bfg/tests/test_request.py | 49 |
4 files changed, 214 insertions, 3 deletions
diff --git a/CHANGES.txt b/CHANGES.txt index 8763ce038..b8b6ca64c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -37,6 +37,30 @@ Features ``http://example.com/model/url``, the generated URL will be ``http://example.com/model/url#foo``). +Backwards Incompatibilities +--------------------------- + +- The default request charset encoding is now ``utf-8``. As a result, + the request machinery will attempt to decode values from the utf-8 + encoding to Unicode automatically when they are obtained via + ``request.params``, ``request.GET``, and ``request.POST``. The + previous behavior of BFG was to return a bytestring when a value was + accessed in this manner. This change will break form handling code + in apps that rely on values from those APIs being considered + bytestrings. If you are manually decoding values from form + submissions in your application, you'll either need to change the + code that does that to expect Unicode values from + ``request.params``, ``request.GET`` and ``request.POST``, or you'll + need to explicitly reenable the previous behavior. To reenable the + previous behavior, add the following to your application's + ``configure.zcml``:: + + <subscriber for="repoze.bfg.interfaces.INewRequest" + handler="repoze.bfg.request.make_request_ascii"/> + + See also the documentation in the "Views" chapter of the BFG docs + entitled "Using Views to Handle Form Submissions (Unicode and + Character Set Issues)". 0.6.9 (2009-02-16) ================== diff --git a/docs/narr/views.rst b/docs/narr/views.rst index 3fbe8ef60..f52b0619b 100644 --- a/docs/narr/views.rst +++ b/docs/narr/views.rst @@ -576,5 +576,130 @@ these will be resolved by the static view as you would expect. <http://pythonpaste.org/modules/urlparser.html>`_ for more information about ``urlparser.StaticURLParser``. 
+Using Views to Handle Form Submissions (Unicode and Character Set Issues) +------------------------------------------------------------------------- + +Most web applications need to accept form submissions from web +browsers and various other clients. In :mod:`repoze.bfg`, form +submission handling logic is always part of a :term:`view`. For a +general overview of how to handle form submission data using the +:term:`WebOb` API, see `"Query and POST variables" within the WebOb +documentation +<http://pythonpaste.org/webob/reference.html#query-post-variables>`_. +:mod:`repoze.bfg` defers to WebOb for its request and response +implementations, and handling form submission data is a property of +the request implementation. Understanding WebOb's request API is the +key to understanding how to process form submission data. + +There are some defaults that you need to be aware of when trying to +handle form submission data in a :mod:`repoze.bfg` view. Because +having high-order (non-ASCII) characters in data contained within form +submissions is exceedingly common, and because the UTF-8 encoding is +the most common encoding used on the web for non-ASCII character data, +and because working with and storing Unicode values is much saner than +working with and storing bytestrings, :mod:`repoze.bfg` configures the +:term:`WebOb` request machinery to attempt to decode form submission +values into Unicode automatically from the UTF-8 character set +implicitly. This implicit decoding happens when view code obtains +form field values via the :term:`WebOb` ``request.params``, +``request.GET``, or ``request.POST`` APIs. + +For example, let's assume that the following form page is served up to +a browser client, and its ``action`` points at some :mod:`repoze.bfg` +view code: + +.. 
code-block:: xml + + <html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> + </head> + <form method="POST" action="myview"> + <div> + <input type="text" name="firstname"/> + </div> + <div> + <input type="text" name="lastname"/> + </div> + <input type="submit" value="Submit"/> + </form> + </html> + +The ``myview`` view code in the :mod:`repoze.bfg` application *must* +expect that the values returned by ``request.params`` will be of type +``unicode``, as opposed to type ``str``. The following will work to +accept a form post from the above form: +.. code-block:: python + + def myview(context, request): + firstname = request.params['firstname'] + lastname = request.params['lastname'] + +But the following ``myview`` view code *may not* work, as it tries to +decode already-decoded (``unicode``) values obtained from +``request.params``: + +.. code-block:: python + + def myview(context, request): + # the .decode('utf-8') will break below if there are any high-order + # characters in the firstname or lastname + firstname = request.params['firstname'].decode('utf-8') + lastname = request.params['lastname'].decode('utf-8') + +For implicit decoding to work reliably, you must ensure that every +form you render that posts to a :mod:`repoze.bfg` view is rendered via +a response that has a ``;charset=UTF-8`` in its ``Content-Type`` +header; or, as in the form above, with a ``meta http-equiv`` tag that +implies that the charset is UTF-8 within the HTML ``head`` of the page +containing the form. This must be done explicitly because all known +browser clients assume that they should encode form data in the +character set implied by the ``Content-Type`` value of the response +containing the form when subsequently submitting that form; there is +no other generally accepted way to tell browser clients which charset +to use to encode form data. 
If you do not specify an encoding +explicitly, the browser client will choose to encode form data in its +default character set before submitting it. The browser client may +have a non-UTF-8 default encoding. If such a request is handled by +your view code, when the form submission data is encoded in a non-UTF-8 +charset, eventually the WebOb request code accessed within your view +will throw an error when it can't decode some high-order character +encoded in another character set within form data, e.g. when +``request.params['somename']`` is accessed. + +If you are using the ``webob.Response`` class to generate a response, +or if you use the ``render_template*`` templating APIs, the UTF-8 +charset is set automatically as the default via the ``Content-Type`` +header. If you return a ``Content-Type`` header without an explicit +charset, a WebOb response will add a ``;charset=utf-8`` trailer to the +``Content-Type`` header value for you for response content types that +are textual (e.g. ``text/html``, ``application/xml``, etc.) as it is +rendered. If you are using your own response object, you will need to +ensure you do this yourself. + +To avoid implicit form submission value decoding, so that the values +returned from ``request.params``, ``request.GET`` and ``request.POST`` +are returned as bytestrings rather than Unicode, add the following to +your application's ``configure.zcml``:: + + <subscriber for="repoze.bfg.interfaces.INewRequest" + handler="repoze.bfg.request.make_request_ascii"/> + +You can then control form post data decoding "by hand" as necessary. +For example, when this subscriber is active, the second example above +will work unconditionally as long as you ensure that your forms are +rendered in a response that has a ``;charset=utf-8`` stanza on its +``Content-Type`` header. + +.. note:: The behavior that form values are decoded from UTF-8 to + Unicode implicitly was introduced in :mod:`repoze.bfg` 0.7.0. 
+ Previous versions of :mod:`repoze.bfg` performed no implicit + decoding of form values (the default was to treat values as + bytestrings). + +.. note:: Only the *values* of request params obtained via + ``request.params``, ``request.GET`` or ``request.POST`` are decoded + to Unicode objects implicitly by :mod:`repoze.bfg`. The keys are + still strings. diff --git a/repoze/bfg/request.py b/repoze/bfg/request.py index 8d938e974..de5711e0a 100644 --- a/repoze/bfg/request.py +++ b/repoze/bfg/request.py @@ -3,6 +3,17 @@ from webob import Request as WebobRequest import repoze.bfg.interfaces +def make_request_ascii(event): + """ An event handler that causes the request charset to be ASCII; + used as an INewRequest subscriber so code written before 0.7.0 can + continue to work without a change""" + request = event.request + request.charset = None + +class Request(WebobRequest): + implements(repoze.bfg.interfaces.IRequest) + charset = 'utf-8' + # We use 'precooked' Request subclasses that correspond to HTTP # request methods within ``router.py`` when constructing a request # object rather than using ``alsoProvides`` to attach the proper @@ -13,23 +24,25 @@ import repoze.bfg.interfaces # ``HTTP_METHOD_FACTORIES`` lookup dict should be imported directly by # user code. 
-class Request(WebobRequest): - implements(repoze.bfg.interfaces.IRequest) - class GETRequest(WebobRequest): implements(repoze.bfg.interfaces.IGETRequest) + charset = 'utf-8' class POSTRequest(WebobRequest): implements(repoze.bfg.interfaces.IPOSTRequest) + charset = 'utf-8' class PUTRequest(WebobRequest): implements(repoze.bfg.interfaces.IPUTRequest) + charset = 'utf-8' class DELETERequest(WebobRequest): implements(repoze.bfg.interfaces.IDELETERequest) + charset = 'utf-8' class HEADRequest(WebobRequest): implements(repoze.bfg.interfaces.IHEADRequest) + charset = 'utf-8' HTTP_METHOD_FACTORIES = { 'GET':GETRequest, diff --git a/repoze/bfg/tests/test_request.py b/repoze/bfg/tests/test_request.py new file mode 100644 index 000000000..f2124d4c9 --- /dev/null +++ b/repoze/bfg/tests/test_request.py @@ -0,0 +1,49 @@ +import unittest + +class TestMakeRequestASCII(unittest.TestCase): + def _callFUT(self, event): + from repoze.bfg.request import make_request_ascii + return make_request_ascii(event) + + def test_it(self): + request = DummyRequest() + event = DummyNewRequestEvent(request) + self._callFUT(event) + self.assertEqual(request.charset, None) + +class TestSubclassedRequest(unittest.TestCase): + def _getTargetClass(self): + from repoze.bfg.request import Request + return Request + + def _makeOne(self, environ): + request = self._getTargetClass()(environ) + return request + + def test_params_decoded_from_utf_8_by_default(self): + environ = { + 'PATH_INFO':'/', + 'QUERY_STRING':'la=La%20Pe%C3%B1a' + } + request = self._makeOne(environ) + self.assertEqual(request.GET['la'], u'La Pe\xf1a') + + def test_params_bystring_when_charset_None(self): + environ = { + 'PATH_INFO':'/', + 'QUERY_STRING':'la=La%20Pe%C3%B1a' + } + request = self._makeOne(environ) + request.charset = None + self.assertEqual(request.GET['la'], 'La Pe\xc3\xb1a') + +class DummyRequest: + pass + +class DummyNewRequestEvent: + def __init__(self, request): + self.request = request + + + + |
