aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Schadt <kingdread@gmx.de>2021-06-30 14:31:59 +0200
committerDaniel Schadt <kingdread@gmx.de>2021-06-30 14:31:59 +0200
commita087209121285d924456ecdd850e84b33f80726f (patch)
tree39c6243077fdbde90d5197c9727a709bbeaf93ef
parent04292d18c8ddd394acd683662caa9f848eced9d3 (diff)
downloadCana-a087209121285d924456ecdd850e84b33f80726f.tar.gz
Cana-a087209121285d924456ecdd850e84b33f80726f.tar.bz2
Cana-a087209121285d924456ecdd850e84b33f80726f.zip
CGI: Properly relay UTF-8 encoded text
The culprit was using "pack" to go from the String (which we get from readProcess) to the ByteString (which we need to have in the response). Since "pack" just throws away all the good codepoints, we ended up with a minor case of mojibake. There are two possible solutions to this issue: 1) Properly encode the text from String to ByteString using utf8-string's function or 2) Read a ByteString from the process by using process-extras Solution 1 works, but it has the side effect of assuming the process's output encoding (based on the LOCALE). Furthermore, we cannot even be sure that the script doesn't send a completely different encoding via the charset header field. Therefore, this would introduce 2 points at which character encoding mismatches could happen, which is not something we're looking forward to. Solution 2 has the benefit of just passing through the data basically unchanged. We don't need to convert the ByteString to a textual string ourselves, so that is a big benefit. The only thing where we now have UTF-8 conversion is when giving the request URI to the CGI script, but 1) URIs should be ASCII & percent encoded anyway, so there's no big chance of failure and 2) Now we are using UTF8.fromString properly
-rw-r--r--Cana.cabal9
-rw-r--r--package.yaml3
-rw-r--r--src/Cana/Extra.hs10
3 files changed, 14 insertions, 8 deletions
diff --git a/Cana.cabal b/Cana.cabal
index d09fbf2..161633d 100644
--- a/Cana.cabal
+++ b/Cana.cabal
@@ -48,11 +48,12 @@ library
, mime-types
, network
, network-uri
- , process
+ , process-extras
, stringsearch
, text
, tls
, transformers
+ , utf8-string
, x509
, x509-store
default-language: Haskell2010
@@ -73,11 +74,12 @@ executable Cana-exe
, mime-types
, network
, network-uri
- , process
+ , process-extras
, stringsearch
, text
, tls
, transformers
+ , utf8-string
, x509
, x509-store
if flag(static)
@@ -106,11 +108,12 @@ test-suite Cana-test
, mime-types
, network
, network-uri
- , process
+ , process-extras
, stringsearch
, text
, tls
, transformers
+ , utf8-string
, x509
, x509-store
default-language: Haskell2010
diff --git a/package.yaml b/package.yaml
index 6aa5a3c..c491cc2 100644
--- a/package.yaml
+++ b/package.yaml
@@ -33,8 +33,9 @@ dependencies:
- directory
- mime-types
- text
-- process
+- process-extras
- stringsearch
+- utf8-string
library:
source-dirs: src
diff --git a/src/Cana/Extra.hs b/src/Cana/Extra.hs
index f4d9f73..b7119d6 100644
--- a/src/Cana/Extra.hs
+++ b/src/Cana/Extra.hs
@@ -25,11 +25,12 @@ import Data.Maybe
import qualified Data.Text as T
import System.FilePath
import System.Directory
-import System.Process
+import System.Process.ByteString.Lazy
import System.Exit (ExitCode(..))
import Control.Monad.IO.Class
import qualified Data.ByteString.Lazy as BSL
import Data.ByteString.Lazy.Char8 (pack)
+import qualified Data.ByteString.Lazy.UTF8 as UTF8
import Network.URI
import Network.Mime
@@ -101,8 +102,9 @@ runCGI :: FilePath -- ^ CGI script
-> [String] -- ^ Script arguments
-> Handler
runCGI script args request = do
+ let uri = UTF8.fromString . show . requestUri $ request
(scriptResult :: Either SomeException rest_) <- liftIO . try
- $ readProcessWithExitCode script args (show $ requestUri request)
+ $ readProcessWithExitCode script args uri
(code, stdout, stderr) <- except
$ mapLeft (\e -> "CGI execution error: " ++ show e) scriptResult
@@ -110,10 +112,10 @@ runCGI script args request = do
case code of
ExitFailure _ -> do
logError ("CGI script " ++ script ++ " exited with an error:")
- liftIO $ putStrLn stderr
+ liftIO $ BSL.putStr stderr
return invalidResponse
- ExitSuccess -> case parseResponse $ pack stdout of
+ ExitSuccess -> case parseResponse stdout of
Nothing -> do
logError ("CGI script " ++ script ++ " returned malformed response")
return invalidResponse