From a087209121285d924456ecdd850e84b33f80726f Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Wed, 30 Jun 2021 14:31:59 +0200 Subject: CGI: Properly relay UTF-8 encoded text The culprit was using "pack" to go from the String (which we get from readProcess) to the ByteString (which we need to have in the response). Since "pack" just throws away all the good codepoints, we ended up with a minor case of mojibake. There are two possible solutions to this issue: 1) Properly encode the text from String to ByteString using utf8-string's function or 2) Read a ByteString from the process by using process-extras Solution 1 works, but it has the side effect of assuming the process's output encoding (based on the LOCALE). Furthermore, we cannot even be sure that the script doesn't send a completely different encoding via the charset header field. Therefore, this would introduce 2 points at which character encoding mismatches could happen, which is not something we're looking forward to. Solution 2 has the benefit of just passing through the data basically unchanged. We don't need to convert the ByteString to a textual string ourselves, so that is a big benefit. The only thing where we now have UTF-8 conversion is when giving the request URI to the CGI script, but 1) URIs should be ASCII & percent encoded anyway, so there's no big chance of failure and 2) Now we are using UTF8.fromString properly --- Cana.cabal | 9 ++++++--- package.yaml | 3 ++- src/Cana/Extra.hs | 10 ++++++---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/Cana.cabal b/Cana.cabal index d09fbf2..161633d 100644 --- a/Cana.cabal +++ b/Cana.cabal @@ -48,11 +48,12 @@ library , mime-types , network , network-uri - , process + , process-extras , stringsearch , text , tls , transformers + , utf8-string , x509 , x509-store default-language: Haskell2010 @@ -73,11 +74,12 @@ executable Cana-exe , mime-types , network , network-uri - , process + , process-extras , stringsearch , text , tls , transformers + , utf8-string , x509 , x509-store if flag(static) @@ -106,11 +108,12 @@ test-suite Cana-test , mime-types , network , network-uri - , process + , process-extras , stringsearch , text , tls , transformers + , utf8-string , x509 , x509-store default-language: Haskell2010 diff --git a/package.yaml b/package.yaml index 6aa5a3c..c491cc2 100644 --- a/package.yaml +++ b/package.yaml @@ -33,8 +33,9 @@ dependencies: - directory - mime-types - text -- process +- process-extras - stringsearch +- utf8-string library: source-dirs: src diff --git a/src/Cana/Extra.hs b/src/Cana/Extra.hs index f4d9f73..b7119d6 100644 --- a/src/Cana/Extra.hs +++ b/src/Cana/Extra.hs @@ -25,11 +25,12 @@ import Data.Maybe import qualified Data.Text as T import System.FilePath import System.Directory -import System.Process +import System.Process.ByteString.Lazy import System.Exit (ExitCode(..)) import Control.Monad.IO.Class import qualified Data.ByteString.Lazy as BSL import Data.ByteString.Lazy.Char8 (pack) +import qualified Data.ByteString.Lazy.UTF8 as UTF8 import Network.URI import Network.Mime @@ -101,8 +102,9 @@ runCGI :: FilePath -- ^ CGI script -> [String] -- ^ Script arguments -> Handler runCGI script args request = do + let uri = UTF8.fromString . show . requestUri $ request (scriptResult :: Either SomeException rest_) <- liftIO . try - $ readProcessWithExitCode script args (show $ requestUri request) + $ readProcessWithExitCode script args uri (code, stdout, stderr) <- except $ mapLeft (\e -> "CGI execution error: " ++ show e) scriptResult @@ -110,10 +112,10 @@ runCGI script args request = do case code of ExitFailure _ -> do logError ("CGI script " ++ script ++ " exited with an error:") - liftIO $ putStrLn stderr + liftIO $ BSL.putStr stderr return invalidResponse - ExitSuccess -> case parseResponse $ pack stdout of + ExitSuccess -> case parseResponse stdout of Nothing -> do logError ("CGI script " ++ script ++ " returned malformed response") return invalidResponse -- cgit v1.2.3