aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Schadt <kingdread@gmx.de>2023-12-02 11:52:18 +0100
committerDaniel Schadt <kingdread@gmx.de>2023-12-02 11:52:18 +0100
commit430c17097cba8c970c2826be0e622b6cb7bb6818 (patch)
tree33f63a17c359816b176aa364c130c32257e8cc4b
downloadMiniScalp-master.tar.gz
MiniScalp-master.tar.bz2
MiniScalp-master.zip
head commitHEADmaster
-rw-r--r--.gitignore2
-rw-r--r--CHANGELOG.md11
-rw-r--r--LICENSE19
-rw-r--r--README.md61
-rw-r--r--Setup.hs2
-rw-r--r--app/Main.hs20
-rw-r--r--miniscalp.cabal84
-rw-r--r--package.yaml67
-rw-r--r--src/MiniScalp.hs56
-rw-r--r--src/MiniScalp/Predicates.hs72
-rw-r--r--src/MiniScalp/Query.hs91
-rw-r--r--src/MiniScalp/Sources.hs32
-rw-r--r--src/MiniScalp/Types.hs61
-rw-r--r--stack.yaml67
-rw-r--r--stack.yaml.lock13
-rw-r--r--test/Spec.hs2
16 files changed, 660 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c368d45
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.stack-work/
+*~ \ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..cfb5e35
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,11 @@
+# Changelog for `MiniScalp`
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to the
+[Haskell Package Versioning Policy](https://pvp.haskell.org/).
+
+## Unreleased
+
+## 0.1.0.0 - YYYY-MM-DD
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..92bf908
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,19 @@
+Copyright © 2023 Dunj3
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the “Software”), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9b9f866
--- /dev/null
+++ b/README.md
@@ -0,0 +1,61 @@
+# MiniScalp
+
+This is my learning experiment to build a Haskell HTML scraping interface on
+top of [`zenacy-html`](https://hackage.haskell.org/package/zenacy-html),
+inspired by [`scalpel`](https://hackage.haskell.org/package/scalpel) but with
+more correct (according to the spec) HTML parsing.
+
+Don't expect this package to be in any form complete or production ready!
+
+## Example
+
+```haskell
+module Main (main) where
+
+import Control.Monad
+import Data.Maybe
+import Data.Text
+import MiniScalp.Predicates
+import MiniScalp.Query
+import MiniScalp.Sources
+import MiniScalp.Types
+
+data MensaLine = MensaLine Text [Text] deriving (Show)
+
+mensaScraper :: Scraper [MensaLine]
+mensaScraper = chroot ("id" @= "canteen_day_1") $ do
+ chroots (tag "tr" @& hasClass "mensatype_rows") $ do
+ name <- chroot ("td" @: [hasClass "mensatype"]) text'
+ meals <- chroots ("td" @: [hasClass "menu-title"]) text'
+ return $ MensaLine name meals
+
+main :: IO ()
+main = do
+ scraped <- fromJust <$> scrapeFile "mensa.html" mensaScraper
+ forM_ scraped $ \(MensaLine name meals) -> do
+ putStrLn $ unpack name
+ forM_ meals $ \meal -> putStrLn (" " ++ unpack meal)
+ putStrLn ""
+```
+
+## Licence
+
+Copyright © 2023 Dunj3
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the “Software”), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Setup.hs b/Setup.hs
new file mode 100644
index 0000000..9a994af
--- /dev/null
+++ b/Setup.hs
@@ -0,0 +1,2 @@
+import Distribution.Simple
+-- | Build entry point: hands control to Cabal's 'defaultMain'.
+main :: IO ()
+main = defaultMain
diff --git a/app/Main.hs b/app/Main.hs
new file mode 100644
index 0000000..7c960b1
--- /dev/null
+++ b/app/Main.hs
@@ -0,0 +1,20 @@
+module Main (main) where
+
+import Data.Text
+import MiniScalp.Predicates
+import MiniScalp.Query
+import MiniScalp.Sources
+import MiniScalp.Types
+
+-- | Same type as in the README example (a 'Text' plus a list of 'Text').
+--
+-- Nothing else in this module refers to it.
+data MensaLine
+  = MensaLine Text [Text]
+  deriving (Show)
+
+-- | Small inline HTML document that the demo scrapes.
+source :: Text
+source =
+  "<!DOCTYPE html><html><div class=\"content\"><p>Hey <span id=\"username\">Alice</span>!</p></div></html>"
+
+-- | Focuses the @div@ with class @content@, then the @span@ with id @username@ inside it, and reads that span's
+-- text.
+nameScraper :: Scraper Text
+nameScraper = chroot contentDiv (chroot usernameSpan text)
+  where
+    contentDiv = "div" @: [hasClass "content"]
+    usernameSpan = "span" @: ["id" @= "username"]
+
+-- | Runs 'nameScraper' over 'source' and prints the result.
+main :: IO ()
+main = print (scrapeText source nameScraper)
diff --git a/miniscalp.cabal b/miniscalp.cabal
new file mode 100644
index 0000000..f3372f5
--- /dev/null
+++ b/miniscalp.cabal
@@ -0,0 +1,84 @@
+cabal-version: 1.12
+
+-- This file has been generated from package.yaml by hpack version 0.35.2.
+--
+-- see: https://github.com/sol/hpack
+
+name: miniscalp
+version: 0.1.0.0
+description: Please see the README on GitLab at <https://gitlab.com/dunj3/MiniScalp/-/blob/master/README.md>
+homepage: https://gitlab.com/dunj3/MiniScalp
+author: Dunj3
+maintainer: Dunj3
+copyright: 2023 Dunj3
+license: MIT
+license-file: LICENSE
+build-type: Simple
+extra-source-files:
+ README.md
+ CHANGELOG.md
+
+library
+ exposed-modules:
+ MiniScalp
+ MiniScalp.Predicates
+ MiniScalp.Query
+ MiniScalp.Sources
+ MiniScalp.Types
+ other-modules:
+ Paths_miniscalp
+ hs-source-dirs:
+ src
+ default-extensions:
+ GeneralizedNewtypeDeriving
+ OverloadedStrings
+ ImportQualifiedPost
+ ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints
+ build-depends:
+ base >=4.7 && <5
+ , mtl
+ , text
+ , transformers
+ , zenacy-html
+ default-language: Haskell2010
+
+executable MiniScalp-exe
+ main-is: Main.hs
+ other-modules:
+ Paths_miniscalp
+ hs-source-dirs:
+ app
+ default-extensions:
+ GeneralizedNewtypeDeriving
+ OverloadedStrings
+ ImportQualifiedPost
+ ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N
+ build-depends:
+ base >=4.7 && <5
+ , miniscalp
+ , mtl
+ , text
+ , transformers
+ , zenacy-html
+ default-language: Haskell2010
+
+test-suite MiniScalp-test
+ type: exitcode-stdio-1.0
+ main-is: Spec.hs
+ other-modules:
+ Paths_miniscalp
+ hs-source-dirs:
+ test
+ default-extensions:
+ GeneralizedNewtypeDeriving
+ OverloadedStrings
+ ImportQualifiedPost
+ ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N
+ build-depends:
+ base >=4.7 && <5
+ , miniscalp
+ , mtl
+ , text
+ , transformers
+ , zenacy-html
+ default-language: Haskell2010
diff --git a/package.yaml b/package.yaml
new file mode 100644
index 0000000..867cced
--- /dev/null
+++ b/package.yaml
@@ -0,0 +1,67 @@
+name: miniscalp
+version: 0.1.0.0
+homepage: "https://gitlab.com/dunj3/MiniScalp"
+license: MIT
+author: "Dunj3"
+copyright: "2023 Dunj3"
+
+extra-source-files:
+- README.md
+- CHANGELOG.md
+
+# Metadata used when publishing your package
+# synopsis: Short description of your package
+# category: Web
+
+# To avoid duplicated efforts in documentation and dealing with the
+# complications of embedding Haddock markup inside cabal files, it is
+# common to point users to the README.md file.
+description: Please see the README on GitLab at <https://gitlab.com/dunj3/MiniScalp/-/blob/master/README.md>
+
+dependencies:
+- base >= 4.7 && < 5
+- mtl
+- text
+- transformers
+- zenacy-html
+
+default-extensions:
+- GeneralizedNewtypeDeriving
+- OverloadedStrings
+- ImportQualifiedPost
+
+ghc-options:
+- -Wall
+- -Wcompat
+- -Widentities
+- -Wincomplete-record-updates
+- -Wincomplete-uni-patterns
+- -Wmissing-export-lists
+- -Wmissing-home-modules
+- -Wpartial-fields
+- -Wredundant-constraints
+
+library:
+ source-dirs: src
+
+executables:
+ MiniScalp-exe:
+ main: Main.hs
+ source-dirs: app
+ ghc-options:
+ - -threaded
+ - -rtsopts
+ - -with-rtsopts=-N
+ dependencies:
+ - miniscalp
+
+tests:
+ MiniScalp-test:
+ main: Spec.hs
+ source-dirs: test
+ ghc-options:
+ - -threaded
+ - -rtsopts
+ - -with-rtsopts=-N
+ dependencies:
+ - miniscalp
diff --git a/src/MiniScalp.hs b/src/MiniScalp.hs
new file mode 100644
index 0000000..f0e3e11
--- /dev/null
+++ b/src/MiniScalp.hs
@@ -0,0 +1,56 @@
+-- |
+-- Module : MiniScalp
+-- Description : Wrapper around zenacy-html for web scraping
+-- Copyright : (c) Daniel Schadt, 2023
+-- License : MIT
+-- Maintainer : sample@email.com
+-- Stability : experimental
+-- Portability : POSIX
+--
+-- MiniScalp is a wrapper around [@zenacy-html@](https://hackage.haskell.org/package/zenacy-html) to do web scraping.
+--
+-- As @zenacy-html@ implements HTML parsing according to the WHATWG standard, this should produce results equivalent to
+-- what your browser produces.
+--
+-- = DOM navigation
+--
+-- The interface of MiniScalp is similar to that of [@scalpel@](https://hackage.haskell.org/package/scalpel) in the
+-- sense that you are provided with a monadic interface to the DOM. The bread and butter function is
+-- 'MiniScalp.Query.chroot', which allows you to focus on a specific subpart of the DOM by using
+-- 'MiniScalp.Types.Predicate' functions.
+--
+-- A number of helpful predicates are defined in "MiniScalp.Predicates".
+--
+-- = Example
+--
+-- > import Control.Monad
+-- > import Data.Maybe
+-- > import Data.Text
+-- > import MiniScalp.Predicates
+-- > import MiniScalp.Query
+-- > import MiniScalp.Sources
+-- > import MiniScalp.Types
+-- >
+-- > data MensaLine = MensaLine Text [Text] deriving (Show)
+-- >
+-- > mensaScraper :: Scraper [MensaLine]
+-- > mensaScraper = chroots (tag "tr" @& hasClass "mensatype_rows") $ do
+-- > name <- chroot ("td" @: [hasClass "mensatype"]) text'
+-- > meals <- chroots ("td" @: [hasClass "menu-title"]) text'
+-- > return $ MensaLine name meals
+-- >
+-- > main :: IO ()
+-- > main = do
+-- > scraped <- fromJust <$> scrapeFile "mensa.html" mensaScraper
+-- > forM_ scraped $ \(MensaLine name meals) -> do
+-- > putStrLn $ unpack name
+-- > forM_ meals $ \meal -> putStrLn (" " ++ unpack meal)
+-- > putStrLn ""
+--
+-- = Modules
+--
+-- * "MiniScalp.Types": Basic definitions of the needed types.
+-- * "MiniScalp.Query": Data extraction routines.
+-- * "MiniScalp.Predicates": Predicates to select the wanted nodes.
+-- * "MiniScalp.Sources": Various data sources.
+module MiniScalp () where
diff --git a/src/MiniScalp/Predicates.hs b/src/MiniScalp/Predicates.hs
new file mode 100644
index 0000000..bcf0a11
--- /dev/null
+++ b/src/MiniScalp/Predicates.hs
@@ -0,0 +1,72 @@
+-- | Various predicates to match HTML nodes.
+module MiniScalp.Predicates
+ ( tag,
+ (@&),
+ (@|),
+ (@=),
+ (@/),
+ (@:),
+ hasClass,
+ )
+where
+
+import Data.List (tails)
+import Data.Text (Text)
+import MiniScalp.Types
+import Zenacy.HTML
+
+-- | Conjunction of two predicates: a node is accepted only when both operands accept it.
+--
+-- > tag "tr" @& hasClass "menu-title"
+(@&) :: Predicate -> Predicate -> Predicate
+(lhs @& rhs) stack current = lhs stack current && rhs stack current
+
+infixl 8 @&
+
+-- | Disjunction of two predicates: a node is accepted when at least one operand accepts it.
+--
+-- > tag "thead" @| tag "tbody"
+(@|) :: Predicate -> Predicate -> Predicate
+(lhs @| rhs) stack current = lhs stack current || rhs stack current
+
+infixl 7 @|
+
+-- | Accepts a node whose element name equals the given tag; the parent stack is ignored.
+--
+-- > tag "p"
+tag :: Text -> Predicate
+tag name _ current = htmlElemHasName name current
+
+-- | Accepts a node that carries the given attribute with the given value; the parent stack is ignored.
+--
+-- > "id" @= "description"
+(@=) :: Text -> Text -> Predicate
+(key @= value) _ = htmlElemHasAttrVal key value
+
+infix 9 @=
+
+-- | Accepts a node whose classes contain the given class; the parent stack is ignored.
+--
+-- > hasClass "src"
+hasClass :: Text -> Predicate
+hasClass cls = \_ current -> htmlElemClassesContains cls current
+
+-- | Accepts a node when the right predicate accepts it and the left predicate accepts at least one of its
+-- ancestors.
+--
+-- Each ancestor is tested together with its own parent stack, i.e. the part of the stack that follows it.
+--
+-- > tag "p" @/ tag "img"
+(@/) :: Predicate -> Predicate -> Predicate
+(ancestorP @/ nodeP) stack current =
+  nodeP stack current && any ancestorMatches (zip (drop 1 (tails stack)) stack)
+  where
+    ancestorMatches (ownParents, ancestor) = ancestorP ownParents ancestor
+
+infixl 1 @/
+
+-- | Shorthand for "this tag, and every one of these predicates".
+--
+-- The following two are equivalent:
+--
+-- > "p" @: [hasClass "text", "id" @= "description"]
+--
+-- and
+--
+-- > tag "p" @& hasClass "text" @& "id" @= "description"
+(@:) :: Text -> [Predicate] -> Predicate
+(name @: extras) stack current =
+  tag name stack current && all (\check -> check stack current) extras
diff --git a/src/MiniScalp/Query.hs b/src/MiniScalp/Query.hs
new file mode 100644
index 0000000..07784fb
--- /dev/null
+++ b/src/MiniScalp/Query.hs
@@ -0,0 +1,91 @@
+-- | Data retrieval functions.
+module MiniScalp.Query
+ ( -- * Simple accessors
+ node,
+ parents,
+ text,
+ text',
+ attribute,
+ html,
+
+ -- * Complex navigation
+ retrieve,
+ chroots,
+ chroot,
+ )
+where
+
+import Control.Applicative (empty, optional)
+import Control.Monad (forM)
+import Control.Monad.Reader (asks, local)
+import Data.Maybe (catMaybes)
+import Data.Text (Text)
+import Data.Text qualified as T
+import MiniScalp.Types
+import Zenacy.HTML
+
+-- | Yields the node the scraper is currently focused on (the second component of the context).
+node :: (Monad m) => ScraperT m HTMLNode
+node = asks (\(_, current) -> current)
+
+-- | Yields the stack of parent nodes of the current node (the first component of the context).
+--
+-- Note that the first entry is the immediate parent.
+parents :: (Monad m) => ScraperT m [HTMLNode]
+parents = asks (\(ancestors, _) -> ancestors)
+
+-- | Retrieves the text of the current node as reported by 'htmlElemText'.
+--
+-- Fails if 'htmlElemText' yields no text for the node.
+text :: (Monad m) => ScraperT m Text
+text = do
+  current <- node
+  case htmlElemText current of
+    Just content -> return content
+    Nothing -> empty
+
+-- | Retrieves the concatenated text of the current node and all of its descendants.
+--
+-- Only text nodes contribute; doctype, template and comment nodes yield no text.
+text' :: (Monad m) => ScraperT m Text
+text' = fmap gather node
+  where
+    gather :: HTMLNode -> Text
+    gather current = case current of
+      HTMLText content -> content
+      HTMLDocument _ kids -> gatherAll kids
+      HTMLFragment _ kids -> gatherAll kids
+      HTMLElement _ _ _ kids -> gatherAll kids
+      HTMLDoctype {} -> mempty
+      HTMLTemplate {} -> mempty
+      HTMLComment _ -> mempty
+
+    gatherAll :: [HTMLNode] -> Text
+    gatherAll = T.concat . map gather
+
+-- | Looks up the attribute with the given name on the current node and yields its value.
+--
+-- Fails if the attribute does not exist.
+attribute :: (Monad m) => Text -> ScraperT m Text
+attribute attrName = do
+  current <- node
+  maybe empty return (htmlElemGetAttr attrName current)
+
+-- | Renders the current node back to HTML with 'htmlRender'.
+--
+-- Note that the output is re-rendered from the DOM and so may differ from the original source text.
+html :: (Monad m) => ScraperT m Text
+html = fmap htmlRender node
+
+-- | Collects the context of every node in the subtree rooted at the current node that satisfies the predicate.
+--
+-- The current node itself is included when it matches. A matching node comes before the matches found below it,
+-- and children are visited in the order given by 'htmlNodeContent'.
+retrieve :: (Monad m) => Predicate -> ScraperT m [ScrapeContext]
+retrieve predicate = do
+  current <- node
+  ancestors <- parents
+  -- Descend into a child with the current node pushed onto the parent stack.
+  let descend child = local (const (current : ancestors, child)) (retrieve predicate)
+  nested <- forM (htmlNodeContent current) descend
+  return ([(ancestors, current) | predicate ancestors current] ++ concat nested)
+
+-- | Finds the nodes matching the predicate (via 'retrieve') and runs the given scraper once in each of their
+-- contexts.
+--
+-- Contexts in which the scraper fails are silently left out of the result.
+chroots :: (Monad m) => Predicate -> ScraperT m a -> ScraperT m [a]
+chroots p s = do
+  contexts <- retrieve p
+  attempts <- forM contexts (\ctx -> local (const ctx) (optional s))
+  return (catMaybes attempts)
+
+-- | Like 'chroots', but yields only the result from the first matching context in which the scraper succeeds.
+--
+-- Matching contexts are tried in the order 'retrieve' returns them. The search stops at the first success, so the
+-- scraper is not run in any later context.
+--
+-- Fails if no element matches the predicate, or if the scraper fails in every matching context.
+chroot :: (Monad m) => Predicate -> ScraperT m a -> ScraperT m a
+chroot p s = retrieve p >>= firstSuccess
+  where
+    -- Try each context in turn; 'optional' turns a failed attempt into 'Nothing' so the next context is tried.
+    firstSuccess [] = empty
+    firstSuccess (ctx : rest) = local (const ctx) (optional s) >>= maybe (firstSuccess rest) return
diff --git a/src/MiniScalp/Sources.hs b/src/MiniScalp/Sources.hs
new file mode 100644
index 0000000..959b41a
--- /dev/null
+++ b/src/MiniScalp/Sources.hs
@@ -0,0 +1,32 @@
+-- | Various entry points for scrapers using different data sources.
+module MiniScalp.Sources
+ ( -- * Scraping in-memory text
+ scrapeTextT,
+ scrapeText,
+ -- * Scraping local files
+ scrapeFileT,
+ scrapeFile,
+ )
+where
+
+import Data.Functor ((<&>))
+import Data.Text (Text)
+import Data.Text.IO qualified as T
+import MiniScalp.Types
+import Zenacy.HTML (htmlParseEasy)
+
+-- | Parses the given 'Text' with 'htmlParseEasy' and runs the scraper on the resulting document.
+scrapeTextT :: Text -> ScraperT m a -> m (Maybe a)
+scrapeTextT input scraper = runScraperT scraper document
+  where
+    document = htmlParseEasy input
+
+-- | Version of 'scrapeTextT' for plain 'Scraper's; the result is returned directly.
+scrapeText :: Text -> Scraper a -> Maybe a
+scrapeText input scraper = runScraper scraper document
+  where
+    document = htmlParseEasy input
+
+-- | Reads the file at the given path and scrapes its contents with 'scrapeTextT'.
+--
+-- Only the file read happens in 'IO'; the scrape itself is returned as an action in @m@ for the caller to run.
+scrapeFileT :: FilePath -> ScraperT m a -> IO (m (Maybe a))
+scrapeFileT path scraper = T.readFile path <&> (\contents -> scrapeTextT contents scraper)
+
+-- | Version of 'scrapeFileT' for plain 'Scraper's: reads the file and scrapes its contents with 'scrapeText'.
+scrapeFile :: FilePath -> Scraper a -> IO (Maybe a)
+scrapeFile path scraper = T.readFile path <&> (`scrapeText` scraper)
diff --git a/src/MiniScalp/Types.hs b/src/MiniScalp/Types.hs
new file mode 100644
index 0000000..cb6e366
--- /dev/null
+++ b/src/MiniScalp/Types.hs
@@ -0,0 +1,61 @@
+-- |
+-- Type definitions for MiniScalp.
+--
+-- The main type is 'ScraperT', which provides the monadic interface to the scraper. It is a monad transformer, so you
+-- can use it in combination with other monads in your scrapers. If you do not need any other monads, you can use
+-- 'Scraper'.
+module MiniScalp.Types
+ ( -- * Scraper types
+ ScraperT,
+ Scraper,
+ runScraperT,
+ runScraper,
+
+ -- * Auxiliary types
+ ScrapeContext,
+ Predicate,
+ )
+where
+
+import Control.Applicative
+import Control.Monad.Identity
+import Control.Monad.Reader
+import Control.Monad.Trans.Maybe
+import Zenacy.HTML (HTMLNode)
+
+-- | Context of the scraping operation.
+--
+-- The first element is the stack of parent nodes, so that the ancestors of the current element can be inspected.
+-- Note that the stack is "reversed", meaning the first element is the immediate parent.
+--
+-- The second element is the currently focused node.
+type ScrapeContext = ([HTMLNode], HTMLNode)
+
+-- | Predicate to match HTML elements.
+--
+-- Gets passed the stack of parent nodes as well as the current node, and returns whether the node matches.
+type Predicate = [HTMLNode] -> HTMLNode -> Bool
+
+-- | Main monad of the scraper machinery.
+--
+-- A newtype over @ReaderT ScrapeContext (MaybeT m)@: the reader layer carries the current 'ScrapeContext', and the
+-- 'MaybeT' layer lets a scraper fail without producing a result.
+newtype ScraperT m a = MkScraperT (ReaderT ScrapeContext (MaybeT m) a)
+ deriving (Functor, Applicative, Alternative, Monad, MonadReader ScrapeContext, MonadPlus)
+
+-- | Lifts an action of the underlying monad through both the 'MaybeT' and the 'ReaderT' layer.
+instance MonadTrans ScraperT where
+  lift action = MkScraperT (lift (lift action))
+
+-- | Runs the given scraper, starting at the given node with an empty parent stack.
+runScraperT ::
+  -- | The scraper to run.
+  ScraperT m a ->
+  -- | The initial HTML node (usually the document root).
+  HTMLNode ->
+  -- | The resulting scraped value, or 'Nothing' if the scraper failed.
+  m (Maybe a)
+runScraperT (MkScraperT inner) root = runMaybeT (runReaderT inner initialContext)
+  where
+    initialContext = ([], root)
+
+-- | Alias for scrapers that don't need an additional monadic context: 'ScraperT' over 'Identity'.
+type Scraper a = ScraperT Identity a
+
+-- | Analogue of 'runScraperT' for plain 'Scraper's; unwraps the 'Identity' so the result is returned directly.
+runScraper :: Scraper a -> HTMLNode -> Maybe a
+runScraper scraper = runIdentity . runScraperT scraper
diff --git a/stack.yaml b/stack.yaml
new file mode 100644
index 0000000..57a0789
--- /dev/null
+++ b/stack.yaml
@@ -0,0 +1,67 @@
+# This file was automatically generated by 'stack init'
+#
+# Some commonly used options have been documented as comments in this file.
+# For advanced use and comprehensive documentation of the format, please see:
+# https://docs.haskellstack.org/en/stable/yaml_configuration/
+
+# Resolver to choose a 'specific' stackage snapshot or a compiler version.
+# A snapshot resolver dictates the compiler version and the set of packages
+# to be used for project dependencies. For example:
+#
+# resolver: lts-3.5
+# resolver: nightly-2015-09-21
+# resolver: ghc-7.10.2
+#
+# The location of a snapshot can be provided as a file or url. Stack assumes
+# a snapshot provided as a file might change, whereas a url resource does not.
+#
+# resolver: ./custom-snapshot.yaml
+# resolver: https://example.com/snapshots/2018-01-01.yaml
+resolver:
+ url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/21/21.yaml
+
+# User packages to be built.
+# Various formats can be used as shown in the example below.
+#
+# packages:
+# - some-directory
+# - https://example.com/foo/bar/baz-0.0.2.tar.gz
+# subdirs:
+# - auto-update
+# - wai
+packages:
+- .
+# Dependency packages to be pulled from upstream that are not in the resolver.
+# These entries can reference officially published versions as well as
+# forks / in-progress versions pinned to a git hash. For example:
+#
+# extra-deps:
+# - acme-missiles-0.3
+# - git: https://github.com/commercialhaskell/stack.git
+# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
+#
+# extra-deps: []
+
+# Override default flag values for local packages and extra-deps
+# flags: {}
+
+# Extra package databases containing global packages
+# extra-package-dbs: []
+
+# Control whether we use the GHC we find on the path
+# system-ghc: true
+#
+# Require a specific version of stack, using version ranges
+# require-stack-version: -any # Default
+# require-stack-version: ">=2.7"
+#
+# Override the architecture used by stack, especially useful on Windows
+# arch: i386
+# arch: x86_64
+#
+# Extra directories used by stack for building
+# extra-include-dirs: [/path/to/dir]
+# extra-lib-dirs: [/path/to/dir]
+#
+# Allow a newer minor version of GHC than the snapshot specifies
+# compiler-check: newer-minor
diff --git a/stack.yaml.lock b/stack.yaml.lock
new file mode 100644
index 0000000..fb5c9ae
--- /dev/null
+++ b/stack.yaml.lock
@@ -0,0 +1,13 @@
+# This file was autogenerated by Stack.
+# You should not edit this file by hand.
+# For more information, please see the documentation at:
+# https://docs.haskellstack.org/en/stable/lock_files
+
+packages: []
+snapshots:
+- completed:
+ sha256: 7d4b649cf368f9076d8aa049aa44efe58950971d105892734e9957b2a26a2186
+ size: 640060
+ url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/21/21.yaml
+ original:
+ url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/21/21.yaml
diff --git a/test/Spec.hs b/test/Spec.hs
new file mode 100644
index 0000000..cd4753f
--- /dev/null
+++ b/test/Spec.hs
@@ -0,0 +1,2 @@
+-- | Placeholder test entry point: only prints a notice, no tests are run.
+main :: IO ()
+main = putStrLn "Test suite not yet implemented"