From 7871c99a5ef791a5ce24c4b1d016a8b4200baf34 Mon Sep 17 00:00:00 2001
From: Daniel Schadt <kingdread@gmx.de>
Date: Thu, 19 Aug 2021 13:10:43 +0200
Subject: Add an internal Document representation

Doing everything on strings is kinda wonky, so this adds an intermediate
representation. The idea behind this is that the pipeline now goes

    Wikicode [1]-> Document [2]-> Output String

Where step 1 takes care of templates and everything, and step 2 does the
actual output formatting. This has the benefit that we can support
multiple output types, some with more and some with less features (e.g.,
adding a Markdown output which keeps some of the original formatting
intact), and it has the benefit of being less wonky (no hacks with
"<!NUM!>" for numbered lists, more streamlined formatting with newlines,
...).
---
 wikimini/__init__.py            | 185 ++++++++---------
 wikimini/document.py            | 438 ++++++++++++++++++++++++++++++++++++++++
 wikimini/templates/cite.py      |   3 +-
 wikimini/templates/convert.py   |   9 +-
 wikimini/templates/language.py  |  21 +-
 wikimini/templates/mainlinks.py |   5 +-
 wikimini/templates/quotes.py    |  11 +-
 wikimini/templates/various.py   |   5 +-
 8 files changed, 555 insertions(+), 122 deletions(-)
 create mode 100644 wikimini/document.py

diff --git a/wikimini/__init__.py b/wikimini/__init__.py
index b93ef81..fac0c84 100644
--- a/wikimini/__init__.py
+++ b/wikimini/__init__.py
@@ -4,8 +4,9 @@ import re
 
 from tabulate import tabulate
 
-from typing import Union, Tuple
+from typing import List, Union, Tuple
 
+from .document import *
 
 #: The default API URL, pointing to the english Wikipedia.
 API_URL = "https://en.wikipedia.org/w/api.php"
@@ -77,66 +78,71 @@ class Wikimini:
         text = revision["slots"]["main"]["content"]
         return (title, mwp.parse(text))
 
-    def _convert(self, obj):
-        """Function that does the actual conversion.
+    def convert(
+            self,
+            obj: Union[mwp.wikicode.Wikicode, mwp.nodes.Node],
+        ) -> Union[Document, List[Node], List[Block]]:
+        """Function that converts and renders a node.
 
-        This is called recursively on each node, and should perform the correct
-        conversion - based on the node type.
+        This function is exposed for template implementors, for normal usage,
+        see :meth:`convert_to_document`.
+
+        The input and output of this function are as follows:
+
+        * If ``obj`` is a :class:`~mwparserfromhell.wikicode.Wikicode`, then
+          :meth:`convert` will return a :class:`document.Document`.
+        * If ``obj`` is a :class:`~mwparserfromhell.nodes.Node`, then
+          :meth:`convert` will return either a list of :class:`document.Node`
+          or a list of :class:`document.Block`, depending on whether the
+          converted object is inline (like a link), or a block object (like a
+          quote).
+
+        Note that in the last case, the empty list ``[]`` might be returned,
+        indicating that the object should not be included in the output.
+
+        Args:
+            obj: The object to convert.
+
+        Returns:
+            The converted object.
         """
         default = lambda obj:\
-            mwp.wikicode.Wikicode([obj]).strip_code(collapse=False)
+            [Plain(mwp.wikicode.Wikicode([obj]).strip_code(collapse=False))]
 
-        # This does the actual conversion
         if isinstance(obj, mwp.wikicode.Wikicode):
-            converted = []
-            iterator = iter(enumerate(obj.nodes))
-            for i, node in iterator:
-                # Pattern: * [[Wikilink]]\n
-                if (i >= 2 and
-                        i + 1 < len(obj.nodes) and
-                        # Links can have a plural s after them
-                        re.match("s?\n", str(obj.nodes[i+1])) and
-                        isinstance(node, mwp.nodes.wikilink.Wikilink) and
-                        str(obj.nodes[i-1]) == " " and
-                        str(obj.nodes[i-2]) == "*"):
-                    converted.pop()
-                    converted.pop()
-                    _, after = next(iterator)
-                    converted.append("=> {} {}{}".format(
-                        self.page_url(str(node.title)),
-                        self._convert(node),
-                        self._convert(after),
-                    ))
-                    continue
-                # Pattern: *[[Wikilink]]\n
-                elif (i >= 1 and
-                        i + 1 < len(obj.nodes) and
-                        re.match("s?\n", str(obj.nodes[i+1])) and
-                        isinstance(node, mwp.nodes.wikilink.Wikilink) and
-                        str(obj.nodes[i-1]) == "*"):
-                    converted.pop()
-                    _, after = next(iterator)
-                    converted.append("=> {} {}{}".format(
-                        self.page_url(str(node.title)),
-                        self._convert(node),
-                        self._convert(after),
-                    ))
-                    continue
-
-                # Default: Just convert the node
-                converted.append(self._convert(node))
-            return "".join(converted)
+            document = []
+            for node in obj.nodes:
+                current = self.convert(node)
+
+                if current == []:
+                    pass
+                # Special case: We're starting a list, but we're already in a list
+                elif (document and len(current) == 1 and
+                        isinstance(current[0], ItemList) and
+                        isinstance(document[-1], ItemList) and
+                        document[-1].ordered == current[0].ordered):
+                    pass
+                # Special case: We're starting a list!
+                elif len(current) == 1 and isinstance(current[0], ItemList):
+                    document.extend(current)
+                elif isinstance(current[0], Block):
+                    document.extend(current)
+                    document.append(Paragraph([]))
+                elif isinstance(current[0], Node):
+                    for c in current:
+                        insert_into(document, c)
+            return Document(document)
         elif isinstance(obj, mwp.nodes.heading.Heading):
-            return "{} {}\n".format("#" * min(obj.level, 3), obj.title.strip_code())
+            return [Heading(obj.level, obj.title.strip_code())]
         elif isinstance(obj, mwp.nodes.tag.Tag):
             # Most tags are handled just fine and can be delegated to strip_code
             # (inline text styles), however we can do a bit better for list tags.
             if str(obj.wiki_markup) == "*":
-                return "* "
+                return [ItemList([], False)]
             elif str(obj.wiki_markup) == "#":
-                return "<!NUM!> "
+                return [ItemList([], True)]
             elif str(obj.tag) == "ref":
-                return ""
+                return []
             elif str(obj.tag) == "table":
                 rows = []
                 header = ()
@@ -151,16 +157,16 @@ class Wikimini:
                             continue
                         if str(node.tag) == "th":
                             row_is_header = True
-                        parsed.append(self._convert(node.contents).strip())
+                        parsed.append(
+                            self.convert(node.contents).plain().strip()
+                        )
                     if not row_is_header:
                         rows.append(parsed)
                     else:
                         header = parsed
-                return "".join([
-                    "\n```\n",
-                    tabulate(rows, header, tablefmt=self.table_format),
-                    "\n```\n",
-                ])
+                return [
+                    Verbatim(tabulate(rows, header, tablefmt=self.table_format))
+                ]
             else:
                 return default(obj)
         elif isinstance(obj, mwp.nodes.template.Template):
@@ -175,50 +181,28 @@ class Wikimini:
                 return template(self, obj)
         elif isinstance(obj, mwp.nodes.wikilink.Wikilink):
             if str(obj.title).startswith("File:") or str(obj.text).startswith("thumb|"):
-                return ""
+                return []
             elif str(obj.title).startswith("Category:"):
-                return ""
+                return []
             else:
-                return default(obj)
+                return [InlineLink(
+                    self.page_url(str(obj.title)),
+                    Plain(
+                        extract_plaintext(self.convert(obj.text)) if obj.text
+                        else str(obj.title)
+                    ),
+                )]
         else:
             return default(obj)
 
-    def _postprocess(self, gemtext):
-        # Strip out any more thumbs that have been left.
-        # This happens because the wikilinks are nested in each other, which the
-        # parser would only notice after doing the first replacement. We'll just
-        # take the easy way out here and use a regex to get rid of them.
-        gemtext = re.sub("^\\[\\[File:.*?\\]\\]$", "", gemtext, flags=re.MULTILINE)
-
-        # Collapse too many empty lines
-        while "\n\n\n" in gemtext:
-            gemtext = gemtext.replace("\n\n\n", "\n\n")
-
-        # Shortcut to avoid unnecessary splitting
-        if "<!NUM!>" not in gemtext:
-            return gemtext
-
-        lines = gemtext.split("\n")
-        counter = 1
-        for idx in range(len(lines)):
-            line = lines[idx]
-            if line.startswith("<!NUM!>"):
-                line = line.replace("<!NUM!>", str(counter), 1)
-                lines[idx] = line
-                counter += 1
-            else:
-                counter = 1
-        return "\n".join(lines)
-
-
-    def wikicode_to_gemtext(
-            self, obj: Union[mwp.nodes.Node, mwp.wikicode.Wikicode]
-        ) -> str:
-        """Try to turn the given object into a sensible Gemtext representation.
+    def convert_to_document(self, obj: mwp.wikicode.Wikicode) -> Document:
+        """Try to turn the given object into a sensible
+        :class:`~document.Document` representation.
 
-        Note that wikicode is much more powerful than Gemtext, so this is a lossy
-        function. The returned Gemtext tries to mimic the content of the Wikicode
-        as much as possible (for human consumption).
+        Note that wikicode is much more powerful than the internal
+        representation, so this is a lossy function. The returned document tries
+        to mimic the content of the Wikicode as much as possible (for human
+        consumption).
 
         This function mostly mimics
         :meth:`~mwparserfromhell.wikicode.Wikicode.strip_code`, with some
@@ -228,21 +212,26 @@ class Wikimini:
             obj: The object to convert.
 
         Returns:
-            The converted Gemtext.
+            The converted Document.
         """
         # Avoid calling str() on the whole Wikicode here
         if (isinstance(obj, mwp.wikicode.Wikicode) and
                 str(mwp.wikicode.Wikicode(obj.nodes[:2])) == "#REDIRECT "):
+            document = Document()
             title = str(obj.nodes[2].title)
             if "#" in title:
                 title, section = title.split("#")
                 section = f"Section '{section}'"
             else:
                 section = ""
-            return "Redirect:\n=> {} {}\n{}".format(
-                self.page_url(title), title, section
-            )
-        return self._postprocess(self._convert(obj))
+            document.append(BlockLink(self.page_url(title), title))
+            if section:
+                document.append(Paragraph([Plain(section)]))
+            return document
+
+        document = self.convert(obj)
+        document.cleanup()
+        return document
 
 
 # import at the bottom to avoid circular dependencies
diff --git a/wikimini/document.py b/wikimini/document.py
new file mode 100644
index 0000000..2c901dc
--- /dev/null
+++ b/wikimini/document.py
@@ -0,0 +1,438 @@
+"""The main class of this module is a :class:`Document`, which holds a parsed
+and rendered Wikipedia article.
+
+As in HTML, we distinguish inline :class:`Node` and :class:`Block` elements.
+"""
+import re
+from dataclasses import dataclass, replace
+from typing import List, Union
+
+class Document:
+    """A rendered Wikipedia article.
+
+    Attributes:
+        blocks (List[Block]): A list of top-level nodes.
+    """
+    __slots__ = ('blocks',)
+
+    def __init__(self, blocks=None):
+        self.blocks = []
+        if blocks:
+            self.blocks = blocks
+
+    def __iter__(self):
+        return iter(self.blocks)
+
+    def append(self, block: "Block"):
+        """Append a block to the document.
+
+        Args:
+            block: The block to append.
+        """
+        self.blocks.append(block)
+
+    def cleanup(self):
+        """Clean up the document by cleaning up every contained block.
+
+        See also :meth:`Block.cleanup`.
+        """
+        for block in self.blocks:
+            block.cleanup()
+        self.blocks = [block for block in self.blocks if block]
+
+    def nodes(self) -> List["Node"]:
+        """Discard the block information and return a list of inner nodes.
+
+        Returns:
+            A list of all inner nodes.
+        """
+        return [node for block in self.blocks for node in block.to_nodes()]
+
+    def plain(self) -> str:
+        """Returns the plain text content of this document.
+
+        Returns:
+            The plain text.
+        """
+        return extract_plaintext(self)
+
+
+@dataclass
+class Node:
+    """Base class for all in-line text elements."""
+
+    def plain(self) -> str:
+        """Returns the plain text of this node, stripping all markup.
+
+        Returns:
+            The plain text.
+        """
+
+    def with_text(self, text: str) -> "Node":
+        """Returns a new node that has the same markup, but the given text.
+
+        Args:
+            text: The new text.
+
+        Returns:
+            The new node, usually of the same type as the node this function is
+            called on.
+        """
+
+    def __getitem__(self, index):
+        if isinstance(index, int):
+            return self.plain()[index]
+        elif isinstance(index, slice):
+            text = self.plain()[index]
+            return self.with_text(text)
+        else:
+            raise TypeError("Node indices must be integers or slices")
+
+
+@dataclass
+class Plain(Node):
+    """A plain text node.
+
+    Attributes:
+        text: The text content of this node.
+    """
+    __slots__ = ("text",)
+    text: str
+
+    def plain(self):
+        return self.text
+
+    def with_text(self, text):
+        return Plain(text)
+
+
+@dataclass
+class Style(Node):
+    """Text that is styled with inline markup.
+
+    Attributes:
+        inner: The wrapped node whose text is styled.
+        bold: Whether the text is bold.
+        italic: Whether the text is italic.
+        monospace: Whether the text is monospaced.
+    """
+    __slots__ = ("inner", "bold", "italic", "monospace")
+    inner: Node
+    bold: bool
+    italic: bool
+    monospace: bool
+
+    def plain(self):
+        return self.inner.plain()
+
+    def with_text(self, text):
+        return replace(self, inner=self.inner.with_text(text))
+
+
+@dataclass
+class InlineLink(Node):
+    """An inline link.
+
+    Attributes:
+        href: The link target.
+        title: The text that should be shown.
+    """
+    __slots__ = ("href", "title")
+    href: str
+    title: Union[Plain, Style]
+
+    def plain(self):
+        if self.title is None:
+            return self.href
+        return self.title.plain()
+
+    def with_text(self, text):
+        return replace(self, title=self.title.with_text(text))
+
+
+@dataclass
+class Block:
+    """Base class for all top-level blocks."""
+
+    def cleanup(self):
+        """Clean up the content of this block.
+
+        The exact meaning of this is dependent on the type of the block, but it
+        can involve stripping trailing/leading whitespace or other changes.
+
+        Note that this modifies the block.
+        """
+
+    def append(self, node: Node):
+        """Append the given node to the block.
+
+        Depending on the block, the node can either be inserted as-is (keeping
+        the markup information), or it is converted to plain text first.
+
+        Args:
+            node: The node to insert.
+        """
+
+    def plain(self) -> str:
+        """Returns the plain text of this block, stripping all markup.
+
+        Returns:
+            The plain text.
+        """
+
+    def to_nodes(self) -> List[Node]:
+        """Returns the inner nodes of this block.
+
+        If the block is not made up of nodes, this will create new nodes that
+        contain the plain text content of this block.
+
+        Returns:
+            The list of nodes.
+        """
+        return [Plain(self.plain())]
+
+
+@dataclass
+class Paragraph(Block):
+    """A paragraph is a piece of text, which itself can hold inline markup."""
+    __slots__ = ("nodes",)
+    nodes: List[Node]
+
+    def __bool__(self):
+        return bool(self.nodes)
+
+    def append(self, node):
+        self.nodes.append(node)
+
+    def plain(self):
+        return "".join(node.plain() for node in self.nodes)
+
+    def to_nodes(self):
+        return self.nodes
+
+    def cleanup(self):
+        while self.nodes and re.match("^\\s+|^$", self.nodes[0].plain()):
+            self.nodes[0] = self.nodes[0].with_text(
+                self.nodes[0].plain().lstrip())
+            if not self.nodes[0].plain():
+                del self.nodes[0]
+        while self.nodes and re.search("\\s+$|^$", self.nodes[-1].plain()):
+            self.nodes[-1] = self.nodes[-1].with_text(
+                self.nodes[-1].plain().rstrip())
+            if not self.nodes[-1].plain():
+                del self.nodes[-1]
+
+
+@dataclass
+class Heading(Block):
+    """A heading.
+
+    Attributes:
+        level: The level of the heading.
+        text: The heading text.
+    """
+    __slots__ = ("level", "text")
+    level: int
+    text: str
+
+    def __bool__(self):
+        return bool(self.text)
+
+    def cleanup(self):
+        self.text = self.text.strip()
+
+    def append(self, node):
+        self.text += node.plain()
+
+    def plain(self):
+        return self.text
+
+
+@dataclass
+class Verbatim(Block):
+    """Text that should appear verbatim in the output, such as code.
+
+    Attributes:
+        text: The text that should appear.
+    """
+    __slots__ = ("text",)
+    text: str
+
+    def __bool__(self):
+        return bool(self.text)
+
+    def append(self, node):
+        self.text += node.plain()
+
+    def plain(self):
+        return self.text
+
+
+@dataclass
+class ItemList(Block):
+    """A list of elements.
+
+    Attributes:
+        items: The list of items. Each item is a list of inline :class:`Node`.
+        ordered: A flag indicating whether the list should be an ordered
+            (numbered) list.
+    """
+    __slots__ = ("items", "ordered")
+    items: List[List[Node]]
+    ordered: bool
+
+    def __bool__(self):
+        return bool(self.items)
+
+    def new_item(self):
+        """Start a new item."""
+        self.items.append([])
+
+    def append(self, node):
+        if not self.items:
+            self.new_item()
+        self.items[-1].append(node)
+
+    def plain(self):
+        return "\n".join(
+            "".join(i.plain() for i in item) for item in self.items
+        )
+
+    def to_nodes(self):
+        return [node for item in self.items for node in item]
+
+    def cleanup(self):
+        i = 0
+        while i < len(self.items):
+            p = Paragraph(self.items[i])
+            p.cleanup()
+            if p:
+                self.items[i] = p.to_nodes()
+                i += 1
+            else:
+                del self.items[i]
+
+
+@dataclass
+class Blockquote(Block):
+    """A quote.
+
+    Attributes:
+        nodes: The content of the blockquote, similar to
+            :attr:`Paragraph.nodes`.
+    """
+    __slots__ = ("nodes",)
+    nodes: List[Node]
+
+    def __bool__(self):
+        return bool(self.nodes)
+
+    def append(self, node):
+        self.nodes.append(node)
+
+    def plain(self):
+        return "".join(node.plain() for node in self.nodes)
+
+    def to_nodes(self):
+        return self.nodes
+
+
+@dataclass
+class BlockLink(Block):
+    """A link on its own line.
+
+    This is important for formats like Gemtext, where inline links will be
+    discarded.
+
+    Attributes:
+        href: The target of the link.
+        title: The link text.
+    """
+    __slots__ = ("href", "title")
+    href: str
+    title: str
+
+    def append(self, node):
+        self.title += node.plain()
+
+    def plain(self):
+        return self.title
+
+
+def insert_into(blocks: List[Block], node: Node):
+    """Inserts the given node into the list of blocks.
+
+    The node will always be inserted into the last block. If the list of blocks
+    is still empty, a fresh :class:`Paragraph` will be started.
+
+    This function takes care of handling newlines properly. That means that a
+    double newline (``\\n\\n``) will start a new paragraph, and a single
+    newline (``\\n``) will start a new list item (if the current block is a
+    list).
+
+    Note that this function will modify the given list of blocks.
+
+    Args:
+        blocks: The list of blocks.
+        node: The node to insert.
+    """
+    if not blocks:
+        blocks.append(Paragraph([]))
+
+    current_block = blocks[-1]
+
+    if isinstance(current_block, Paragraph):
+        if "\n\n" in node.plain():
+            idx = node.plain().index("\n\n")
+            left = node[:idx]
+            right = node[idx+2:]
+            current_block.append(left)
+            blocks.append(Paragraph([]))
+            insert_into(blocks, right)
+        else:
+            current_block.append(node)
+
+    elif isinstance(current_block, ItemList):
+        match = re.search("\\n\\n?", node.plain())
+        if not match:
+            current_block.append(node)
+        else:
+            left_end, right_start = match.span()
+            current_block.append(node[:left_end])
+            if match.group() == "\n\n":
+                blocks.append(Paragraph([]))
+            else:
+                current_block.new_item()
+            insert_into(blocks, node[right_start:])
+
+    else:
+        current_block.append(node)
+
+
+def extract_plaintext(obj) -> str:
+    """Tries to extract plaintext from the given object.
+
+    The given object can be one of many things:
+
+    * A list of :class:`Node`
+    * A list of :class:`Block`
+    * A single :class:`Node`
+    * A single :class:`Block`
+    * A :class:`Document`
+
+    This function is useful if you recursively call
+    :meth:`wikimini.Wikimini.convert` and want to include the output in
+    something that only accepts plain text.
+    """
+    if isinstance(obj, Document):
+        return extract_plaintext(obj.blocks)
+
+    if not isinstance(obj, list):
+        obj = [obj]
+
+    if not obj:
+        return ""
+    elif isinstance(obj[0], Node):
+        return "".join(node.plain() for node in obj)
+    elif isinstance(obj[0], Block):
+        return "\n\n".join(block.plain() for block in obj)
diff --git a/wikimini/templates/cite.py b/wikimini/templates/cite.py
index ac4f597..e342a5f 100644
--- a/wikimini/templates/cite.py
+++ b/wikimini/templates/cite.py
@@ -1,5 +1,6 @@
 """Citation related templates."""
 from . import registry
+from ..document import Plain
 
 
 def tmpl_citation(wikimini, obj):
@@ -23,7 +24,7 @@ def tmpl_citation(wikimini, obj):
             names.append(last)
         elif first:
             names.append(first)
-    return "{} ({})".format(title, "; ".join(names))
+    return [Plain("{} ({})".format(title, "; ".join(names)))]
 
 
 for name in ["cite", "citation", "cite arXiv", "cite AV media", "cite book",
diff --git a/wikimini/templates/convert.py b/wikimini/templates/convert.py
index a7a3f44..8bab782 100644
--- a/wikimini/templates/convert.py
+++ b/wikimini/templates/convert.py
@@ -1,20 +1,21 @@
 """Implementations for the unit conversion templates."""
 from . import registry
+from ..document import Plain
 
 
 def tmpl_convert(wikimini, obj):
     """Renders the ``{{convert|...}}`` template."""
     if str(obj.params[1]) in {"-", "to"}:
-        return "{0}{3} {1} {2}{3}".format(
+        return [Plain("{0}{3} {1} {2}{3}".format(
             obj.params[0].value.strip_code(),
             obj.params[1].value.strip_code(),
             obj.params[2].value.strip_code(),
             obj.params[3].value.strip_code(),
-        )
-    return "{}{}".format(
+        ))]
+    return [Plain("{}{}".format(
         obj.params[0].value.strip_code(),
         obj.params[1].value.strip_code(),
-    )
+    ))]
 
 
 registry.insert("convert", tmpl_convert)
diff --git a/wikimini/templates/language.py b/wikimini/templates/language.py
index e8ab738..22320da 100644
--- a/wikimini/templates/language.py
+++ b/wikimini/templates/language.py
@@ -1,12 +1,15 @@
 """Language related templates."""
 from . import registry
+from ..document import Plain
 
 import pycountry
 
 
 def tmpl_ipa(wikimini, obj):
     """Renders the ``{{IPA|...}}`` template."""
-    return "pronounced [{}]".format(wikimini._convert(obj.params[0].value))
+    return [Plain("pronounced [{}]".format(
+        wikimini.convert(obj.params[0].value).plain()
+    ))]
 
 
 registry.insert("IPA", tmpl_ipa)
@@ -14,7 +17,7 @@ registry.insert("IPA", tmpl_ipa)
 
 def tmpl_lang(wikimini, obj):
     """Renders the ``{{Lang|...}}`` template."""
-    return wikimini._convert(obj.params[1].value)
+    return wikimini.convert(obj.params[1].value).nodes()
 
 
 registry.insert("lang", tmpl_lang)
@@ -24,18 +27,18 @@ registry.insert("script", tmpl_lang)
 def tmpl_lang_code(language_name):
     """Creates a template renderer for a ``{{lang-xx|...}}`` template."""
     def inner(wikimini, obj):
-        return "{}: {}".format(
-            language_name, wikimini._convert(obj.params[0].value)
-        )
+        return [Plain("{}: {}".format(
+            language_name, wikimini.convert(obj.params[0].value).plain()
+        ))]
     return inner
 
 
 def tmpl_ipa_code(language_name):
     """Creates a template renderer for a ``{{IPA-xx|...}}`` template."""
     def inner(wikimini, obj):
-        return "{} pronunciation: [{}]".format(
-            language_name, wikimini._convert(obj.params[0].value)
-        )
+        return [Plain("{} pronunciation: [{}]".format(
+            language_name, wikimini.convert(obj.params[0].value).plain()
+        ))]
     return inner
 
 
@@ -52,7 +55,7 @@ for language in pycountry.languages:
 def tmpl_country_flag(country):
     """Creates a template renderer for ``{{BRA}}`` country flags."""
     def inner(wikimini, obj):
-        return country
+        return [Plain(country)]
     return inner
 
 
diff --git a/wikimini/templates/mainlinks.py b/wikimini/templates/mainlinks.py
index ffcbc5e..8c9abbf 100644
--- a/wikimini/templates/mainlinks.py
+++ b/wikimini/templates/mainlinks.py
@@ -1,14 +1,15 @@
 """Renders templates that link to further articles."""
 from . import registry
 
+from ..document import Paragraph, Plain, BlockLink
 
 def tmpl_main(wikimini, obj):
     """Renders the ``{{main|...}}`` template."""
     links = [
-        "=> {} {}".format(wikimini.page_url(str(t.value)), t.value)
+        BlockLink(wikimini.page_url(str(t.value)), t.value.strip_code())
         for t in obj.params
     ]
-    return "Main articles:\n{}\n".format("\n".join(links))
+    return [Paragraph([Plain("Main articles:")])] + links
 
 
 registry.insert("main", tmpl_main)
diff --git a/wikimini/templates/quotes.py b/wikimini/templates/quotes.py
index b51d92d..39f6fa2 100644
--- a/wikimini/templates/quotes.py
+++ b/wikimini/templates/quotes.py
@@ -1,15 +1,15 @@
 """Renders various quote related templates."""
 from . import registry
 
+from ..document import Blockquote
 
 def tmpl_quote(wikimini, obj):
     """Renders the ``{{blockquote|...}}`` template."""
     text = obj.get("text", None)
     if not text:
-        return ""
-    content = wikimini._convert(text.value)
-    lines = content.split("\n")
-    return "\n".join(f"> {line}" for line in lines)
+        return []
+    content = wikimini.convert(text.value).nodes()
+    return [Blockquote(content)]
 
 
 registry.insert("blockquote", tmpl_quote)
@@ -19,9 +19,8 @@ registry.insert("quote", tmpl_quote)
 def tmpl_cquote(wikimini, obj):
     """Renders the ``{{cquote|...}}`` template."""
     text = obj.params[0]
-    content = wikimini._convert(text.value)
-    lines = content.split("\n")
-    return "\n".join(f"> {line}" for line in lines)
+    content = wikimini.convert(text.value).nodes()
+    return [Blockquote(content)]
 
 
 registry.insert("cquote", tmpl_cquote)
diff --git a/wikimini/templates/various.py b/wikimini/templates/various.py
index 8c6e0d5..6076ac4 100644
--- a/wikimini/templates/various.py
+++ b/wikimini/templates/various.py
@@ -1,16 +1,17 @@
 """Various small templates."""
 from . import registry
+from ..document import Plain
 
 
 def tmpl_reign(wikimini, obj):
     """Renders the ``{{reign|...}}`` template."""
     if not obj.params:
-        return "r. "
+        return [Plain("r. ")]
     first = obj.params[0].value.strip_code().strip() or "?"
     second = ""
     if len(obj.params) > 1:
         second = obj.params[1].value.strip_code().strip()
-    return f"r. {first} – {second}"
+    return [Plain(f"r. {first} – {second}")]
 
 
 registry.insert("reign", tmpl_reign)
-- 
cgit v1.2.3


From 05bb70ce36a11550222c718c6e69185b44793ca4 Mon Sep 17 00:00:00 2001
From: Daniel Schadt <kingdread@gmx.de>
Date: Thu, 19 Aug 2021 14:24:15 +0200
Subject: implement style fixes suggested by pycodestyle

---
 wikimini/__init__.py            | 53 ++++++++++++++++++++++-------------------
 wikimini/document.py            |  3 ++-
 wikimini/templates/__init__.py  |  6 ++---
 wikimini/templates/mainlinks.py |  1 +
 wikimini/templates/quotes.py    |  1 +
 5 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/wikimini/__init__.py b/wikimini/__init__.py
index fac0c84..d6e49a1 100644
--- a/wikimini/__init__.py
+++ b/wikimini/__init__.py
@@ -1,12 +1,14 @@
 import mwparserfromhell as mwp
 import requests
-import re
 
 from tabulate import tabulate
 
 from typing import List, Union, Tuple
 
-from .document import *
+from .document import (
+    Plain, BlockLink, InlineLink, Verbatim, Document, Node, Block, ItemList,
+    Paragraph, Heading, insert_into, extract_plaintext,
+)
 
 #: The default API URL, pointing to the english Wikipedia.
 API_URL = "https://en.wikipedia.org/w/api.php"
@@ -79,9 +81,9 @@ class Wikimini:
         return (title, mwp.parse(text))
 
     def convert(
-            self,
-            obj: Union[mwp.wikicode.Wikicode, mwp.nodes.Node],
-        ) -> Union[Document, List[Node], List[Block]]:
+        self,
+        obj: Union[mwp.wikicode.Wikicode, mwp.nodes.Node],
+    ) -> Union[Document, List[Node], List[Block]]:
         """Function that converts and renders a node.
 
         This function is exposed for template implementors, for normal usage,
@@ -116,11 +118,12 @@ class Wikimini:
 
                 if current == []:
                     pass
-                # Special case: We're starting a list, but we're already in a list
-                elif (document and len(current) == 1 and
-                        isinstance(current[0], ItemList) and
-                        isinstance(document[-1], ItemList) and
-                        document[-1].ordered == current[0].ordered):
+                # Special case: We're starting a list, but we're already in a
+                # list
+                elif (document and len(current) == 1
+                        and isinstance(current[0], ItemList)
+                        and isinstance(document[-1], ItemList)
+                        and document[-1].ordered == current[0].ordered):
                     pass
                 # Special case: We're starting a list!
                 elif len(current) == 1 and isinstance(current[0], ItemList):
@@ -135,8 +138,9 @@ class Wikimini:
         elif isinstance(obj, mwp.nodes.heading.Heading):
             return [Heading(obj.level, obj.title.strip_code())]
         elif isinstance(obj, mwp.nodes.tag.Tag):
-            # Most tags are handled just fine and can be delegated to strip_code
-            # (inline text styles), however we can do a bit better for list tags.
+            # Most tags are handled just fine and can be delegated to
+            # strip_code (inline text styles), however we can do a bit better
+            # for list tags.
             if str(obj.wiki_markup) == "*":
                 return [ItemList([], False)]
             elif str(obj.wiki_markup) == "#":
@@ -164,15 +168,15 @@ class Wikimini:
                         rows.append(parsed)
                     else:
                         header = parsed
-                return [
-                    Verbatim(tabulate(rows, header, tablefmt=self.table_format))
-                ]
+                return [Verbatim(
+                    tabulate(rows, header, tablefmt=self.table_format)
+                )]
             else:
                 return default(obj)
         elif isinstance(obj, mwp.nodes.template.Template):
-            # Most templates are handled fine (and completely stripped), however,
-            # some of them are useful and provide some output that we should mimic
-            # (for example, the convert template).
+            # Most templates are handled fine (and completely stripped),
+            # however, some of them are useful and provide some output that we
+            # should mimic (for example, the convert template).
             name = str(obj.name)
             template = templates.registry.get(name)
             if template is None:
@@ -180,7 +184,8 @@ class Wikimini:
             else:
                 return template(self, obj)
         elif isinstance(obj, mwp.nodes.wikilink.Wikilink):
-            if str(obj.title).startswith("File:") or str(obj.text).startswith("thumb|"):
+            if (str(obj.title).startswith("File:")
+                    or str(obj.text).startswith("thumb|")):
                 return []
             elif str(obj.title).startswith("Category:"):
                 return []
@@ -200,9 +205,9 @@ class Wikimini:
         :class:`~document.Document` representation.
 
         Note that wikicode is much more powerful than the internal
-        representation, so this is a lossy function. The returned document tries
-        to mimic the content of the Wikicode as much as possible (for human
-        consumption).
+        representation, so this is a lossy function. The returned document
+        tries to mimic the content of the Wikicode as much as possible (for
+        human consumption).
 
         This function mostly mimics
         :meth:`~mwparserfromhell.wikicode.Wikicode.strip_code`, with some
@@ -215,8 +220,8 @@ class Wikimini:
             The converted Document.
         """
         # Avoid calling str() on the whole Wikicode here
-        if (isinstance(obj, mwp.wikicode.Wikicode) and
-                str(mwp.wikicode.Wikicode(obj.nodes[:2])) == "#REDIRECT "):
+        if (isinstance(obj, mwp.wikicode.Wikicode)
+                and str(mwp.wikicode.Wikicode(obj.nodes[:2])) == "#REDIRECT "):
             document = Document()
             title = str(obj.nodes[2].title)
             if "#" in title:
diff --git a/wikimini/document.py b/wikimini/document.py
index 2c901dc..be51108 100644
--- a/wikimini/document.py
+++ b/wikimini/document.py
@@ -7,6 +7,7 @@ import re
 from dataclasses import dataclass, replace
 from typing import List, Union
 
+
 class Document:
     """A rendered Wikipedia article.
 
@@ -385,7 +386,7 @@ def insert_into(blocks: List[Block], node: Node):
         if "\n\n" in node.plain():
             idx = node.plain().index("\n\n")
             left = node[:idx]
-            right = node[idx+2:]
+            right = node[idx + 2:]
             current_block.append(left)
             blocks.append(Paragraph([]))
             insert_into(blocks, right)
diff --git a/wikimini/templates/__init__.py b/wikimini/templates/__init__.py
index 9e983e1..58a5db8 100644
--- a/wikimini/templates/__init__.py
+++ b/wikimini/templates/__init__.py
@@ -2,9 +2,9 @@
 
 This module contains functions that mimic Wikipedia's templates.
 
-A template is a function that takes the :class:`~wikimini.Wikimini` instance and the
-:class:`~mwparserfromhell.nodes.template.Template` node to convert, and returns
-a string with the template output (see :const:`Template`).
+A template is a function that takes the :class:`~wikimini.Wikimini` instance
+and the :class:`~mwparserfromhell.nodes.template.Template` node to convert, and
+returns a string with the template output (see :const:`Template`).
 """
 from typing import Callable, Optional
 
diff --git a/wikimini/templates/mainlinks.py b/wikimini/templates/mainlinks.py
index 8c9abbf..3d945ed 100644
--- a/wikimini/templates/mainlinks.py
+++ b/wikimini/templates/mainlinks.py
@@ -3,6 +3,7 @@ from . import registry
 
 from ..document import Paragraph, Plain, BlockLink
 
+
 def tmpl_main(wikimini, obj):
     """Renders the ``{{main|...}}`` template."""
     links = [
diff --git a/wikimini/templates/quotes.py b/wikimini/templates/quotes.py
index 39f6fa2..fdf00cc 100644
--- a/wikimini/templates/quotes.py
+++ b/wikimini/templates/quotes.py
@@ -3,6 +3,7 @@ from . import registry
 
 from ..document import Blockquote
 
+
 def tmpl_quote(wikimini, obj):
     """Renders the ``{{blockquote|...}}`` template."""
     text = obj.get("text", None)
-- 
cgit v1.2.3


From 635f53e9f82ede414f97087510043ec62d41468c Mon Sep 17 00:00:00 2001
From: Daniel Schadt <kingdread@gmx.de>
Date: Fri, 20 Aug 2021 11:03:03 +0200
Subject: Rename Blockquote to BlockQuote

This keeps it more in line with BlockLink. Also, this adds a LineBreak
block, which templates can use to enforce extra line breaks.
---
 wikimini/document.py         | 11 ++++++++++-
 wikimini/templates/quotes.py |  6 +++---
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/wikimini/document.py b/wikimini/document.py
index be51108..fe979fc 100644
--- a/wikimini/document.py
+++ b/wikimini/document.py
@@ -193,6 +193,15 @@ class Block:
         return [Plain(self.plain())]
 
 
+@dataclass
+class LineBreak:
+    """Represents an enforced empty line."""
+    __slots__ = ()
+
+    def plain(self):
+        return "\n"
+
+
 @dataclass
 class Paragraph(Block):
     """A paragraph is a piece of text, which itself can hold inline markup."""
@@ -315,7 +324,7 @@ class ItemList(Block):
 
 
 @dataclass
-class Blockquote(Block):
+class BlockQuote(Block):
     """A quote.
 
     Attributes:
diff --git a/wikimini/templates/quotes.py b/wikimini/templates/quotes.py
index fdf00cc..ef7f297 100644
--- a/wikimini/templates/quotes.py
+++ b/wikimini/templates/quotes.py
@@ -1,7 +1,7 @@
 """Renders various quote related templates."""
 from . import registry
 
-from ..document import Blockquote
+from ..document import BlockQuote
 
 
 def tmpl_quote(wikimini, obj):
@@ -10,7 +10,7 @@ def tmpl_quote(wikimini, obj):
     if not text:
         return ""
     content = wikimini.convert(text.value).nodes()
-    return [Blockquote(content)]
+    return [BlockQuote(content)]
 
 
 registry.insert("blockquote", tmpl_quote)
@@ -21,7 +21,7 @@ def tmpl_cquote(wikimini, obj):
     """Renders the ``{{cquote|...}}`` template."""
     text = obj.params[0]
     content = wikimini.convert(text.value).nodes()
-    return [Blockquote(content)]
+    return [BlockQuote(content)]
 
 
 registry.insert("cquote", tmpl_cquote)
-- 
cgit v1.2.3


From 1a05603d0e77a6aa786d1b9fb05003a4161a486b Mon Sep 17 00:00:00 2001
From: Daniel Schadt <kingdread@gmx.de>
Date: Fri, 20 Aug 2021 11:03:36 +0200
Subject: add Format base class

---
 wikimini/formats/__init__.py | 148 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 148 insertions(+)
 create mode 100644 wikimini/formats/__init__.py

diff --git a/wikimini/formats/__init__.py b/wikimini/formats/__init__.py
new file mode 100644
index 0000000..8d6296c
--- /dev/null
+++ b/wikimini/formats/__init__.py
@@ -0,0 +1,148 @@
+"""The formats are responsible for turning a
+:class:`~wikimini.document.Document` into an output string.
+
+Formats work by being given a file-like buffer as argument, into which the
+output should be written.
+"""
+from typing import TextIO
+
+from ..document import (
+    Document, Block, BlockLink, BlockQuote, Heading, ItemList, LineBreak,
+    Paragraph, Verbatim, Node, InlineLink, Plain, Style,
+)
+
+
+class Format:
+    """:class:`Format` is the base class for all output formats.
+
+    Any output format should inherit from this class and override the specific
+    output methods. Note that by default, no output is generated.
+
+    The methods :meth:`render_document`, :meth:`render_block` and
+    :meth:`render_node` have sensible default implementations that dispatch to
+    the more specific rendering methods.
+
+    Attributes:
+        writer: The file-like object that output should be written to.
+    """
+    writer: TextIO
+
+    def __init__(self, writer: TextIO):
+        self.writer = writer
+
+    def render_document(self, document: Document):
+        """Renders the given document.
+
+        Args:
+            document: The document to render.
+        """
+        for block in document:
+            self.render_block(block)
+
+    def render_block(self, block: Block):
+        """Renders a single block.
+
+        Args:
+            block: The block to render.
+        """
+        if isinstance(block, BlockLink):
+            self.render_block_link(block)
+        elif isinstance(block, BlockQuote):
+            self.render_block_quote(block)
+        elif isinstance(block, Heading):
+            self.render_heading(block)
+        elif isinstance(block, ItemList):
+            self.render_item_list(block)
+        elif isinstance(block, LineBreak):
+            self.render_line_break(block)
+        elif isinstance(block, Paragraph):
+            self.render_paragraph(block)
+        elif isinstance(block, Verbatim):
+            self.render_verbatim(block)
+        else:
+            raise TypeError(f"Unknown Block type given: {type(block)}")
+
+    def render_block_link(self, block_link: BlockLink):
+        """Renders a :class:`~wikimini.document.BlockLink`.
+
+        Args:
+            block_link: The block link to render.
+        """
+
+    def render_block_quote(self, block_quote: BlockQuote):
+        """Renders a :class:`~wikimini.document.BlockQuote`.
+
+        Args:
+            block_quote: The block quote to render.
+        """
+
+    def render_heading(self, heading: Heading):
+        """Renders a :class:`~wikimini.document.Heading`.
+
+        Args:
+            heading: The heading to render.
+        """
+
+    def render_item_list(self, item_list: ItemList):
+        """Renders a :class:`~wikimini.document.ItemList`.
+
+        Args:
+            item_list: The item list to render.
+        """
+
+    def render_line_break(self, line_break: LineBreak):
+        """Renders a :class:`~wikimini.document.LineBreak`.
+
+        Args:
+            line_break: The line break to render.
+        """
+
+    def render_paragraph(self, paragraph: Paragraph):
+        """Renders a :class:`~wikimini.document.Paragraph`.
+
+        Args:
+            paragraph: The paragraph to render.
+        """
+
+    def render_verbatim(self, verbatim: Verbatim):
+        """Renders a :class:`~wikimini.document.Verbatim`.
+
+        Args:
+            verbatim: The verbatim to render.
+        """
+
+    def render_node(self, node: Node):
+        """Renders a single node.
+
+        Args:
+            node: The node to render.
+        """
+        if isinstance(node, InlineLink):
+            self.render_inline_link(node)
+        elif isinstance(node, Plain):
+            self.render_plain(node)
+        elif isinstance(node, Style):
+            self.render_style(node)
+        else:
+            raise TypeError(f"Unknown node type: {type(node)}")
+
+    def render_inline_link(self, inline_link: InlineLink):
+        """Renders a :class:`~wikimini.document.InlineLink`.
+
+        Args:
+            inline_link: The inline link to render.
+        """
+
+    def render_plain(self, plain: Plain):
+        """Renders a :class:`~wikimini.document.Plain`.
+
+        Args:
+            plain: The plain text to render.
+        """
+
+    def render_style(self, style: Style):
+        """Renders a :class:`~wikimini.document.Style`.
+
+        Args:
+            style: The styled text to render.
+        """
-- 
cgit v1.2.3


From 9b5b2bda1e43e659f142bb88a6b8138962e754e9 Mon Sep 17 00:00:00 2001
From: Daniel Schadt <kingdread@gmx.de>
Date: Fri, 20 Aug 2021 11:10:14 +0200
Subject: Rework ItemList/BlockQuote to hold Paragraph

A List[Node] is basically a Paragraph, and we already delegated some of
the methods to Paragraph (see ItemList.cleanup). Therefore, it only made
sense to rework ItemList and BlockQuote to hold a Paragraph instead of a
List[Node].
---
 wikimini/document.py         | 28 ++++++++++++----------------
 wikimini/templates/quotes.py |  6 +++---
 2 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/wikimini/document.py b/wikimini/document.py
index fe979fc..7c459b6 100644
--- a/wikimini/document.py
+++ b/wikimini/document.py
@@ -283,12 +283,12 @@ class ItemList(Block):
     """A list of elements.
 
     Attributes:
-        items: The list of items. Each item is a list of inline :class:`Node`.
+        items: The list of items. Each item is a :class:`Paragraph`.
         ordered: A flag indicating whether the list should be an ordered
             (numbered) list.
     """
     __slots__ = ("items", "ordered")
-    items: List[List[Node]]
+    items: List[Paragraph]
     ordered: bool
 
     def __bool__(self):
@@ -296,7 +296,7 @@ class ItemList(Block):
 
     def new_item(self):
         """Start a new item."""
-        self.items.append([])
+        self.items.append(Paragraph([]))
 
     def append(self, node):
         if not self.items:
@@ -304,20 +304,17 @@ class ItemList(Block):
         self.items[-1].append(node)
 
     def plain(self):
-        return "\n".join(
-            "".join(i.plain() for i in item) for item in self.items
-        )
+        return "\n".join(paragraph.plain() for paragraph in self.items)
 
     def to_nodes(self):
-        return [node for item in self.items for node in item]
+        return [node for item in self.items for node in item.nodes]
 
     def cleanup(self):
         i = 0
         while i < len(self.items):
-            p = Paragraph(self.items[i])
-            p.cleanup()
-            if p:
-                self.items[i] = p.to_nodes()
+            paragraph = self.items[i]
+            paragraph.cleanup()
+            if paragraph:
                 i += 1
             else:
                 del self.items[i]
@@ -328,11 +325,10 @@ class BlockQuote(Block):
     """A quote.
 
     Attributes:
-        nodes: The content of the blockquote, similar to
-            :attr:`Paragraph.nodes`.
+        nodes: The content of the blockquote.
     """
     __slots__ = ("nodes",)
-    nodes: List[Node]
+    nodes: Paragraph
 
     def __bool__(self):
         return bool(self.nodes)
@@ -341,10 +337,10 @@ class BlockQuote(Block):
         self.nodes.append(node)
 
     def plain(self):
-        return "".join(node.plain() for node in self.nodes)
+        return self.nodes.plain()
 
     def to_nodes(self):
-        return self.nodes
+        return self.nodes.to_nodes()
 
 
 @dataclass
diff --git a/wikimini/templates/quotes.py b/wikimini/templates/quotes.py
index ef7f297..00b82fb 100644
--- a/wikimini/templates/quotes.py
+++ b/wikimini/templates/quotes.py
@@ -1,7 +1,7 @@
 """Renders various quote related templates."""
 from . import registry
 
-from ..document import BlockQuote
+from ..document import BlockQuote, Paragraph
 
 
 def tmpl_quote(wikimini, obj):
@@ -10,7 +10,7 @@ def tmpl_quote(wikimini, obj):
     if not text:
         return ""
     content = wikimini.convert(text.value).nodes()
-    return [BlockQuote(content)]
+    return [BlockQuote(Paragraph(content))]
 
 
 registry.insert("blockquote", tmpl_quote)
@@ -21,7 +21,7 @@ def tmpl_cquote(wikimini, obj):
     """Renders the ``{{cquote|...}}`` template."""
     text = obj.params[0]
     content = wikimini.convert(text.value).nodes()
-    return [BlockQuote(content)]
+    return [BlockQuote(Paragraph(content))]
 
 
 registry.insert("cquote", tmpl_cquote)
-- 
cgit v1.2.3


From a05368c8c8f9b97d727dc8d2efcf847743b29f66 Mon Sep 17 00:00:00 2001
From: Daniel Schadt <kingdread@gmx.de>
Date: Fri, 20 Aug 2021 11:58:42 +0200
Subject: implement Gemtext format

---
 wikimini/document.py         | 16 ++++++------
 wikimini/formats/__init__.py | 47 ++++++++++++++++++++++++++++++++---
 wikimini/formats/gemtext.py  | 58 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 109 insertions(+), 12 deletions(-)
 create mode 100644 wikimini/formats/gemtext.py

diff --git a/wikimini/document.py b/wikimini/document.py
index 7c459b6..1e18a8b 100644
--- a/wikimini/document.py
+++ b/wikimini/document.py
@@ -325,22 +325,22 @@ class BlockQuote(Block):
     """A quote.
 
     Attributes:
-        nodes: The content of the blockquote.
+        content: The content of the blockquote.
     """
-    __slots__ = ("nodes",)
-    nodes: Paragraph
+    __slots__ = ("content",)
+    content: Paragraph
 
     def __bool__(self):
-        return bool(self.nodes)
+        return bool(self.content)
 
     def append(self, node):
-        self.nodes.append(node)
+        self.content.append(node)
 
     def plain(self):
-        return self.nodes.plain()
+        return self.content.plain()
 
-    def to_nodes(self):
-        return self.nodes.to_nodes()
+    def to_content(self):
+        return self.content.to_nodes()
 
 
 @dataclass
diff --git a/wikimini/formats/__init__.py b/wikimini/formats/__init__.py
index 8d6296c..b48486a 100644
--- a/wikimini/formats/__init__.py
+++ b/wikimini/formats/__init__.py
@@ -4,7 +4,8 @@
 Formats work by being given a file-like buffer as argument, into which the
 output should be written.
 """
-from typing import TextIO
+import io
+from typing import TextIO, Union
 
 from ..document import (
     Document, Block, BlockLink, BlockQuote, Heading, ItemList, LineBreak,
@@ -18,9 +19,9 @@ class Format:
     Any output format should inherit from this class and override the specific
     output methods. Note that by default, no output is generated.
 
-    The methods :meth:`render_document`, :meth:`render_block` and
-    :meth:`render_node` have sensible default implementations that dispatch to
-    the more specific rendering methods.
+    The methods :meth:`render`, :meth:`render_document`, :meth:`render_block`
+    and :meth:`render_node` have sensible default implementations that dispatch
+    to the more specific rendering methods.
 
     Attributes:
         writer: The file-like object that output should be written to.
@@ -30,6 +31,21 @@ class Format:
     def __init__(self, writer: TextIO):
         self.writer = writer
 
+    def render(self, obj: Union[Document, Block, Node]):
+        """Renders the given object.
+
+        Args:
+            obj: The object to render.
+        """
+        if isinstance(obj, Document):
+            self.render_document(obj)
+        elif isinstance(obj, Block):
+            self.render_block(obj)
+        elif isinstance(obj, Node):
+            self.render_node(obj)
+        else:
+            raise TypeError(f"Cannot render {obj}, unknown type")
+
     def render_document(self, document: Document):
         """Renders the given document.
 
@@ -146,3 +162,26 @@ class Format:
         Args:
             style: The styled text to render.
         """
+
+
+def as_string(formatter: Format, obj: Union[Document, Node, Block]) -> str:
+    """Runs the given format function and returns the result as a string.
+
+    This temporarily replaces the output writer by an in-memory string object,
+    runs the render function and then restores the writer.
+
+    Args:
+        formatter: The formatter to run.
+        obj: The object to render.
+
+    Returns:
+        The content, as string.
+    """
+    old_writer = formatter.writer
+    buffer = io.StringIO()
+    formatter.writer = buffer
+    try:
+        formatter.render(obj)
+    finally:
+        formatter.writer = old_writer
+    return buffer.getvalue()
diff --git a/wikimini/formats/gemtext.py b/wikimini/formats/gemtext.py
new file mode 100644
index 0000000..935565c
--- /dev/null
+++ b/wikimini/formats/gemtext.py
@@ -0,0 +1,58 @@
+"""This module contains a Gemtext formatter for
+:class:`~wikimini.document.Document`.
+"""
+from itertools import zip_longest
+from . import Format, as_string
+from ..document import LineBreak, BlockLink, InlineLink
+
+
+class Gemtext(Format):
+    """The Gemtext formatter."""
+
+    def render_document(self, document):
+        for block, next_block in zip_longest(
+                document.blocks, document.blocks[1:]):
+            self.render_block(block)
+            if not isinstance(next_block, (LineBreak, BlockLink)):
+                self.writer.write("\n")
+
+    def render_block_link(self, block_link):
+        self.writer.write(f"=> {block_link.href} {block_link.title}\n")
+
+    def render_block_quote(self, block_quote):
+        content = as_string(self, block_quote.content)
+        for line in content.split("\n"):
+            self.writer.write(f"> {line}\n")
+
+    def render_heading(self, heading):
+        level = min(3, heading.level)
+        self.writer.write("#" * level + f" {heading.text}\n")
+
+    def render_inline_link(self, inline_link):
+        self.render(inline_link.title)
+
+    def render_item_list(self, item_list):
+        for item in item_list.items:
+            if len(item.nodes) == 1 and isinstance(item.nodes[0], InlineLink):
+                link = item.nodes[0]
+                self.render(BlockLink(link.href, link.title.plain()))
+            else:
+                self.writer.write("* ")
+                self.render(item)
+
+    def render_line_break(self, _):
+        self.writer.write("\n")
+
+    def render_paragraph(self, paragraph):
+        for node in paragraph.nodes:
+            self.render(node)
+        self.writer.write("\n")
+
+    def render_plain(self, plain):
+        self.writer.write(plain.text)
+
+    def render_style(self, style):
+        self.render(style.inner)
+
+    def render_verbatim(self, verbatim):
+        self.writer.write(f"```\n{verbatim.text}\n```\n")
-- 
cgit v1.2.3


From d6e7479fb0b845415c9d1bdcc42936a4f36dde39 Mon Sep 17 00:00:00 2001
From: Daniel Schadt <kingdread@gmx.de>
Date: Fri, 20 Aug 2021 12:35:03 +0200
Subject: properly strip File: links that got through

---
 wikimini/document.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/wikimini/document.py b/wikimini/document.py
index 1e18a8b..a7da2a2 100644
--- a/wikimini/document.py
+++ b/wikimini/document.py
@@ -80,6 +80,9 @@ class Node:
             called on.
         """
 
+    def __len__(self):
+        return len(self.plain())
+
     def __getitem__(self, index):
         if isinstance(index, int):
             return self.plain()[index]
@@ -208,6 +211,16 @@ class Paragraph(Block):
     __slots__ = ("nodes",)
     nodes: List[Node]
 
+    def _find_index(self, idx):
+        offset = 0
+        for i, node in enumerate(self.nodes):
+            if idx < offset + len(node):
+                return (i, idx - offset)
+            offset += len(node)
+        if idx == offset:
+            return (i, len(node))
+        raise IndexError(f"{idx} is out of range")
+
     def __bool__(self):
         return bool(self.nodes)
 
@@ -221,6 +234,19 @@ class Paragraph(Block):
         return self.nodes
 
     def cleanup(self):
+        # There is a chance that some "thumbnail" links will get through
+        # (mainly if their text also contains links, in which case it'd require
+        # multiple parsing passes). As a quick and dirty fix, we just delete
+        # that stuff here:
+        while match := re.search("\\[\\[File:.+?\\]\\]", self.plain()):
+            start_node, start_pos = self._find_index(match.start())
+            end_node, end_pos = self._find_index(match.end())
+
+            new_start = self.nodes[start_node][:start_pos]
+            new_end = self.nodes[end_node][end_pos:]
+            self.nodes[start_node:end_node + 1] = [new_start, new_end]
+
+        # Strip leading and trailing whitespace
         while self.nodes and re.match("^\\s+|^$", self.nodes[0].plain()):
             self.nodes[0] = self.nodes[0].with_text(
                 self.nodes[0].plain().lstrip())
-- 
cgit v1.2.3


From 22029400ef35ee7eb85bf5d89562738d65a38e75 Mon Sep 17 00:00:00 2001
From: Daniel Schadt <kingdread@gmx.de>
Date: Fri, 20 Aug 2021 12:45:57 +0200
Subject: fix handling of link items with trailing plural s

---
 wikimini/document.py        | 20 ++++++++++++++++++++
 wikimini/formats/gemtext.py |  4 ++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/wikimini/document.py b/wikimini/document.py
index a7da2a2..a363c25 100644
--- a/wikimini/document.py
+++ b/wikimini/document.py
@@ -258,6 +258,26 @@ class Paragraph(Block):
             if not self.nodes[-1].plain():
                 del self.nodes[-1]
 
+    def is_link_paragraph(self) -> bool:
+        """Returns whether the paragraph can be considered a "link item".
+
+        A link item is a paragraph that only consists of a link (and
+        potentially a plural identifier), usually found in the "See also"
+        section on Wikipedia.
+
+        In case of a link paragraph, the first node will be an
+        :class:`InlineLink`.
+
+        Returns:
+            True if the paragraph is a link paragraph.
+        """
+        if not self.nodes:
+            return False
+        return (isinstance(self.nodes[0], InlineLink)
+                and (len(self.nodes) == 1
+                     or len(self.nodes) == 2 and self.nodes[1].plain() == "s")
+                )
+
 
 @dataclass
 class Heading(Block):
diff --git a/wikimini/formats/gemtext.py b/wikimini/formats/gemtext.py
index 935565c..39df956 100644
--- a/wikimini/formats/gemtext.py
+++ b/wikimini/formats/gemtext.py
@@ -33,9 +33,9 @@ class Gemtext(Format):
 
     def render_item_list(self, item_list):
         for item in item_list.items:
-            if len(item.nodes) == 1 and isinstance(item.nodes[0], InlineLink):
+            if item.is_link_paragraph():
                 link = item.nodes[0]
-                self.render(BlockLink(link.href, link.title.plain()))
+                self.render(BlockLink(link.href, item.plain()))
             else:
                 self.writer.write("* ")
                 self.render(item)
-- 
cgit v1.2.3


From a114ad49db792ec190a5cb6c96acc47669ac4b03 Mon Sep 17 00:00:00 2001
From: Daniel Schadt <kingdread@gmx.de>
Date: Fri, 20 Aug 2021 12:49:36 +0200
Subject: strip template name before looking it up

Some templates seem to be invoked with a trailing space at the end of
the name, which we need to strip before searching our template registry.
---
 wikimini/templates/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/wikimini/templates/__init__.py b/wikimini/templates/__init__.py
index 58a5db8..360b3fa 100644
--- a/wikimini/templates/__init__.py
+++ b/wikimini/templates/__init__.py
@@ -31,6 +31,7 @@ class Registry:
         Returns:
             The template if found, or :any:`None`.
         """
+        name = name.strip()
         # Are templates case-sensitive?
         #   Yes, except usually the first letter.
         # (https://en.wikipedia.org/wiki/Help:A_quick_guide_to_templates#FAQ)
-- 
cgit v1.2.3