From 7871c99a5ef791a5ce24c4b1d016a8b4200baf34 Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Thu, 19 Aug 2021 13:10:43 +0200 Subject: Add an internal Document representation Doing everything on strings is kinda wonky, so this adds an intermediate representation. The idea behind this is that the pipeline now goes Wikicode [1]-> Document [2]-> Output String Where step 1 takes care of templates and everything, and step 2 does the actual output formatting. This has the benefit that we can support multiple output types, some with more and some with less features (e.g., adding a Markdown output which keeps some of the original formatting intact), and it has the benefit of being less wonky (no hacks with "" for numbered lists, more streamlined formatting with newlines, ...). --- wikimini/__init__.py | 185 ++++++++--------- wikimini/document.py | 438 ++++++++++++++++++++++++++++++++++++++++ wikimini/templates/cite.py | 3 +- wikimini/templates/convert.py | 9 +- wikimini/templates/language.py | 21 +- wikimini/templates/mainlinks.py | 5 +- wikimini/templates/quotes.py | 11 +- wikimini/templates/various.py | 5 +- 8 files changed, 555 insertions(+), 122 deletions(-) create mode 100644 wikimini/document.py diff --git a/wikimini/__init__.py b/wikimini/__init__.py index b93ef81..fac0c84 100644 --- a/wikimini/__init__.py +++ b/wikimini/__init__.py @@ -4,8 +4,9 @@ import re from tabulate import tabulate -from typing import Union, Tuple +from typing import List, Union, Tuple +from .document import * #: The default API URL, pointing to the english Wikipedia. API_URL = "https://en.wikipedia.org/w/api.php" @@ -77,66 +78,71 @@ class Wikimini: text = revision["slots"]["main"]["content"] return (title, mwp.parse(text)) - def _convert(self, obj): - """Function that does the actual conversion. + def convert( + self, + obj: Union[mwp.wikicode.Wikicode, mwp.nodes.Node], + ) -> Union[Document, List[Node], List[Block]]: + """Function that converts and renders a node. 
- This is called recursively on each node, and should perform the correct - conversion - based on the node type. + This function is exposed for template implementors, for normal usage, + see :meth:`convert_to_document`. + + The input and output of this function is as follows: + + * If ``obj`` is a :class:`~mwparserfromhell.wikicode.Wikicode`, then + :meth:`convert` will return a :class:`document.Document`. + * If ``obj`` is a :class:`~mwparserfromhell.nodes.Node`, then + :meth:`convert` will return either a list of :class:`document.Node` + or a list of :class:`document.Block`, depending on whether the + converted object is inline (like a link), or a block object (like a + quote). + + Note that in the last case, the empty list ``[]`` might be returned, + indicating that the object should not be included in the output. + + Args: + obj: The object to convert. + + Returns: + The converted object. """ default = lambda obj:\ - mwp.wikicode.Wikicode([obj]).strip_code(collapse=False) + [Plain(mwp.wikicode.Wikicode([obj]).strip_code(collapse=False))] - # This does the actual conversion if isinstance(obj, mwp.wikicode.Wikicode): - converted = [] - iterator = iter(enumerate(obj.nodes)) - for i, node in iterator: - # Pattern: * [[Wikilink]]\n - if (i >= 2 and - i + 1 < len(obj.nodes) and - # Links can have a plural s after them - re.match("s?\n", str(obj.nodes[i+1])) and - isinstance(node, mwp.nodes.wikilink.Wikilink) and - str(obj.nodes[i-1]) == " " and - str(obj.nodes[i-2]) == "*"): - converted.pop() - converted.pop() - _, after = next(iterator) - converted.append("=> {} {}{}".format( - self.page_url(str(node.title)), - self._convert(node), - self._convert(after), - )) - continue - # Pattern: *[[Wikilink]]\n - elif (i >= 1 and - i + 1 < len(obj.nodes) and - re.match("s?\n", str(obj.nodes[i+1])) and - isinstance(node, mwp.nodes.wikilink.Wikilink) and - str(obj.nodes[i-1]) == "*"): - converted.pop() - _, after = next(iterator) - converted.append("=> {} {}{}".format( - 
self.page_url(str(node.title)), - self._convert(node), - self._convert(after), - )) - continue - - # Default: Just convert the node - converted.append(self._convert(node)) - return "".join(converted) + document = [] + for node in obj.nodes: + current = self.convert(node) + + if current == []: + pass + # Special case: We're starting a list, but we're already in a list + elif (document and len(current) == 1 and + isinstance(current[0], ItemList) and + isinstance(document[-1], ItemList) and + document[-1].ordered == current[0].ordered): + pass + # Special case: We're starting a list! + elif len(current) == 1 and isinstance(current[0], ItemList): + document.extend(current) + elif isinstance(current[0], Block): + document.extend(current) + document.append(Paragraph([])) + elif isinstance(current[0], Node): + for c in current: + insert_into(document, c) + return Document(document) elif isinstance(obj, mwp.nodes.heading.Heading): - return "{} {}\n".format("#" * min(obj.level, 3), obj.title.strip_code()) + return [Heading(obj.level, obj.title.strip_code())] elif isinstance(obj, mwp.nodes.tag.Tag): # Most tags are handled just fine and can be delegated to strip_code # (inline text styles), however we can do a bit better for list tags. 
if str(obj.wiki_markup) == "*": - return "* " + return [ItemList([], False)] elif str(obj.wiki_markup) == "#": - return " " + return [ItemList([], True)] elif str(obj.tag) == "ref": - return "" + return [] elif str(obj.tag) == "table": rows = [] header = () @@ -151,16 +157,16 @@ class Wikimini: continue if str(node.tag) == "th": row_is_header = True - parsed.append(self._convert(node.contents).strip()) + parsed.append( + self.convert(node.contents).plain().strip() + ) if not row_is_header: rows.append(parsed) else: header = parsed - return "".join([ - "\n```\n", - tabulate(rows, header, tablefmt=self.table_format), - "\n```\n", - ]) + return [ + Verbatim(tabulate(rows, header, tablefmt=self.table_format)) + ] else: return default(obj) elif isinstance(obj, mwp.nodes.template.Template): @@ -175,50 +181,28 @@ class Wikimini: return template(self, obj) elif isinstance(obj, mwp.nodes.wikilink.Wikilink): if str(obj.title).startswith("File:") or str(obj.text).startswith("thumb|"): - return "" + return [] elif str(obj.title).startswith("Category:"): - return "" + return [] else: - return default(obj) + return [InlineLink( + self.page_url(str(obj.title)), + Plain( + extract_plaintext(self.convert(obj.text)) if obj.text + else str(obj.title) + ), + )] else: return default(obj) - def _postprocess(self, gemtext): - # Strip out any more thumbs that have been left. - # This happens because the wikilinks are nested in each other, which the - # parser would only notice after doing the first replacement. We'll just - # take the easy way out here and use a regex to get rid of them. 
- gemtext = re.sub("^\\[\\[File:.*?\\]\\]$", "", gemtext, flags=re.MULTILINE) - - # Collapse too many empty lines - while "\n\n\n" in gemtext: - gemtext = gemtext.replace("\n\n\n", "\n\n") - - # Shortcut to avoid unnecessary splitting - if "" not in gemtext: - return gemtext - - lines = gemtext.split("\n") - counter = 1 - for idx in range(len(lines)): - line = lines[idx] - if line.startswith(""): - line = line.replace("", str(counter), 1) - lines[idx] = line - counter += 1 - else: - counter = 1 - return "\n".join(lines) - - - def wikicode_to_gemtext( - self, obj: Union[mwp.nodes.Node, mwp.wikicode.Wikicode] - ) -> str: - """Try to turn the given object into a sensible Gemtext representation. + def convert_to_document(self, obj: mwp.wikicode.Wikicode) -> Document: + """Try to turn the given object into a sensible + :class:`~document.Document` representation. - Note that wikicode is much more powerful than Gemtext, so this is a lossy - function. The returned Gemtext tries to mimic the content of the Wikicode - as much as possible (for human consumption). + Note that wikicode is much more powerful than the internal + representation, so this is a lossy function. The returned document tries + to mimic the content of the Wikicode as much as possible (for human + consumption). This function mostly mimics :meth:`~mwparserfromhell.wikicode.Wikicode.strip_code`, with some @@ -228,21 +212,26 @@ class Wikimini: obj: The object to convert. Returns: - The converted Gemtext. + The converted Document. 
""" # Avoid calling str() on the whole Wikicode here if (isinstance(obj, mwp.wikicode.Wikicode) and str(mwp.wikicode.Wikicode(obj.nodes[:2])) == "#REDIRECT "): + document = Document() title = str(obj.nodes[2].title) if "#" in title: title, section = title.split("#") section = f"Section '{section}'" else: section = "" - return "Redirect:\n=> {} {}\n{}".format( - self.page_url(title), title, section - ) - return self._postprocess(self._convert(obj)) + document.append(BlockLink(self.page_url(title), title)) + if section: + document.append(Paragraph([Plain(section)])) + return document + + document = self.convert(obj) + document.cleanup() + return document # import at the bottom to avoid circular dependencies diff --git a/wikimini/document.py b/wikimini/document.py new file mode 100644 index 0000000..2c901dc --- /dev/null +++ b/wikimini/document.py @@ -0,0 +1,438 @@ +"""The main class of this module is a :class:`Document`, which holds a parsed +and rendered Wikipedia article. + +We distinguish between two kinds of nodes, similar to HTML: +""" +import re +from dataclasses import dataclass, replace +from typing import List, Union + +class Document: + """A rendered Wikipedia article. + + Attributes: + blocks (List[Block]): A list of top-level nodes. + """ + __slots__ = ('blocks',) + + def __init__(self, blocks=None): + self.blocks = [] + if blocks: + self.blocks = blocks + + def __iter__(self): + return iter(self.blocks) + + def append(self, block: "Block"): + """Append a block to the document. + + Args: + block: The block to append. + """ + self.blocks.append(block) + + def cleanup(self): + """Clean up the document by cleaning up every contained block. + + See also :meth:`Block.cleanup`. + """ + for block in self.blocks: + block.cleanup() + self.blocks = [block for block in self.blocks if block] + + def nodes(self) -> List["Node"]: + """Discard the block information and return a list of inner nodes. + + Returns: + A list of all inner nodes. 
+ """ + return [node for block in self.blocks for node in block.to_nodes()] + + def plain(self) -> str: + """Returns the plain text content of this document. + + Returns: + The plain text. + """ + return extract_plaintext(self) + + +@dataclass +class Node: + """Base class for all in-line text elements.""" + + def plain(self) -> str: + """Returns the plain text of this node, stripping all markup. + + Returns: + The plain text. + """ + + def with_text(self, text: str) -> "Node": + """Returns a new node that has the same markup, but the given text. + + Args: + text: The new text. + + Returns: + The new node, usually of the same type as the node this function is + called on. + """ + + def __getitem__(self, index): + if isinstance(index, int): + return self.plain()[index] + elif isinstance(index, slice): + text = self.plain()[index] + return self.with_text(text) + else: + raise TypeError("Node indices must be integers or slices") + + +@dataclass +class Plain(Node): + """A plain text node. + + Attributes: + text: The text content of this node. + """ + __slots__ = ("text",) + text: str + + def plain(self): + return self.text + + def with_text(self, text): + return Plain(text) + + +@dataclass +class Style(Node): + """Text that is styled with inline markup. + + Attributes: + inner: The content. + bold: Whether the text is bold. + italic: Whether the text is cursive. + monospace: Whether the text is monospaced. + """ + __slots__ = ("text", "bold", "italic", "monospace") + inner: Node + bold: bool + italic: bool + monospace: bool + + def plain(self): + return self.inner.plain() + + def with_text(self, text): + return replace(self, inner=self.inner.with_text(text)) + + +@dataclass +class InlineLink(Node): + """An inline link. + + Attributes: + href: The link target. + title: The text that should be shown. 
+ """ + __slots__ = ("href", "title") + href: str + title: Union[Plain, Style] + + def plain(self): + if self.title is None: + return self.href + return self.title.plain() + + def with_text(self, text): + return replace(self, title=self.title.with_text(text)) + + +@dataclass +class Block: + """Base class for all top-level blocks.""" + + def cleanup(self): + """Clean up the content of this block. + + The exact meaning of this is dependent on the type of the block, but it + can involve stripping trailing/leading whitespace or other changes. + + Note that this modifies the block. + """ + + def append(self, node: Node): + """Append the given node to the block. + + Depending on the block, the node can either be inserted as-is (keeping + the markup information), or it is converted to plain text first. + + Args: + node: The node to insert. + """ + + def plain(self) -> str: + """Returns the plain text of this block, stripping all markup. + + Returns: + The plain text. + """ + + def to_nodes(self) -> List[Node]: + """Returns the inner nodes of this block. + + If the block is not made up of nodes, this will create new nodes that + contain the plain text content of this block. + + Returns: + The list of nodes. 
+ """ + return [Plain(self.plain())] + + +@dataclass +class Paragraph(Block): + """A paragraph is a piece of text, which itself can hold inline markup.""" + __slots__ = ("nodes",) + nodes: List[Node] + + def __bool__(self): + return bool(self.nodes) + + def append(self, node): + self.nodes.append(node) + + def plain(self): + return "".join(node.plain() for node in self.nodes) + + def to_nodes(self): + return self.nodes + + def cleanup(self): + while self.nodes and re.match("^\\s+|^$", self.nodes[0].plain()): + self.nodes[0] = self.nodes[0].with_text( + self.nodes[0].plain().lstrip()) + if not self.nodes[0].plain(): + del self.nodes[0] + while self.nodes and re.search("\\s+$|^$", self.nodes[-1].plain()): + self.nodes[-1] = self.nodes[-1].with_text( + self.nodes[-1].plain().rstrip()) + if not self.nodes[-1].plain(): + del self.nodes[-1] + + +@dataclass +class Heading(Block): + """A heading. + + Attributes: + level: The level of the heading. + text: The heading text. + """ + __slots__ = ("level", "text") + level: int + text: str + + def __bool__(self): + return bool(self.text) + + def cleanup(self): + self.text = self.text.strip() + + def append(self, node): + self.text += node.plain() + + def plain(self): + return self.text + + +@dataclass +class Verbatim(Block): + """Text that should appear verbatim in the output, such as code. + + Attributes: + text: The text that should appear. + """ + __slots__ = ("text",) + text: str + + def __bool__(self): + return bool(self.text) + + def append(self, node): + self.text += node.plain() + + def plain(self): + return self.text + + +@dataclass +class ItemList(Block): + """A list of elements. + + Attributes: + items: The list of items. Each item is a list of inline :class:`Node`. + ordered: A flag indicating whether the list should be an ordered + (numbered) list. 
+ """ + __slots__ = ("items", "ordered") + items: List[List[Node]] + ordered: bool + + def __bool__(self): + return bool(self.items) + + def new_item(self): + """Start a new item.""" + self.items.append([]) + + def append(self, node): + if not self.items: + self.new_item() + self.items[-1].append(node) + + def plain(self): + return "\n".join( + "".join(i.plain() for i in item) for item in self.items + ) + + def to_nodes(self): + return [node for item in self.items for node in item] + + def cleanup(self): + i = 0 + while i < len(self.items): + p = Paragraph(self.items[i]) + p.cleanup() + if p: + self.items[i] = p.to_nodes() + i += 1 + else: + del self.items[i] + + +@dataclass +class Blockquote(Block): + """A quote. + + Attributes: + nodes: The content of the blockquote, similar to + :attr:`Paragraph.nodes`. + """ + __slots__ = ("nodes",) + nodes: List[Node] + + def __bool__(self): + return bool(self.nodes) + + def append(self, node): + self.nodes.append(node) + + def plain(self): + return "".join(node.plain() for node in self.nodes) + + def to_nodes(self): + return self.nodes + + +@dataclass +class BlockLink(Block): + """A link on its own line. + + This is important for formats like Gemtext, where inline links will be + discarded. + + Attributes: + href: The target of the link. + title: The link text. + """ + __slots__ = ("href", "title") + href: str + title: str + + def append(self, node): + self.title += node.plain() + + def plain(self): + return self.title + + +def insert_into(blocks: List[Block], node: Node): + """Inserts the given node into the list of blocks. + + The node will always be inserted into the last block. If the list of blocks + is still empty, a fresh :class:`Paragraph` will be started. + + This function takes care of handling newlines properly. That means that a + double newline (``\\n\\n``) will start a new paragraph, and a single + newline (``\\n``) will start a new list item (if the current block is a + list). 
+ + Note that this function will modify the given list of blocks. + + Args: + blocks: The list of blocks. + node: The node to insert. + """ + if not blocks: + blocks.append(Paragraph([])) + + current_block = blocks[-1] + + if isinstance(current_block, Paragraph): + if "\n\n" in node.plain(): + idx = node.plain().index("\n\n") + left = node[:idx] + right = node[idx+2:] + current_block.append(left) + blocks.append(Paragraph([])) + insert_into(blocks, right) + else: + current_block.append(node) + + elif isinstance(current_block, ItemList): + match = re.search("\\n\\n?", node.plain()) + if not match: + current_block.append(node) + else: + left_end, right_start = match.span() + current_block.append(node[:left_end]) + if match.group() == "\n\n": + blocks.append(Paragraph([])) + else: + current_block.new_item() + insert_into(blocks, node[right_start:]) + + else: + current_block.append(node) + + +def extract_plaintext(obj) -> str: + """Tries to extract plaintext from the given object. + + The given object can be one of many things: + + * A list of :class:`Node` + * A list of :class:`Block` + * A single :class:`Node` + * A single :class:`Block` + * A :class:`Document` + + This function is useful if you recursively call + :meth:`wikimini.Wikimini.convert` and want to include the output in + something that only accepts plain text. + """ + if isinstance(obj, Document): + return extract_plaintext(obj.blocks) + + if not isinstance(obj, list): + obj = [obj] + + if not obj: + return "" + elif isinstance(obj[0], Node): + return "".join(node.plain() for node in obj) + elif isinstance(obj[0], Block): + return "\n\n".join(block.plain() for block in obj) diff --git a/wikimini/templates/cite.py b/wikimini/templates/cite.py index ac4f597..e342a5f 100644 --- a/wikimini/templates/cite.py +++ b/wikimini/templates/cite.py @@ -1,5 +1,6 @@ """Citation related templates.""" from . 
import registry +from ..document import Plain def tmpl_citation(wikimini, obj): @@ -23,7 +24,7 @@ def tmpl_citation(wikimini, obj): names.append(last) elif first: names.append(first) - return "{} ({})".format(title, "; ".join(names)) + return [Plain("{} ({})".format(title, "; ".join(names)))] for name in ["cite", "citation", "cite arXiv", "cite AV media", "cite book", diff --git a/wikimini/templates/convert.py b/wikimini/templates/convert.py index a7a3f44..8bab782 100644 --- a/wikimini/templates/convert.py +++ b/wikimini/templates/convert.py @@ -1,20 +1,21 @@ """Implementations for the unit conversion templates.""" from . import registry +from ..document import Plain def tmpl_convert(wikimini, obj): """Renders the ``{{convert|...}}`` template.""" if str(obj.params[1]) in {"-", "to"}: - return "{0}{3} {1} {2}{3}".format( + return [Plain("{0}{3} {1} {2}{3}".format( obj.params[0].value.strip_code(), obj.params[1].value.strip_code(), obj.params[2].value.strip_code(), obj.params[3].value.strip_code(), - ) - return "{}{}".format( + ))] + return [Plain("{}{}".format( obj.params[0].value.strip_code(), obj.params[1].value.strip_code(), - ) + ))] registry.insert("convert", tmpl_convert) diff --git a/wikimini/templates/language.py b/wikimini/templates/language.py index e8ab738..22320da 100644 --- a/wikimini/templates/language.py +++ b/wikimini/templates/language.py @@ -1,12 +1,15 @@ """Language related templates.""" from . 
import registry +from ..document import Plain import pycountry def tmpl_ipa(wikimini, obj): """Renders the ``{{IPA|...}}`` template.""" - return "pronounced [{}]".format(wikimini._convert(obj.params[0].value)) + return [Plain("pronounced [{}]".format( + wikimini.convert(obj.params[0].value).plain() + ))] registry.insert("IPA", tmpl_ipa) @@ -14,7 +17,7 @@ registry.insert("IPA", tmpl_ipa) def tmpl_lang(wikimini, obj): """Renders the ``{{Lang|...}}`` template.""" - return wikimini._convert(obj.params[1].value) + return wikimini.convert(obj.params[1].value).nodes() registry.insert("lang", tmpl_lang) @@ -24,18 +27,18 @@ registry.insert("script", tmpl_lang) def tmpl_lang_code(language_name): """Creates a template renderer for a ``{{lang-xx|...}}`` template.""" def inner(wikimini, obj): - return "{}: {}".format( - language_name, wikimini._convert(obj.params[0].value) - ) + return [Plain("{}: {}".format( + language_name, wikimini.convert(obj.params[0].value).plain() + ))] return inner def tmpl_ipa_code(language_name): """Creates a template renderer for a ``{{IPA-xx|...}}`` template.""" def inner(wikimini, obj): - return "{} pronunciation: [{}]".format( - language_name, wikimini._convert(obj.params[0].value) - ) + return [Plain("{} pronunciation: [{}]".format( + language_name, wikimini.convert(obj.params[0].value).plain() + ))] return inner @@ -52,7 +55,7 @@ for language in pycountry.languages: def tmpl_country_flag(country): """Creates a template renderer for ``{{BRA}}`` country flags.""" def inner(wikimini, obj): - return country + return [Plain(country)] return inner diff --git a/wikimini/templates/mainlinks.py b/wikimini/templates/mainlinks.py index ffcbc5e..8c9abbf 100644 --- a/wikimini/templates/mainlinks.py +++ b/wikimini/templates/mainlinks.py @@ -1,14 +1,15 @@ """Renders templates that link to further articles.""" from . 
import registry +from ..document import Paragraph, Plain, BlockLink def tmpl_main(wikimini, obj): """Renders the ``{{main|...}}`` template.""" links = [ - "=> {} {}".format(wikimini.page_url(str(t.value)), t.value) + BlockLink(wikimini.page_url(str(t.value)), t.value.strip_code()) for t in obj.params ] - return "Main articles:\n{}\n".format("\n".join(links)) + return [Paragraph([Plain("Main articles:")])] + links registry.insert("main", tmpl_main) diff --git a/wikimini/templates/quotes.py b/wikimini/templates/quotes.py index b51d92d..39f6fa2 100644 --- a/wikimini/templates/quotes.py +++ b/wikimini/templates/quotes.py @@ -1,15 +1,15 @@ """Renders various quote related templates.""" from . import registry +from ..document import Blockquote def tmpl_quote(wikimini, obj): """Renders the ``{{blockquote|...}}`` template.""" text = obj.get("text", None) if not text: return "" - content = wikimini._convert(text.value) - lines = content.split("\n") - return "\n".join(f"> {line}" for line in lines) + content = wikimini.convert(text.value).nodes() + return [Blockquote(content)] registry.insert("blockquote", tmpl_quote) @@ -19,9 +19,8 @@ registry.insert("quote", tmpl_quote) def tmpl_cquote(wikimini, obj): """Renders the ``{{cquote|...}}`` template.""" text = obj.params[0] - content = wikimini._convert(text.value) - lines = content.split("\n") - return "\n".join(f"> {line}" for line in lines) + content = wikimini.convert(text.value).nodes() + return [Blockquote(content)] registry.insert("cquote", tmpl_cquote) diff --git a/wikimini/templates/various.py b/wikimini/templates/various.py index 8c6e0d5..6076ac4 100644 --- a/wikimini/templates/various.py +++ b/wikimini/templates/various.py @@ -1,16 +1,17 @@ """Various small templates.""" from . import registry +from ..document import Plain def tmpl_reign(wikimini, obj): """Renders the ``{{reign|...}}`` template.""" if not obj.params: - return "r. " + return [Plain("r. ")] first = obj.params[0].value.strip_code().strip() or "?" 
second = "" if len(obj.params) > 1: second = obj.params[1].value.strip_code().strip() - return f"r. {first} – {second}" + return [Plain(f"r. {first} – {second}")] registry.insert("reign", tmpl_reign) -- cgit v1.2.3 From 05bb70ce36a11550222c718c6e69185b44793ca4 Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Thu, 19 Aug 2021 14:24:15 +0200 Subject: implement style fixes suggested by pycodestyle --- wikimini/__init__.py | 53 ++++++++++++++++++++++------------------- wikimini/document.py | 3 ++- wikimini/templates/__init__.py | 6 ++--- wikimini/templates/mainlinks.py | 1 + wikimini/templates/quotes.py | 1 + 5 files changed, 36 insertions(+), 28 deletions(-) diff --git a/wikimini/__init__.py b/wikimini/__init__.py index fac0c84..d6e49a1 100644 --- a/wikimini/__init__.py +++ b/wikimini/__init__.py @@ -1,12 +1,14 @@ import mwparserfromhell as mwp import requests -import re from tabulate import tabulate from typing import List, Union, Tuple -from .document import * +from .document import ( + Plain, BlockLink, InlineLink, Verbatim, Document, Node, Block, ItemList, + Paragraph, Heading, insert_into, extract_plaintext, +) #: The default API URL, pointing to the english Wikipedia. API_URL = "https://en.wikipedia.org/w/api.php" @@ -79,9 +81,9 @@ class Wikimini: return (title, mwp.parse(text)) def convert( - self, - obj: Union[mwp.wikicode.Wikicode, mwp.nodes.Node], - ) -> Union[Document, List[Node], List[Block]]: + self, + obj: Union[mwp.wikicode.Wikicode, mwp.nodes.Node], + ) -> Union[Document, List[Node], List[Block]]: """Function that converts and renders a node. 
This function is exposed for template implementors, for normal usage, @@ -116,11 +118,12 @@ class Wikimini: if current == []: pass - # Special case: We're starting a list, but we're already in a list - elif (document and len(current) == 1 and - isinstance(current[0], ItemList) and - isinstance(document[-1], ItemList) and - document[-1].ordered == current[0].ordered): + # Special case: We're starting a list, but we're already in a + # list + elif (document and len(current) == 1 + and isinstance(current[0], ItemList) + and isinstance(document[-1], ItemList) + and document[-1].ordered == current[0].ordered): pass # Special case: We're starting a list! elif len(current) == 1 and isinstance(current[0], ItemList): @@ -135,8 +138,9 @@ class Wikimini: elif isinstance(obj, mwp.nodes.heading.Heading): return [Heading(obj.level, obj.title.strip_code())] elif isinstance(obj, mwp.nodes.tag.Tag): - # Most tags are handled just fine and can be delegated to strip_code - # (inline text styles), however we can do a bit better for list tags. + # Most tags are handled just fine and can be delegated to + # strip_code (inline text styles), however we can do a bit better + # for list tags. if str(obj.wiki_markup) == "*": return [ItemList([], False)] elif str(obj.wiki_markup) == "#": @@ -164,15 +168,15 @@ class Wikimini: rows.append(parsed) else: header = parsed - return [ - Verbatim(tabulate(rows, header, tablefmt=self.table_format)) - ] + return [Verbatim( + tabulate(rows, header, tablefmt=self.table_format) + )] else: return default(obj) elif isinstance(obj, mwp.nodes.template.Template): - # Most templates are handled fine (and completely stripped), however, - # some of them are useful and provide some output that we should mimic - # (for example, the convert template). + # Most templates are handled fine (and completely stripped), + # however, some of them are useful and provide some output that we + # should mimic (for example, the convert template). 
name = str(obj.name) template = templates.registry.get(name) if template is None: @@ -180,7 +184,8 @@ class Wikimini: else: return template(self, obj) elif isinstance(obj, mwp.nodes.wikilink.Wikilink): - if str(obj.title).startswith("File:") or str(obj.text).startswith("thumb|"): + if (str(obj.title).startswith("File:") + or str(obj.text).startswith("thumb|")): return [] elif str(obj.title).startswith("Category:"): return [] @@ -200,9 +205,9 @@ class Wikimini: :class:`~document.Document` representation. Note that wikicode is much more powerful than the internal - representation, so this is a lossy function. The returned document tries - to mimic the content of the Wikicode as much as possible (for human - consumption). + representation, so this is a lossy function. The returned document + tries to mimic the content of the Wikicode as much as possible (for + human consumption). This function mostly mimics :meth:`~mwparserfromhell.wikicode.Wikicode.strip_code`, with some @@ -215,8 +220,8 @@ class Wikimini: The converted Document. """ # Avoid calling str() on the whole Wikicode here - if (isinstance(obj, mwp.wikicode.Wikicode) and - str(mwp.wikicode.Wikicode(obj.nodes[:2])) == "#REDIRECT "): + if (isinstance(obj, mwp.wikicode.Wikicode) + and str(mwp.wikicode.Wikicode(obj.nodes[:2])) == "#REDIRECT "): document = Document() title = str(obj.nodes[2].title) if "#" in title: diff --git a/wikimini/document.py b/wikimini/document.py index 2c901dc..be51108 100644 --- a/wikimini/document.py +++ b/wikimini/document.py @@ -7,6 +7,7 @@ import re from dataclasses import dataclass, replace from typing import List, Union + class Document: """A rendered Wikipedia article. 
@@ -385,7 +386,7 @@ def insert_into(blocks: List[Block], node: Node): if "\n\n" in node.plain(): idx = node.plain().index("\n\n") left = node[:idx] - right = node[idx+2:] + right = node[idx + 2:] current_block.append(left) blocks.append(Paragraph([])) insert_into(blocks, right) diff --git a/wikimini/templates/__init__.py b/wikimini/templates/__init__.py index 9e983e1..58a5db8 100644 --- a/wikimini/templates/__init__.py +++ b/wikimini/templates/__init__.py @@ -2,9 +2,9 @@ This module contains functions that mimic Wikipedia's templates. -A template is a function that takes the :class:`~wikimini.Wikimini` instance and the -:class:`~mwparserfromhell.nodes.template.Template` node to convert, and returns -a string with the template output (see :const:`Template`). +A template is a function that takes the :class:`~wikimini.Wikimini` instance +and the :class:`~mwparserfromhell.nodes.template.Template` node to convert, and +returns a string with the template output (see :const:`Template`). """ from typing import Callable, Optional diff --git a/wikimini/templates/mainlinks.py b/wikimini/templates/mainlinks.py index 8c9abbf..3d945ed 100644 --- a/wikimini/templates/mainlinks.py +++ b/wikimini/templates/mainlinks.py @@ -3,6 +3,7 @@ from . import registry from ..document import Paragraph, Plain, BlockLink + def tmpl_main(wikimini, obj): """Renders the ``{{main|...}}`` template.""" links = [ diff --git a/wikimini/templates/quotes.py b/wikimini/templates/quotes.py index 39f6fa2..fdf00cc 100644 --- a/wikimini/templates/quotes.py +++ b/wikimini/templates/quotes.py @@ -3,6 +3,7 @@ from . import registry from ..document import Blockquote + def tmpl_quote(wikimini, obj): """Renders the ``{{blockquote|...}}`` template.""" text = obj.get("text", None) -- cgit v1.2.3 From 635f53e9f82ede414f97087510043ec62d41468c Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Fri, 20 Aug 2021 11:03:03 +0200 Subject: Rename Blockquote to BlockQuote This keeps it more in line with BlockLink. 
Also, this adds a LineBreak block, which templates can use to enforce extra line breaks. --- wikimini/document.py | 11 ++++++++++- wikimini/templates/quotes.py | 6 +++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/wikimini/document.py b/wikimini/document.py index be51108..fe979fc 100644 --- a/wikimini/document.py +++ b/wikimini/document.py @@ -193,6 +193,15 @@ class Block: return [Plain(self.plain())] +@dataclass +class LineBreak: + """Represents an enforced empty line.""" + __slots__ = () + + def plain(self): + return "\n" + + @dataclass class Paragraph(Block): """A paragraph is a piece of text, which itself can hold inline markup.""" @@ -315,7 +324,7 @@ class ItemList(Block): @dataclass -class Blockquote(Block): +class BlockQuote(Block): """A quote. Attributes: diff --git a/wikimini/templates/quotes.py b/wikimini/templates/quotes.py index fdf00cc..ef7f297 100644 --- a/wikimini/templates/quotes.py +++ b/wikimini/templates/quotes.py @@ -1,7 +1,7 @@ """Renders various quote related templates.""" from . 
import registry -from ..document import Blockquote +from ..document import BlockQuote def tmpl_quote(wikimini, obj): @@ -10,7 +10,7 @@ def tmpl_quote(wikimini, obj): if not text: return "" content = wikimini.convert(text.value).nodes() - return [Blockquote(content)] + return [BlockQuote(content)] registry.insert("blockquote", tmpl_quote) @@ -21,7 +21,7 @@ def tmpl_cquote(wikimini, obj): """Renders the ``{{cquote|...}}`` template.""" text = obj.params[0] content = wikimini.convert(text.value).nodes() - return [Blockquote(content)] + return [BlockQuote(content)] registry.insert("cquote", tmpl_cquote) -- cgit v1.2.3 From 1a05603d0e77a6aa786d1b9fb05003a4161a486b Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Fri, 20 Aug 2021 11:03:36 +0200 Subject: add Format base class --- wikimini/formats/__init__.py | 148 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 wikimini/formats/__init__.py diff --git a/wikimini/formats/__init__.py b/wikimini/formats/__init__.py new file mode 100644 index 0000000..8d6296c --- /dev/null +++ b/wikimini/formats/__init__.py @@ -0,0 +1,148 @@ +"""The formats are responsible for turning a +:class:`~wikimini.document.Document` into an output string. + +Formats work by being given a file-like buffer as argument, into which the +output should be written. +""" +from typing import TextIO + +from ..document import ( + Document, Block, BlockLink, BlockQuote, Heading, ItemList, LineBreak, + Paragraph, Verbatim, Node, InlineLink, Plain, Style, +) + + +class Format: + """:class:`Format` is the base class for all output formats. + + Any output format should inherit from this class and override the specific + output methods. Note that by default, no output is generated. + + The methods :meth:`render_document`, :meth:`render_block` and + :meth:`render_node` have sensible default implementations that dispatch to + the more specific rendering methods. 
+ + Attributes: + writer: The file-like object that output should be written to. + """ + writer: TextIO + + def __init__(self, writer: TextIO): + self.writer = writer + + def render_document(self, document: Document): + """Renders the given document. + + Args: + document: The document to render. + """ + for block in document: + self.render_block(block) + + def render_block(self, block: Block): + """Renders a single block. + + Args: + block: The block to render. + """ + if isinstance(block, BlockLink): + self.render_block_link(block) + elif isinstance(block, BlockQuote): + self.render_block_quote(block) + elif isinstance(block, Heading): + self.render_heading(block) + elif isinstance(block, ItemList): + self.render_item_list(block) + elif isinstance(block, LineBreak): + self.render_line_break(block) + elif isinstance(block, Paragraph): + self.render_paragraph(block) + elif isinstance(block, Verbatim): + self.render_verbatim(block) + else: + raise TypeError(f"Unknown Block type given: {type(block)}") + + def render_block_link(self, block_link: BlockLink): + """Renders a :class:`~wikimini.document.BlockLink`. + + Args: + block_link: The block link to render. + """ + + def render_block_quote(self, block_quote: BlockQuote): + """Renders a :class:`~wikimini.document.BlockQuote`. + + Args: + block_quote: The block quote to render. + """ + + def render_heading(self, heading: Heading): + """Renders a :class:`~wikimini.document.Heading`. + + Args: + heading: The heading to render. + """ + + def render_item_list(self, item_list: ItemList): + """Renders a :class:`~wikimini.document.ItemList`. + + Args: + item_list: The item list to render. + """ + + def render_line_break(self, line_break: LineBreak): + """Renders a :class:`~wikimini.document.LineBreak`. + + Args: + line_break: The line break to render. + """ + + def render_paragraph(self, paragraph: Paragraph): + """Renders a :class:`~wikimini.document.Paragraph`. + + Args: + paragraph: The paragraph to render. 
+ """ + + def render_verbatim(self, verbatim: Verbatim): + """Renders a :class:`~wikimini.document.Verbatim`. + + Args: + verbatim: The verbatim to render. + """ + + def render_node(self, node: Node): + """Renders a single node. + + Args: + node: The node to render. + """ + if isinstance(node, InlineLink): + self.render_inline_link(node) + elif isinstance(node, Plain): + self.render_plain(node) + elif isinstance(node, Style): + self.render_style(node) + else: + raise TypeError(f"Unknown node type: {type(node)}") + + def render_inline_link(self, inline_link: InlineLink): + """Renders a :class:`~wikimini.document.InlineLink`. + + Args: + inline_link: The inline link to render. + """ + + def render_plain(self, plain: Plain): + """Renders a :class:`~wikimini.document.Plain`. + + Args: + plain: The plain text to render. + """ + + def render_style(self, style: Style): + """Renders a :class:`~wikimini.document.Style`. + + Args: + style: The styled text to render. + """ -- cgit v1.2.3 From 9b5b2bda1e43e659f142bb88a6b8138962e754e9 Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Fri, 20 Aug 2021 11:10:14 +0200 Subject: Rework ItemList/BlockQuote to hold Paragraph A List[Node] is basically a Paragraph, and we already delegated some of the methods to Paragraph (see ItemList.cleanup). Therefore, it only made sense to rework ItemList and BlockQuote to hold a Paragraph instead of a List[Node]. --- wikimini/document.py | 28 ++++++++++++---------------- wikimini/templates/quotes.py | 6 +++--- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/wikimini/document.py b/wikimini/document.py index fe979fc..7c459b6 100644 --- a/wikimini/document.py +++ b/wikimini/document.py @@ -283,12 +283,12 @@ class ItemList(Block): """A list of elements. Attributes: - items: The list of items. Each item is a list of inline :class:`Node`. + items: The list of items. Each item is a :class:`Paragraph`. ordered: A flag indicating whether the list should be an ordered (numbered) list. 
""" __slots__ = ("items", "ordered") - items: List[List[Node]] + items: List[Paragraph] ordered: bool def __bool__(self): @@ -296,7 +296,7 @@ class ItemList(Block): def new_item(self): """Start a new item.""" - self.items.append([]) + self.items.append(Paragraph([])) def append(self, node): if not self.items: @@ -304,20 +304,17 @@ class ItemList(Block): self.items[-1].append(node) def plain(self): - return "\n".join( - "".join(i.plain() for i in item) for item in self.items - ) + return "\n".join(paragraph.plain() for paragraph in self.items) def to_nodes(self): - return [node for item in self.items for node in item] + return [node for item in self.items for node in item.nodes] def cleanup(self): i = 0 while i < len(self.items): - p = Paragraph(self.items[i]) - p.cleanup() - if p: - self.items[i] = p.to_nodes() + paragraph = self.items[i] + paragraph.cleanup() + if paragraph: i += 1 else: del self.items[i] @@ -328,11 +325,10 @@ class BlockQuote(Block): """A quote. Attributes: - nodes: The content of the blockquote, similar to - :attr:`Paragraph.nodes`. + nodes: The content of the blockquote. """ __slots__ = ("nodes",) - nodes: List[Node] + nodes: Paragraph def __bool__(self): return bool(self.nodes) @@ -341,10 +337,10 @@ class BlockQuote(Block): self.nodes.append(node) def plain(self): - return "".join(node.plain() for node in self.nodes) + return self.nodes.plain() def to_nodes(self): - return self.nodes + return self.nodes.to_nodes() @dataclass diff --git a/wikimini/templates/quotes.py b/wikimini/templates/quotes.py index ef7f297..00b82fb 100644 --- a/wikimini/templates/quotes.py +++ b/wikimini/templates/quotes.py @@ -1,7 +1,7 @@ """Renders various quote related templates.""" from . 
import registry -from ..document import BlockQuote +from ..document import BlockQuote, Paragraph def tmpl_quote(wikimini, obj): @@ -10,7 +10,7 @@ def tmpl_quote(wikimini, obj): if not text: return "" content = wikimini.convert(text.value).nodes() - return [BlockQuote(content)] + return [BlockQuote(Paragraph(content))] registry.insert("blockquote", tmpl_quote) @@ -21,7 +21,7 @@ def tmpl_cquote(wikimini, obj): """Renders the ``{{cquote|...}}`` template.""" text = obj.params[0] content = wikimini.convert(text.value).nodes() - return [BlockQuote(content)] + return [BlockQuote(Paragraph(content))] registry.insert("cquote", tmpl_cquote) -- cgit v1.2.3 From a05368c8c8f9b97d727dc8d2efcf847743b29f66 Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Fri, 20 Aug 2021 11:58:42 +0200 Subject: implement Gemtext format --- wikimini/document.py | 16 ++++++------ wikimini/formats/__init__.py | 47 ++++++++++++++++++++++++++++++++--- wikimini/formats/gemtext.py | 58 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 12 deletions(-) create mode 100644 wikimini/formats/gemtext.py diff --git a/wikimini/document.py b/wikimini/document.py index 7c459b6..1e18a8b 100644 --- a/wikimini/document.py +++ b/wikimini/document.py @@ -325,22 +325,22 @@ class BlockQuote(Block): """A quote. Attributes: - nodes: The content of the blockquote. + content: The content of the blockquote. 
""" - __slots__ = ("nodes",) - nodes: Paragraph + __slots__ = ("content",) + content: Paragraph def __bool__(self): - return bool(self.nodes) + return bool(self.content) def append(self, node): - self.nodes.append(node) + self.content.append(node) def plain(self): - return self.nodes.plain() + return self.content.plain() - def to_nodes(self): - return self.nodes.to_nodes() + def to_content(self): + return self.content.to_nodes() @dataclass diff --git a/wikimini/formats/__init__.py b/wikimini/formats/__init__.py index 8d6296c..b48486a 100644 --- a/wikimini/formats/__init__.py +++ b/wikimini/formats/__init__.py @@ -4,7 +4,8 @@ Formats work by being given a file-like buffer as argument, into which the output should be written. """ -from typing import TextIO +import io +from typing import TextIO, Union from ..document import ( Document, Block, BlockLink, BlockQuote, Heading, ItemList, LineBreak, @@ -18,9 +19,9 @@ class Format: Any output format should inherit from this class and override the specific output methods. Note that by default, no output is generated. - The methods :meth:`render_document`, :meth:`render_block` and - :meth:`render_node` have sensible default implementations that dispatch to - the more specific rendering methods. + The methods :meth:`render`, :meth:`render_document`, :meth:`render_block` + and :meth:`render_node` have sensible default implementations that dispatch + to the more specific rendering methods. Attributes: writer: The file-like object that output should be written to. @@ -30,6 +31,21 @@ class Format: def __init__(self, writer: TextIO): self.writer = writer + def render(self, obj: Union[Document, Block, Node]): + """Renders the given object. + + Args: + obj: The object to render. 
+ """ + if isinstance(obj, Document): + self.render_document(obj) + elif isinstance(obj, Block): + self.render_block(obj) + elif isinstance(obj, Node): + self.render_node(obj) + else: + raise TypeError(f"Cannot render {obj}, unknown type") + def render_document(self, document: Document): """Renders the given document. @@ -146,3 +162,26 @@ class Format: Args: style: The styled text to render. """ + + +def as_string(formatter: Format, obj: Union[Document, Node, Block]) -> str: + """Runs the given format function and returns the result as a string. + + This temporarily replaces the output writer by an in-memory string object, + runs the render function and then restores the writer. + + Args: + formatter: The formatter to run. + obj: The object to render. + + Returns: + The content, as string. + """ + old_writer = formatter.writer + buffer = io.StringIO() + formatter.writer = buffer + try: + formatter.render(obj) + finally: + formatter.writer = old_writer + return buffer.getvalue() diff --git a/wikimini/formats/gemtext.py b/wikimini/formats/gemtext.py new file mode 100644 index 0000000..935565c --- /dev/null +++ b/wikimini/formats/gemtext.py @@ -0,0 +1,58 @@ +"""This module contains a Gemtext formatter for +:class:`~wikimini.document.Document`. +""" +from itertools import zip_longest +from . 
import Format, as_string +from ..document import LineBreak, BlockLink, InlineLink + + +class Gemtext(Format): + """The Gemtext formatter.""" + + def render_document(self, document): + for block, next_block in zip_longest( + document.blocks, document.blocks[1:]): + self.render_block(block) + if not isinstance(next_block, (LineBreak, BlockLink)): + self.writer.write("\n") + + def render_block_link(self, block_link): + self.writer.write(f"=> {block_link.href} {block_link.title}\n") + + def render_block_quote(self, block_quote): + content = as_string(self, block_quote.content) + for line in content.split("\n"): + self.writer.write(f"> {line}\n") + + def render_heading(self, heading): + level = min(3, heading.level) + self.writer.write("#" * level + f" {heading.text}\n") + + def render_inline_link(self, inline_link): + self.render(inline_link.title) + + def render_item_list(self, item_list): + for item in item_list.items: + if len(item.nodes) == 1 and isinstance(item.nodes[0], InlineLink): + link = item.nodes[0] + self.render(BlockLink(link.href, link.title.plain())) + else: + self.writer.write("* ") + self.render(item) + + def render_line_break(self, _): + self.writer.write("\n") + + def render_paragraph(self, paragraph): + for node in paragraph.nodes: + self.render(node) + self.writer.write("\n") + + def render_plain(self, plain): + self.writer.write(plain.text) + + def render_style(self, style): + self.render(style.inner) + + def render_verbatim(self, verbatim): + self.writer.write(f"```\n{verbatim.text}\n```\n") -- cgit v1.2.3 From d6e7479fb0b845415c9d1bdcc42936a4f36dde39 Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Fri, 20 Aug 2021 12:35:03 +0200 Subject: properly strip File: links that got through --- wikimini/document.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/wikimini/document.py b/wikimini/document.py index 1e18a8b..a7da2a2 100644 --- a/wikimini/document.py +++ b/wikimini/document.py @@ -80,6 +80,9 @@ class Node: 
called on. """ + def __len__(self): + return len(self.plain()) + def __getitem__(self, index): if isinstance(index, int): return self.plain()[index] @@ -208,6 +211,16 @@ class Paragraph(Block): __slots__ = ("nodes",) nodes: List[Node] + def _find_index(self, idx): + offset = 0 + for i, node in enumerate(self.nodes): + if idx < offset + len(node): + return (i, idx - offset) + offset += len(node) + if idx == offset: + return (i, len(node)) + raise IndexError(f"{idx} is out of range") + def __bool__(self): return bool(self.nodes) @@ -221,6 +234,19 @@ class Paragraph(Block): return self.nodes def cleanup(self): + # There is a chance that some "thumbnail" links will get through + # (mainly if their text also contains links, in which case it'd require + # multiple parsing passes). As a quick and dirty fix, we just delete + # that stuff here: + while match := re.search("\\[\\[File:.+?\\]\\]", self.plain()): + start_node, start_pos = self._find_index(match.start()) + end_node, end_pos = self._find_index(match.end()) + + new_start = self.nodes[start_node][:start_pos] + new_end = self.nodes[end_node][end_pos:] + self.nodes[start_node:end_node + 1] = [new_start, new_end] + + # Strip leading and trailing whitespace while self.nodes and re.match("^\\s+|^$", self.nodes[0].plain()): self.nodes[0] = self.nodes[0].with_text( self.nodes[0].plain().lstrip()) -- cgit v1.2.3 From 22029400ef35ee7eb85bf5d89562738d65a38e75 Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Fri, 20 Aug 2021 12:45:57 +0200 Subject: fix handling of link items with trailing plural s --- wikimini/document.py | 20 ++++++++++++++++++++ wikimini/formats/gemtext.py | 4 ++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/wikimini/document.py b/wikimini/document.py index a7da2a2..a363c25 100644 --- a/wikimini/document.py +++ b/wikimini/document.py @@ -258,6 +258,26 @@ class Paragraph(Block): if not self.nodes[-1].plain(): del self.nodes[-1] + def is_link_paragraph(self) -> bool: + """Returns whether 
the paragraph can be considered a "link item". + + A link item is a paragraph that only consists of a link (and + potentially a plural identifier), usually found in the "See also" + section on Wikipedia. + + In case of a link paragraph, the first node will be an + :class:`InlineLink`. + + Returns: + True if the paragraph is a link paragraph. + """ + if not self.nodes: + return False + return (isinstance(self.nodes[0], InlineLink) + and (len(self.nodes) == 1 + or len(self.nodes) == 2 and self.nodes[1].plain() == "s") + ) + @dataclass class Heading(Block): diff --git a/wikimini/formats/gemtext.py b/wikimini/formats/gemtext.py index 935565c..39df956 100644 --- a/wikimini/formats/gemtext.py +++ b/wikimini/formats/gemtext.py @@ -33,9 +33,9 @@ class Gemtext(Format): def render_item_list(self, item_list): for item in item_list.items: - if len(item.nodes) == 1 and isinstance(item.nodes[0], InlineLink): + if item.is_link_paragraph(): link = item.nodes[0] - self.render(BlockLink(link.href, link.title.plain())) + self.render(BlockLink(link.href, item.plain())) else: self.writer.write("* ") self.render(item) -- cgit v1.2.3 From a114ad49db792ec190a5cb6c96acc47669ac4b03 Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Fri, 20 Aug 2021 12:49:36 +0200 Subject: strip template name before looking it up Some templates seem to be invoked with a trailing space at the end of the name, which we need to strip before searching our template registry. --- wikimini/templates/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/wikimini/templates/__init__.py b/wikimini/templates/__init__.py index 58a5db8..360b3fa 100644 --- a/wikimini/templates/__init__.py +++ b/wikimini/templates/__init__.py @@ -31,6 +31,7 @@ class Registry: Returns: The template if found, or :any:`None`. """ + name = name.strip() # Are templates case-sensitive? # Yes, except usually the first letter. # (https://en.wikipedia.org/wiki/Help:A_quick_guide_to_templates#FAQ) -- cgit v1.2.3