Move MarkdownConverter subclass to separate module

author: Numerlor <[email protected]> 2020-07-26 15:11:45 +0200
committer: Numerlor <[email protected]> 2020-07-26 15:11:45 +0200
commit: 13030b8c54dd2ed37047349c5b09e4ded2c83391 (patch)
tree: ce7b816826960b3348144dd817706384837f021f
parent: Fix markdownify's handling of h tags. (diff)
2 files changed, 60 insertions, 57 deletions
diff --git a/bot/cogs/doc/markdown.py b/bot/cogs/doc/markdown.py
new file mode 100644
index 000000000..dca477d35
--- /dev/null
+++ b/bot/cogs/doc/markdown.py
@@ -0,0 +1,58 @@
+from urllib.parse import urljoin
+
+from bs4.element import PageElement
+from markdownify import MarkdownConverter
+
+
+class _DocMarkdownConverter(MarkdownConverter):
+    """Subclass markdownify's MarkdownConverter to provide custom conversion methods."""
+
+    def __init__(self, *, page_url: str, **options):
+        super().__init__(**options)
+        self.page_url = page_url
+
+    def convert_li(self, el: PageElement, text: str) -> str:
+        """Fix markdownify's erroneous indexing in ol tags."""
+        parent = el.parent
+        if parent is not None and parent.name == 'ol':
+            li_tags = parent.find_all("li")
+            bullet = '%s.' % (li_tags.index(el)+1)
+        else:
+            depth = -1
+            while el:
+                if el.name == 'ul':
+                    depth += 1
+                el = el.parent
+            bullets = self.options['bullets']
+            bullet = bullets[depth % len(bullets)]
+        return '%s %s\n' % (bullet, text or '')
+
+    def convert_hn(self, _n: int, el: PageElement, text: str) -> str:
+        """Convert h tags to bold text with ** instead of adding #."""
+        return f"**{text}**\n\n"
+
+    def convert_code(self, el: PageElement, text: str) -> str:
+        """Undo `markdownify`s underscore escaping."""
+        return f"`{text}`".replace('\\', '')
+
+    def convert_pre(self, el: PageElement, text: str) -> str:
+        """Wrap any codeblocks in `py` for syntax highlighting."""
+        code = ''.join(el.strings)
+        return f"```py\n{code}```"
+
+    def convert_a(self, el: PageElement, text: str) -> str:
+        """Resolve relative URLs to `self.page_url`."""
+        el["href"] = urljoin(self.page_url, el["href"])
+        return super().convert_a(el, text)
+
+    def convert_p(self, el: PageElement, text: str) -> str:
+        """Include only one newline instead of two when the parent is a li tag."""
+        parent = el.parent
+        if parent is not None and parent.name == "li":
+            return f"{text}\n"
+        return super().convert_p(el, text)
+
+
+def markdownify(html: str, *, url: str = "") -> str:
+    """Convert `html` to markdown with `_DocMarkdownConverter`, resolving relative links against `url`."""
+    return _DocMarkdownConverter(bullets='•', page_url=url).convert(html)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index ac8a94e3f..93daf3faf 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -4,15 +4,14 @@ import string
 import textwrap
 from functools import partial
 from typing import Callable, List, Optional, TYPE_CHECKING, Tuple, Union
-from urllib.parse import urljoin
 
 from aiohttp import ClientSession
 from bs4 import BeautifulSoup
 from bs4.element import NavigableString, PageElement, Tag
-from markdownify import MarkdownConverter
 
 from .cache import async_cache
 from .html import Strainer
+from .markdown import markdownify
 if TYPE_CHECKING:
     from .cog import DocItem
 
@@ -42,60 +41,6 @@ _NO_SIGNATURE_GROUPS = {
 }
 
 
-class _DocMarkdownConverter(MarkdownConverter):
-    """Subclass markdownify's MarkdownCoverter to provide custom conversion methods."""
-
-    def __init__(self, *, page_url: str, **options):
-        super().__init__(**options)
-        self.page_url = page_url
-
-    def convert_li(self, el: PageElement, text: str) -> str:
-        """Fix markdownify's erroneous indexing in ol tags."""
-        parent = el.parent
-        if parent is not None and parent.name == 'ol':
-            li_tags = parent.find_all("li")
-            bullet = '%s.' % (li_tags.index(el)+1)
-        else:
-            depth = -1
-            while el:
-                if el.name == 'ul':
-                    depth += 1
-                el = el.parent
-            bullets = self.options['bullets']
-            bullet = bullets[depth % len(bullets)]
-        return '%s %s\n' % (bullet, text or '')
-
-    def convert_hn(self, _n: int, el: PageElement, text: str) -> str:
-        """Convert h tags to bold text with ** instead of adding #."""
-        return f"**{text}**\n\n"
-
-    def convert_code(self, el: PageElement, text: str) -> str:
-        """Undo `markdownify`s underscore escaping."""
-        return f"`{text}`".replace('\\', '')
-
-    def convert_pre(self, el: PageElement, text: str) -> str:
-        """Wrap any codeblocks in `py` for syntax highlighting."""
-        code = ''.join(el.strings)
-        return f"```py\n{code}```"
-
-    def convert_a(self, el: PageElement, text: str) -> str:
-        """Resolve relative URLs to `self.page_url`."""
-        el["href"] = urljoin(self.page_url, el["href"])
-        return super().convert_a(el, text)
-
-    def convert_p(self, el: PageElement, text: str) -> str:
-        """Include only one newline instead of two when the parent is a li tag."""
-        parent = el.parent
-        if parent is not None and parent.name == "li":
-            return f"{text}\n"
-        return super().convert_p(el, text)
-
-
-def _markdownify(html: str, *, url: str = "") -> str:
-    """Create a DocMarkdownConverter object from the input html."""
-    return _DocMarkdownConverter(bullets='•', page_url=url).convert(html)
-
-
 def _find_elements_until_tag(
         start_element: PageElement,
         tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]],
@@ -215,7 +160,7 @@ def _parse_into_markdown(signatures: Optional[List[str]], description: str, url:
     The signatures are wrapped in python codeblocks, separated from the description by a newline.
     The result string is truncated to be max 1000 symbols long.
     """
-    description = _truncate_markdown(_markdownify(description, url=url), 1000)
+    description = _truncate_markdown(markdownify(description, url=url), 1000)
     description = _WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
     if signatures is not None:
         formatted_markdown = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures)
author	Numerlor <[email protected]>	2020-07-26 15:11:45 +0200
committer	Numerlor <[email protected]>	2020-07-26 15:11:45 +0200
commit	13030b8c54dd2ed37047349c5b09e4ded2c83391 (patch)
tree	ce7b816826960b3348144dd817706384837f021f
parent	Fix markdownify's handling of h tags. (diff)