about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Numerlor <[email protected]> 2020-07-26 15:11:45 +0200
committer: Numerlor <[email protected]> 2020-07-26 15:11:45 +0200
commit: 13030b8c54dd2ed37047349c5b09e4ded2c83391 (patch)
tree: ce7b816826960b3348144dd817706384837f021f
parent: Fix markdownify's handling of h tags. (diff)
Move MarkdownConverter subclass to separate module
-rw-r--r-- bot/cogs/doc/markdown.py 58
-rw-r--r-- bot/cogs/doc/parsing.py 59
2 files changed, 60 insertions, 57 deletions
diff --git a/bot/cogs/doc/markdown.py b/bot/cogs/doc/markdown.py
new file mode 100644
index 000000000..dca477d35
--- /dev/null
+++ b/bot/cogs/doc/markdown.py
@@ -0,0 +1,58 @@
+from urllib.parse import urljoin
+
+from bs4.element import PageElement
+from markdownify import MarkdownConverter
+
+
+class _DocMarkdownConverter(MarkdownConverter):
+ """Subclass markdownify's MarkdownConverter to provide custom conversion methods."""
+
+ def __init__(self, *, page_url: str, **options):
+ super().__init__(**options)
+ self.page_url = page_url
+
+ def convert_li(self, el: PageElement, text: str) -> str:
+ """Fix markdownify's erroneous indexing in ol tags."""
+ parent = el.parent
+ if parent is not None and parent.name == 'ol':
+ li_tags = parent.find_all("li")
+ bullet = '%s.' % (li_tags.index(el)+1)
+ else:
+ depth = -1
+ while el:
+ if el.name == 'ul':
+ depth += 1
+ el = el.parent
+ bullets = self.options['bullets']
+ bullet = bullets[depth % len(bullets)]
+ return '%s %s\n' % (bullet, text or '')
+
+ def convert_hn(self, _n: int, el: PageElement, text: str) -> str:
+ """Convert h tags to bold text with ** instead of adding #."""
+ return f"**{text}**\n\n"
+
+ def convert_code(self, el: PageElement, text: str) -> str:
+ """Undo `markdownify`s underscore escaping."""
+ return f"`{text}`".replace('\\', '')
+
+ def convert_pre(self, el: PageElement, text: str) -> str:
+ """Wrap any codeblocks in `py` for syntax highlighting."""
+ code = ''.join(el.strings)
+ return f"```py\n{code}```"
+
+ def convert_a(self, el: PageElement, text: str) -> str:
+ """Resolve relative URLs to `self.page_url`."""
+ el["href"] = urljoin(self.page_url, el["href"])
+ return super().convert_a(el, text)
+
+ def convert_p(self, el: PageElement, text: str) -> str:
+ """Include only one newline instead of two when the parent is a li tag."""
+ parent = el.parent
+ if parent is not None and parent.name == "li":
+ return f"{text}\n"
+ return super().convert_p(el, text)
+
+
+def markdownify(html: str, *, url: str = "") -> str:
+ """Convert the input html to markdown with a _DocMarkdownConverter, resolving relative links against `url`."""
+ return _DocMarkdownConverter(bullets='•', page_url=url).convert(html)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index ac8a94e3f..93daf3faf 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -4,15 +4,14 @@ import string
import textwrap
from functools import partial
from typing import Callable, List, Optional, TYPE_CHECKING, Tuple, Union
-from urllib.parse import urljoin
from aiohttp import ClientSession
from bs4 import BeautifulSoup
from bs4.element import NavigableString, PageElement, Tag
-from markdownify import MarkdownConverter
from .cache import async_cache
from .html import Strainer
+from .markdown import markdownify
if TYPE_CHECKING:
from .cog import DocItem
@@ -42,60 +41,6 @@ _NO_SIGNATURE_GROUPS = {
}
-class _DocMarkdownConverter(MarkdownConverter):
- """Subclass markdownify's MarkdownCoverter to provide custom conversion methods."""
-
- def __init__(self, *, page_url: str, **options):
- super().__init__(**options)
- self.page_url = page_url
-
- def convert_li(self, el: PageElement, text: str) -> str:
- """Fix markdownify's erroneous indexing in ol tags."""
- parent = el.parent
- if parent is not None and parent.name == 'ol':
- li_tags = parent.find_all("li")
- bullet = '%s.' % (li_tags.index(el)+1)
- else:
- depth = -1
- while el:
- if el.name == 'ul':
- depth += 1
- el = el.parent
- bullets = self.options['bullets']
- bullet = bullets[depth % len(bullets)]
- return '%s %s\n' % (bullet, text or '')
-
- def convert_hn(self, _n: int, el: PageElement, text: str) -> str:
- """Convert h tags to bold text with ** instead of adding #."""
- return f"**{text}**\n\n"
-
- def convert_code(self, el: PageElement, text: str) -> str:
- """Undo `markdownify`s underscore escaping."""
- return f"`{text}`".replace('\\', '')
-
- def convert_pre(self, el: PageElement, text: str) -> str:
- """Wrap any codeblocks in `py` for syntax highlighting."""
- code = ''.join(el.strings)
- return f"```py\n{code}```"
-
- def convert_a(self, el: PageElement, text: str) -> str:
- """Resolve relative URLs to `self.page_url`."""
- el["href"] = urljoin(self.page_url, el["href"])
- return super().convert_a(el, text)
-
- def convert_p(self, el: PageElement, text: str) -> str:
- """Include only one newline instead of two when the parent is a li tag."""
- parent = el.parent
- if parent is not None and parent.name == "li":
- return f"{text}\n"
- return super().convert_p(el, text)
-
-
-def _markdownify(html: str, *, url: str = "") -> str:
- """Create a DocMarkdownConverter object from the input html."""
- return _DocMarkdownConverter(bullets='•', page_url=url).convert(html)
-
-
def _find_elements_until_tag(
start_element: PageElement,
tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]],
@@ -215,7 +160,7 @@ def _parse_into_markdown(signatures: Optional[List[str]], description: str, url:
The signatures are wrapped in python codeblocks, separated from the description by a newline.
The result string is truncated to be max 1000 symbols long.
"""
- description = _truncate_markdown(_markdownify(description, url=url), 1000)
+ description = _truncate_markdown(markdownify(description, url=url), 1000)
description = _WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
if signatures is not None:
formatted_markdown = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures)