diff options
| author | 2020-07-22 02:34:11 +0200 | |
|---|---|---|
| committer | 2020-07-22 02:34:11 +0200 | |
| commit | 4e9ffb210f6a8f0184ac97cb16703777cc1e0ca0 (patch) | |
| tree | cb2d1eda73985d573b29fdb8d1f0b78c279bbf2b | |
| parent | Fix ordered list indices in markdown converter. (diff) | |
Create a function for getting the result markdown.
| -rw-r--r-- | bot/cogs/doc/parsing.py | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py index 25001b83d..8756e0694 100644 --- a/bot/cogs/doc/parsing.py +++ b/bot/cogs/doc/parsing.py @@ -1,6 +1,7 @@ import logging import re import string +import textwrap from functools import partial from typing import Callable, List, Optional, Tuple, Union from urllib.parse import urljoin @@ -15,6 +16,8 @@ from .cache import async_cache log = logging.getLogger(__name__) UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶") +WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)") + SEARCH_END_TAG_ATTRS = ( "data", "function", @@ -175,6 +178,24 @@ def truncate_markdown(markdown: str, max_length: int) -> str: return markdown +def _parse_into_markdown(signatures: Optional[List[str]], description: str, url: str) -> str: + """ + Create a markdown string with the signatures at the top, and the converted html description below them. + + The signatures are wrapped in python codeblocks, separated from the description by a newline. + The result string is truncated to be max 1000 symbols long. + """ + description = truncate_markdown(markdownify(description, url=url), 1000) + description = WHITESPACE_AFTER_NEWLINES_RE.sub('', description) + if signatures is not None: + formatted_markdown = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures) + else: + formatted_markdown = "" + formatted_markdown += f"\n{description}" + + return formatted_markdown + + @async_cache(arg_offset=1) async def get_soup_from_url(http_session: ClientSession, url: str) -> BeautifulSoup: """Create a BeautifulSoup object from the HTML data in `url` with the head tag removed.""" |