aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Numerlor <[email protected]>2020-07-19 03:13:02 +0200
committerGravatar Numerlor <[email protected]>2020-07-19 03:13:02 +0200
commiteb8361d7fa9d0eb0dd5982c6df0fd35b80d40ba6 (patch)
tree3358f127db8b6f4c1ef18ccaa0823b07a5aac50b
parentMove main parsing methods into a new module (diff)
Move markdown truncation into parser module
-rw-r--r--bot/cogs/doc/cog.py27
-rw-r--r--bot/cogs/doc/parser.py29
2 files changed, 31 insertions, 25 deletions
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index 4a275c7c6..bd4e9d4d1 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -25,7 +25,7 @@ from bot.decorators import with_role
from bot.pagination import LinePaginator
from bot.utils.messages import wait_for_deletion
from .cache import async_cache
-from .parser import get_soup_from_url, parse_module_symbol, parse_symbol
+from .parser import get_soup_from_url, parse_module_symbol, parse_symbol, truncate_markdown
log = logging.getLogger(__name__)
logging.getLogger('urllib3').setLevel(logging.WARNING)
@@ -270,30 +270,7 @@ class DocCog(commands.Cog):
self.bot.stats.incr(f"doc_fetches.{symbol_obj.package.lower()}")
signatures = scraped_html[0]
permalink = symbol_obj.url
- description = markdownify(scraped_html[1], url=permalink)
-
- # Truncate the description of the embed to the last occurrence
- # of a double newline (interpreted as a paragraph) before index 1000.
- if len(description) > 1000:
- shortened = description[:1000]
- description_cutoff = shortened.rfind('\n\n', 100)
- if description_cutoff == -1:
- # Search the shortened version for cutoff points in decreasing desirability,
- # cutoff at 1000 if none are found.
- for string in (". ", ", ", ",", " "):
- description_cutoff = shortened.rfind(string)
- if description_cutoff != -1:
- break
- else:
- description_cutoff = 1000
- description = description[:description_cutoff]
-
- # If there is an incomplete code block, cut it out
- if description.count("```") % 2:
- codeblock_start = description.rfind('```py')
- description = description[:codeblock_start].rstrip()
- description += f"... [read more]({permalink})"
-
+ description = truncate_markdown(markdownify(scraped_html[1], url=permalink), permalink, 1000)
description = WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
if signatures is None:
# If symbol is a module, don't show signature.
diff --git a/bot/cogs/doc/parser.py b/bot/cogs/doc/parser.py
index 67621591b..010826a96 100644
--- a/bot/cogs/doc/parser.py
+++ b/bot/cogs/doc/parser.py
@@ -83,6 +83,35 @@ def find_all_children_until_tag(
return text
def truncate_markdown(markdown: str, permalink: str, max_length: int) -> str:
    """
    Shorten `markdown` to about `max_length` characters and link to the full text.

    A string of at most `max_length` characters is returned unchanged. A longer string is cut,
    within its first `max_length` characters, at the first of these that is found:

    * the last paragraph break (double newline) located at index 100 or later,
    * the last ". ", then ", ", then ",", then " ",
    * exactly `max_length` if none of the above occur.

    If the cut leaves a code block open, the unfinished block is removed entirely.
    Finally "... [read more](<permalink>)" is appended. The suffix is not counted against
    `max_length`, so the returned string may be longer than `max_length`.
    """
    if len(markdown) <= max_length:
        return markdown

    shortened = markdown[:max_length]
    # Only paragraph breaks at index 100 or later are considered, so a break
    # very early in the text does not become the cut point.
    cutoff = shortened.rfind("\n\n", 100)
    if cutoff == -1:
        # Search the shortened version for cutoff points in decreasing desirability,
        # cutoff at `max_length` if none are found.
        for separator in (". ", ", ", ",", " "):
            cutoff = shortened.rfind(separator)
            if cutoff != -1:
                break
        else:
            cutoff = max_length
    truncated = markdown[:cutoff]

    # An odd number of fences means the last fence opens a block that is never closed.
    # Cut at that fence whatever its language tag is; looking only for "```py" would
    # miss other fences and leave the output with an unbalanced code block.
    if truncated.count("```") % 2:
        truncated = truncated[:truncated.rfind("```")].rstrip()

    return f"{truncated}... [read more]({permalink})"
+
+
@async_cache(arg_offset=1)
async def get_soup_from_url(http_session: ClientSession, url: str) -> BeautifulSoup:
"""Create a BeautifulSoup object from the HTML data in `url` with the head tag removed."""