aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Numerlor <[email protected]> 2019-11-02 18:28:04 +0100
committer: Numerlor <[email protected]> 2019-11-02 18:28:04 +0100
commit: 1aed2e4f4996f5546652bbb26e8fbf403e28aac4 (patch)
tree: bbf30ba37d6a68b445d6ddb6fa7f6be9268a2b6c
parent: Get up to 3 signatures of a symbol (diff)
Improve module description searching
-rw-r--r-- bot/cogs/doc.py 42
1 files changed, 35 insertions, 7 deletions
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 2987f7245..30a14f26c 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -9,7 +9,7 @@ from typing import Any, Callable, Optional, Tuple
import discord
from bs4 import BeautifulSoup
-from bs4.element import PageElement
+from bs4.element import PageElement, Tag
from discord.errors import NotFound
from discord.ext import commands
from markdownify import MarkdownConverter
@@ -37,6 +37,16 @@ NO_OVERRIDE_PACKAGES = (
"Python",
)
UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
+# Tag classes that end the module description search: `_match_end_tag`
+# matches any tag whose class list contains one of these names.
+SEARCH_END_TAG_ATTRS = (
+ "data",
+ "function",
+ "class",
+ "exception",
+ "seealso",
+ "section",
+ "rubric",
+ "sphinxsidebar",
+)
WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
@@ -245,12 +255,21 @@ class Doc(commands.Cog):
return None
if symbol_id == f"module-{symbol}":
- # Get all paragraphs until the first div after the section div
- # if searched symbol is a module.
- trailing_div = symbol_heading.findNext("div")
- info_paragraphs = trailing_div.find_previous_siblings("p")[::-1]
- signature = None
- description = ''.join(str(paragraph) for paragraph in info_paragraphs).replace('¶', '')
+ search_html = str(soup)
+ # Get page content from the module headerlink to the
+ # first tag that has its class in `SEARCH_END_TAG_ATTRS`
+ start_tag = symbol_heading.find("a", attrs={"class": "headerlink"})
+ if start_tag is None:
+ return [], ""
+
+ end_tag = start_tag.find_next(self._match_end_tag)
+ if end_tag is None:
+ return [], ""
+
+ description_start_index = search_html.find(str(start_tag.parent)) + len(str(start_tag.parent))
+ description_end_index = search_html.find(str(end_tag))
+ description = search_html[description_start_index:description_end_index].replace('¶', '')
+ signatures = None
else:
# Get text of up to 3 signatures, remove unwanted symbols
@@ -422,6 +441,15 @@ class Doc(commands.Cog):
await self.refresh_inventory()
await ctx.send(f"Successfully deleted `{package_name}` and refreshed inventory.")
+    @staticmethod
+    def _match_end_tag(tag: Tag) -> bool:
+        """Return True if `tag` is a table or has a class listed in `SEARCH_END_TAG_ATTRS`."""
+        tag_classes = tag.get("class", ())
+        if any(end_class in tag_classes for end_class in SEARCH_END_TAG_ATTRS):
+            return True
+
+        return tag.name == "table"
+
def setup(bot: commands.Bot) -> None:
"""Doc cog load."""