diff options
| author | 2019-11-02 18:28:04 +0100 | |
|---|---|---|
| committer | 2019-11-02 18:28:04 +0100 | |
| commit | 1aed2e4f4996f5546652bbb26e8fbf403e28aac4 (patch) | |
| tree | bbf30ba37d6a68b445d6ddb6fa7f6be9268a2b6c | |
| parent | Get up to 3 signatures of a symbol (diff) | |
Improve module description searching
| -rw-r--r-- | bot/cogs/doc.py | 42 |
1 files changed, 35 insertions, 7 deletions
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 2987f7245..30a14f26c 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -9,7 +9,7 @@ from typing import Any, Callable, Optional, Tuple import discord from bs4 import BeautifulSoup -from bs4.element import PageElement +from bs4.element import PageElement, Tag from discord.errors import NotFound from discord.ext import commands from markdownify import MarkdownConverter @@ -37,6 +37,16 @@ NO_OVERRIDE_PACKAGES = ( "Python", ) UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶") +SEARCH_END_TAG_ATTRS = ( + "data", + "function", + "class", + "exception", + "seealso", + "section", + "rubric", + "sphinxsidebar", +) WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)") @@ -245,12 +255,21 @@ class Doc(commands.Cog): return None if symbol_id == f"module-{symbol}": - # Get all paragraphs until the first div after the section div - # if searched symbol is a module. - trailing_div = symbol_heading.findNext("div") - info_paragraphs = trailing_div.find_previous_siblings("p")[::-1] - signature = None - description = ''.join(str(paragraph) for paragraph in info_paragraphs).replace('¶', '') + search_html = str(soup) + # Get page content from the module headerlink to the + # first tag that has its class in `SEARCH_END_TAG_ATTRS` + start_tag = symbol_heading.find("a", attrs={"class": "headerlink"}) + if start_tag is None: + return [], "" + + end_tag = start_tag.find_next(self._match_end_tag) + if end_tag is None: + return [], "" + + description_start_index = search_html.find(str(start_tag.parent)) + len(str(start_tag.parent)) + description_end_index = search_html.find(str(end_tag)) + description = search_html[description_start_index:description_end_index].replace('¶', '') + signatures = None else: # Get text of up to 3 signatures, remove unwanted symbols @@ -422,6 +441,15 @@ class Doc(commands.Cog): await self.refresh_inventory() await ctx.send(f"Successfully deleted `{package_name}` and refreshed inventory.") + @staticmethod + def _match_end_tag(tag: Tag) -> bool: + """Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table.""" + for attr in SEARCH_END_TAG_ATTRS: + if attr in tag.get("class", ()): + return True + + return tag.name == "table" + def setup(bot: commands.Bot) -> None: """Doc cog load.""" |