diff options
| author | 2020-06-18 00:20:25 +0200 | |
|---|---|---|
| committer | 2020-06-18 00:20:25 +0200 | |
| commit | 41e906d6b978f0745f0aff5e7065ce142282a44f (patch) | |
| tree | 32174973c1d4094ad554e9f789b18977b7e9f8c9 | |
| parent | Skip symbols with slashes in them. (diff) | |
Move symbol parsing into separate methods.
| -rw-r--r-- | bot/cogs/doc.py | 66 |
1 files changed, 43 insertions, 23 deletions
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 59c3cc729..a1364dd8b 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -6,7 +6,7 @@ import textwrap from collections import OrderedDict from contextlib import suppress from types import SimpleNamespace -from typing import Any, Callable, Optional, Tuple +from typing import Any, Callable, List, Optional, Tuple from urllib.parse import urljoin import discord @@ -265,30 +265,14 @@ class Doc(commands.Cog): return None if symbol_id == f"module-{symbol}": - # Get page content from the module headerlink to the - # first tag that has its class in `SEARCH_END_TAG_ATTRS` - start_tag = symbol_heading.find("a", attrs={"class": "headerlink"}) - if start_tag is None: - return [], "" - - end_tag = start_tag.find_next(self._match_end_tag) - if end_tag is None: - return [], "" - - description_start_index = search_html.find(str(start_tag.parent)) + len(str(start_tag.parent)) - description_end_index = search_html.find(str(end_tag)) - description = search_html[description_start_index:description_end_index] - signatures = None + parsed_module = self.parse_module_symbol(symbol_heading, search_html) + if parsed_module is None: + return None + else: + signatures, description = parsed_module else: - signatures = [] - description = str(symbol_heading.find_next_sibling("dd")) - description_pos = search_html.find(description) - # Get text of up to 3 signatures, remove unwanted symbols - for element in [symbol_heading] + symbol_heading.find_next_siblings("dt", limit=2): - signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text) - if signature and search_html.find(str(element)) < description_pos: - signatures.append(signature) + signatures, description = self.parse_symbol(symbol_heading, search_html) return signatures, description.replace('ΒΆ', '') @@ -354,6 +338,42 @@ class Doc(commands.Cog): ) return embed + @classmethod + def parse_module_symbol(cls, heading: PageElement, html: str) -> Optional[Tuple[None, str]]: + """Get page content from the headerlink up to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`.""" + start_tag = heading.find("a", attrs={"class": "headerlink"}) + if start_tag is None: + return None + + end_tag = start_tag.find_next(cls._match_end_tag) + if end_tag is None: + return None + + description_start_index = html.find(str(start_tag.parent)) + len(str(start_tag.parent)) + description_end_index = html.find(str(end_tag)) + description = html[description_start_index:description_end_index] + + return None, description + + @staticmethod + def parse_symbol(heading: PageElement, html: str) -> Tuple[List[str], str]: + """ + Parse the signatures and description of a symbol. + + Collects up to 3 signatures from dt tags and a description from their sibling dd tag. + """ + signatures = [] + description = str(heading.find_next_sibling("dd")) + description_pos = html.find(description) + + for element in [heading] + heading.find_next_siblings("dt", limit=2): + signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text) + + if signature and html.find(str(element)) < description_pos: + signatures.append(signature) + + return signatures, description + @commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True) async def docs_group(self, ctx: commands.Context, *, symbol: str) -> None: """Lookup documentation for Python symbols.""" |