diff options
author | 2022-06-19 14:27:54 +0200 | |
---|---|---|
committer | 2022-06-19 14:42:08 +0200 | |
commit | f14aa5cffb3f1d39c17fb4dc1e50c77149e580cf (patch) | |
tree | 5598e41fca6c0258cfa87c0b24e4e501b7497a0f | |
parent | filter out headerlinks for descriptions (diff) |
filter out source code tags, remove unnecessary regex
-rw-r--r-- | bot/exts/info/doc/_html.py | 19 |
1 files changed, 14 insertions, 5 deletions
diff --git a/bot/exts/info/doc/_html.py b/bot/exts/info/doc/_html.py index c101ec250..497246375 100644 --- a/bot/exts/info/doc/_html.py +++ b/bot/exts/info/doc/_html.py @@ -1,4 +1,3 @@ -import re from functools import partial from typing import Callable, Container, Iterable, List, Union @@ -11,7 +10,6 @@ from . import MAX_SIGNATURE_AMOUNT log = get_logger(__name__) -_UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶") _SEARCH_END_TAG_ATTRS = ( "data", "function", @@ -129,12 +127,23 @@ def get_signatures(start_signature: PageElement) -> List[str]: start_signature, *_find_next_siblings_until_tag(start_signature, ("dd",), limit=2), )[-MAX_SIGNATURE_AMOUNT:]: - for tag in element.find_all("a", class_="headerlink", recursive=False): + for tag in element.find_all(_filter_signature_links, recursive=False): tag.decompose() - signature = _UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text) - + signature = element.text if signature: signatures.append(signature) return signatures + + +def _filter_signature_links(tag: Tag) -> bool: + """Return True if `tag` is a headerlink, or a link to source code; False otherwise.""" + if tag.name == "a": + if "headerlink" in tag.get("class", ()): + return True + + if tag.find(class_="viewcode-link"): + return True + + return False |