From f14aa5cffb3f1d39c17fb4dc1e50c77149e580cf Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sun, 19 Jun 2022 14:27:54 +0200 Subject: filter out source code tags, remove unnecessary regex --- bot/exts/info/doc/_html.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/bot/exts/info/doc/_html.py b/bot/exts/info/doc/_html.py index c101ec250..497246375 100644 --- a/bot/exts/info/doc/_html.py +++ b/bot/exts/info/doc/_html.py @@ -1,4 +1,3 @@ -import re from functools import partial from typing import Callable, Container, Iterable, List, Union @@ -11,7 +10,6 @@ from . import MAX_SIGNATURE_AMOUNT log = get_logger(__name__) -_UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶") _SEARCH_END_TAG_ATTRS = ( "data", "function", @@ -129,12 +127,23 @@ def get_signatures(start_signature: PageElement) -> List[str]: start_signature, *_find_next_siblings_until_tag(start_signature, ("dd",), limit=2), )[-MAX_SIGNATURE_AMOUNT:]: - for tag in element.find_all("a", class_="headerlink", recursive=False): + for tag in element.find_all(_filter_signature_links, recursive=False): tag.decompose() - signature = _UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text) - + signature = element.text if signature: signatures.append(signature) return signatures + + +def _filter_signature_links(tag: Tag) -> bool: + """Return True if `tag` is a headerlink, or a link to source code; False otherwise.""" + if tag.name == "a": + if "headerlink" in tag.get("class", ()): + return True + + if tag.find(class_="viewcode-link"): + return True + + return False -- cgit v1.2.3