diff options
| author | 2020-07-20 17:35:07 +0200 | |
|---|---|---|
| committer | 2020-07-20 17:35:07 +0200 | |
| commit | 082867253cd19c70516102a3d4972da6d501ff6f (patch) | |
| tree | 4bedca4fe7bb0e86ed0145414be690c53e66c85c | |
| parent | Simplify module parsing method. (diff) | |
Create a function for collecting signatures.
By collecting the signatures without the description, we gain more
flexibility in parsing different symbol groups and decouple that logic
from the description, which can be parsed directly with the new
`find_elements_until_tag`-based function.
| -rw-r--r-- | bot/cogs/doc/parsing.py | 46 |
1 file changed, 10 insertions, 36 deletions
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py index 368feeb68..5b60f1609 100644 --- a/bot/cogs/doc/parsing.py +++ b/bot/cogs/doc/parsing.py @@ -73,51 +73,25 @@ def get_module_description(start_element: PageElement) -> Optional[str]: return description -def parse_symbol(heading: PageElement, html: str) -> Tuple[List[str], str]: +def get_signatures(start_signature: PageElement) -> List[str]: """ - Parse the signatures and description of a symbol. + Collect up to 3 signatures from dt tags around the `start_signature` dt tag. - Collects up to 3 signatures from dt tags and a description from their sibling dd tag. + First the signatures under the `start_signature` are included; + if less than 2 are found, tags above the start signature are added to the result if any are present. """ signatures = [] - description_element = heading.find_next_sibling("dd") - description_pos = html.find(str(description_element)) - description = find_all_children_until_tag(description_element, tag_filter=("dt", "dl")) - for element in ( - *reversed(heading.find_previous_siblings("dt", limit=2)), - heading, - *heading.find_next_siblings("dt", limit=2), + *reversed(find_previous_siblings_until_tag(start_signature, ("dd",), limit=2)), + start_signature, + *find_next_siblings_until_tag(start_signature, ("dd",), limit=2), )[-3:]: - signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text) + signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element) - if signature and html.find(str(element)) < description_pos: + if signature: signatures.append(signature) - return signatures, description - - -def find_all_children_until_tag( - start_element: PageElement, - tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]] -) -> Optional[str]: - """ - Get all direct children until a child matching `tag_filter` is found. - - `tag_filter` can be either a tuple of string names to check against, - or a filtering callable that's applied to the tags. 
- """ - text = "" - - for element in start_element.find_next().find_next_siblings(): - if isinstance(tag_filter, tuple): - if element.name in tag_filter: - break - elif tag_filter(element): - break - text += str(element) - - return text + return signatures def truncate_markdown(markdown: str, max_length: int) -> str: |