Create a function for collecting signatures.

By collecting the signatures separately from the description, we gain more flexibility in parsing different symbol groups, and we decouple the signature logic from the description, which can be parsed directly with the new `find_elements_until_tag`-based function.
author: Numerlor <[email protected]> 2020-07-20 17:35:07 +0200
committer: Numerlor <[email protected]> 2020-07-20 17:35:07 +0200
commit: 082867253cd19c70516102a3d4972da6d501ff6f (patch)
tree: 4bedca4fe7bb0e86ed0145414be690c53e66c85c
parent: Simplify module parsing method. (diff)
1 files changed, 10 insertions, 36 deletions
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 368feeb68..5b60f1609 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -73,51 +73,25 @@ def get_module_description(start_element: PageElement) -> Optional[str]:
     return description
 
 
-def parse_symbol(heading: PageElement, html: str) -> Tuple[List[str], str]:
+def get_signatures(start_signature: PageElement) -> List[str]:
     """
-    Parse the signatures and description of a symbol.
+    Collect up to 3 signatures from dt tags around the `start_signature` dt tag.
 
-    Collects up to 3 signatures from dt tags and a description from their sibling dd tag.
+    First the signatures under the `start_signature` are included;
+    if less than 2 are found, tags above the start signature are added to the result if any are present.
     """
     signatures = []
-    description_element = heading.find_next_sibling("dd")
-    description_pos = html.find(str(description_element))
-    description = find_all_children_until_tag(description_element, tag_filter=("dt", "dl"))
-
     for element in (
-            *reversed(heading.find_previous_siblings("dt", limit=2)),
-            heading,
-            *heading.find_next_siblings("dt", limit=2),
+            *reversed(find_previous_siblings_until_tag(start_signature, ("dd",), limit=2)),
+            start_signature,
+            *find_next_siblings_until_tag(start_signature, ("dd",), limit=2),
     )[-3:]:
-        signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
+        signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element)
 
-        if signature and html.find(str(element)) < description_pos:
+        if signature:
             signatures.append(signature)
 
-    return signatures, description
-
-
-def find_all_children_until_tag(
-        start_element: PageElement,
-        tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]]
-) -> Optional[str]:
-    """
-    Get all direct children until a child matching `tag_filter` is found.
-
-    `tag_filter` can be either a tuple of string names to check against,
-    or a filtering callable that's applied to the tags.
-    """
-    text = ""
-
-    for element in start_element.find_next().find_next_siblings():
-        if isinstance(tag_filter, tuple):
-            if element.name in tag_filter:
-                break
-        elif tag_filter(element):
-            break
-        text += str(element)
-
-    return text
+    return signatures
 
 
 def truncate_markdown(markdown: str, max_length: int) -> str:
author	Numerlor <[email protected]>	2020-07-20 17:35:07 +0200
committer	Numerlor <[email protected]>	2020-07-20 17:35:07 +0200
commit	082867253cd19c70516102a3d4972da6d501ff6f (patch)
tree	4bedca4fe7bb0e86ed0145414be690c53e66c85c
parent	Simplify module parsing method. (diff)