Redesign `find_all_text_until_tag` to search through all direct children.

The previous approach only collected text from bare strings and `<p>` elements, so text inside any other tag was dropped.
author: Numerlor <[email protected]> 2020-06-27 15:46:47 +0200
committer: Numerlor <[email protected]> 2020-06-27 15:46:47 +0200
commit: c461bef250cd3d44fac2c0e64da21072f963909d (patch)
tree: 9ad200cad66eb8cb559b3592b824f103ed2522ec
parent: Strip backticks from symbol input. (diff)
1 files changed, 12 insertions, 27 deletions
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 0dc1713a3..e4b54f0a5 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -11,7 +11,7 @@ from urllib.parse import urljoin
 
 import discord
 from bs4 import BeautifulSoup
-from bs4.element import NavigableString, PageElement, Tag
+from bs4.element import PageElement, Tag
 from discord.errors import NotFound
 from discord.ext import commands
 from markdownify import MarkdownConverter
@@ -357,7 +357,7 @@ class Doc(commands.Cog):
         if start_tag is None:
             return None
 
-        description = cls.find_all_text_until_tag(start_tag, cls._match_end_tag)
+        description = cls.find_all_children_until_tag(start_tag, cls._match_end_tag)
         if description is None:
             return None
 
@@ -373,7 +373,7 @@ class Doc(commands.Cog):
         signatures = []
         description_element = heading.find_next_sibling("dd")
         description_pos = html.find(str(description_element))
-        description = cls.find_all_text_until_tag(description_element, ("dt",))
+        description = cls.find_all_children_until_tag(description_element, tag_filter=("dt", "dl"))
 
         for element in (
             *reversed(heading.find_previous_siblings("dt", limit=2)),
@@ -388,41 +388,26 @@ class Doc(commands.Cog):
         return signatures, description
 
     @staticmethod
-    def find_all_text_until_tag(
+    def find_all_children_until_tag(
             start_element: PageElement,
-            tag_filter: Union[Tuple[str], Callable[[Tag], bool]]
+            tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]]
     ) -> Optional[str]:
         """
-        Get all text from <p> elements and strings until a tag matching `tag_filter` is found.
-
-        Max 1000 elements are searched to avoid going through whole pages when no matching tag is found.
+        Get all direct children until a child matching `tag_filter` is found.
 
         `tag_filter` can be either a tuple of string names to check against,
         or a filtering callable that's applied to the tags.
-        If no matching end tag is found, None is returned.
         """
         text = ""
-        element = start_element
-        for _ in range(1000):
-            if element is None:
-                break
-
-            element = element.next
-            while isinstance(element, NavigableString):
-                text += element
-                element = element.next
 
-            if element.name == "p":
-                text += str(element)
-
-            elif isinstance(tag_filter, tuple):
+        for element in start_element.find_next().find_next_siblings():
+            if isinstance(tag_filter, tuple):
                 if element.name in tag_filter:
                     break
-            else:
-                if tag_filter(element):
-                    break
-        else:
-            return None
+            elif tag_filter(element):
+                break
+            text += str(element)
+
         return text
 
     @commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
author	Numerlor <[email protected]>	2020-06-27 15:46:47 +0200
committer	Numerlor <[email protected]>	2020-06-27 15:46:47 +0200
commit	c461bef250cd3d44fac2c0e64da21072f963909d (patch)
tree	9ad200cad66eb8cb559b3592b824f103ed2522ec
parent	Strip backticks from symbol input. (diff)