aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Numerlor <[email protected]> 2020-06-27 15:46:47 +0200
committer: Numerlor <[email protected]> 2020-06-27 15:46:47 +0200
commit: c461bef250cd3d44fac2c0e64da21072f963909d (patch)
tree: 9ad200cad66eb8cb559b3592b824f103ed2522ec
parent: Strip backticks from symbol input. (diff)
Redesign `find_all_text_until_tag` to search through all direct children.
The previous approach didn't work for arbitrary tags with text.
-rw-r--r-- bot/cogs/doc.py 39
1 file changed, 12 insertions, 27 deletions
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 0dc1713a3..e4b54f0a5 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -11,7 +11,7 @@ from urllib.parse import urljoin
import discord
from bs4 import BeautifulSoup
-from bs4.element import NavigableString, PageElement, Tag
+from bs4.element import PageElement, Tag
from discord.errors import NotFound
from discord.ext import commands
from markdownify import MarkdownConverter
@@ -357,7 +357,7 @@ class Doc(commands.Cog):
if start_tag is None:
return None
- description = cls.find_all_text_until_tag(start_tag, cls._match_end_tag)
+ description = cls.find_all_children_until_tag(start_tag, cls._match_end_tag)
if description is None:
return None
@@ -373,7 +373,7 @@ class Doc(commands.Cog):
signatures = []
description_element = heading.find_next_sibling("dd")
description_pos = html.find(str(description_element))
- description = cls.find_all_text_until_tag(description_element, ("dt",))
+ description = cls.find_all_children_until_tag(description_element, tag_filter=("dt", "dl"))
for element in (
*reversed(heading.find_previous_siblings("dt", limit=2)),
@@ -388,41 +388,26 @@ class Doc(commands.Cog):
return signatures, description
@staticmethod
- def find_all_text_until_tag(
+ def find_all_children_until_tag(
start_element: PageElement,
- tag_filter: Union[Tuple[str], Callable[[Tag], bool]]
+ tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]]
) -> Optional[str]:
"""
- Get all text from <p> elements and strings until a tag matching `tag_filter` is found.
-
- Max 1000 elements are searched to avoid going through whole pages when no matching tag is found.
+ Get all direct children until a child matching `tag_filter` is found.
`tag_filter` can be either a tuple of string names to check against,
or a filtering callable that's applied to the tags.
- If no matching end tag is found, None is returned.
"""
text = ""
- element = start_element
- for _ in range(1000):
- if element is None:
- break
-
- element = element.next
- while isinstance(element, NavigableString):
- text += element
- element = element.next
- if element.name == "p":
- text += str(element)
-
- elif isinstance(tag_filter, tuple):
+ for element in start_element.find_next().find_next_siblings():
+ if isinstance(tag_filter, tuple):
if element.name in tag_filter:
break
- else:
- if tag_filter(element):
- break
- else:
- return None
+ elif tag_filter(element):
+ break
+ text += str(element)
+
return text
@commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)