4 files changed, 126 insertions, 120 deletions
diff --git a/bot/exts/info/doc/__init__.py b/bot/exts/info/doc/__init__.py
index e9eb9428c..af0bbff2d 100644
--- a/bot/exts/info/doc/__init__.py
+++ b/bot/exts/info/doc/__init__.py
@@ -1,6 +1,13 @@
 from bot.bot import Bot
-from ._cog import DocCog
 
+MAX_SIGNATURE_AMOUNT = 3
+PRIORITY_PACKAGES = (
+    "python",
+)
+
+# Must come after the constants above: `_cog` does `from . import PRIORITY_PACKAGES` at import time.
+from ._cog import DocCog  # noqa: E402
+
 
 def setup(bot: Bot) -> None:
     """Load the Doc cog."""
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 3f7604072..fd211d9f1 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -24,6 +24,7 @@ from bot.pagination import LinePaginator
 from bot.utils.lock import lock
 from bot.utils.messages import send_denial, wait_for_deletion
 from bot.utils.scheduling import Scheduler
+from . import PRIORITY_PACKAGES
 from ._inventory_parser import INVENTORY_DICT, fetch_inventory
 from ._parsing import get_symbol_markdown
 from ._redis_cache import DocRedisCache
@@ -38,9 +39,6 @@ FORCE_PREFIX_GROUPS = (
     "pdbcommand",
     "term",
 )
-PRIORITY_PACKAGES = (
-    "python",
-)
 WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
 NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
 # Delay to wait before trying to reach a rescheduled inventory again, in minutes
diff --git a/bot/exts/info/doc/_html.py b/bot/exts/info/doc/_html.py
index 88fbc8825..f9fe542ce 100644
--- a/bot/exts/info/doc/_html.py
+++ b/bot/exts/info/doc/_html.py
@@ -1,10 +1,27 @@
 import logging
-from typing import List, Union
+import re
+from functools import partial
+from typing import Callable, Container, Iterable, List, Union
 
-from bs4.element import PageElement, SoupStrainer
+from bs4 import BeautifulSoup
+from bs4.element import NavigableString, PageElement, SoupStrainer, Tag
+
+from . import MAX_SIGNATURE_AMOUNT
 
 log = logging.getLogger(__name__)
 
+_UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
+_SEARCH_END_TAG_ATTRS = (
+    "data",
+    "function",
+    "class",
+    "exception",
+    "seealso",
+    "section",
+    "rubric",
+    "sphinxsidebar",
+)
+
 
 class Strainer(SoupStrainer):
     """Subclass of SoupStrainer to allow matching of both `Tag`s and `NavigableString`s."""
@@ -26,3 +43,94 @@ class Strainer(SoupStrainer):
                 return markup
         else:
             return super().search(markup)
+
+
+def _find_elements_until_tag(
+        start_element: PageElement,
+        end_tag_filter: Union[Container[str], Callable[[Tag], bool]],
+        *,
+        func: Callable,
+        include_strings: bool = False,
+        limit: int = None,
+) -> List[Union[Tag, NavigableString]]:
+    """
+    Get all elements up to `limit` or until a tag matching `end_tag_filter` is found.
+
+    `end_tag_filter` can be either a container of string names to check against,
+    or a filtering callable that's applied to tags.
+
+    When `include_strings` is True, `NavigableString`s from the document are included in the result alongside `Tag`s.
+
+    `func` takes in a BeautifulSoup unbound method for finding multiple elements, such as `BeautifulSoup.find_all`.
+    The method is then iterated over and all elements until the matching tag or the limit are added to the return list.
+    """
+    use_container_filter = not callable(end_tag_filter)
+    elements = []
+
+    for element in func(start_element, name=Strainer(include_strings=include_strings), limit=limit):
+        if isinstance(element, Tag):
+            if use_container_filter:
+                if element.name in end_tag_filter:
+                    break
+            elif end_tag_filter(element):
+                break
+        elements.append(element)
+
+    return elements
+
+
+_find_next_children_until_tag = partial(_find_elements_until_tag, func=partial(BeautifulSoup.find_all, recursive=False))
+_find_recursive_children_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_all)
+_find_next_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_next_siblings)
+_find_previous_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_previous_siblings)
+
+
+def _class_filter_factory(class_names: Iterable[str]) -> Callable[[Tag], bool]:
+    """Create a callable that returns True when the tag has a class in `class_names` or when the tag is a table."""
+    def match_tag(tag: Tag) -> bool:
+        for attr in class_names:
+            if attr in tag.get("class", ()):
+                return True
+        return tag.name == "table"
+
+    return match_tag
+
+
+def get_general_description(start_element: Tag) -> List[Union[Tag, NavigableString]]:
+    """
+    Get page content up to a table or a tag with its class in `_SEARCH_END_TAG_ATTRS`.
+
+    An attempt is made to find a headerlink `a` tag to skip repeating the symbol information in the description;
+    if one is found, its parent is used as the tag to start the search from instead of the `start_element`.
+    """
+    child_tags = _find_recursive_children_until_tag(start_element, _class_filter_factory(["section"]), limit=100)
+    header = next(filter(_class_filter_factory(["headerlink"]), child_tags), None)
+    start_tag = header.parent if header is not None else start_element
+    return _find_next_siblings_until_tag(start_tag, _class_filter_factory(_SEARCH_END_TAG_ATTRS), include_strings=True)
+
+
+def get_dd_description(symbol: PageElement) -> List[Union[Tag, NavigableString]]:
+    """Get the contents of the next dd tag, up to a dt or a dl tag."""
+    description_tag = symbol.find_next("dd")
+    return _find_next_children_until_tag(description_tag, ("dt", "dl"), include_strings=True)
+
+
+def get_signatures(start_signature: PageElement) -> List[str]:
+    """
+    Collect up to `MAX_SIGNATURE_AMOUNT` signatures from dt tags around the `start_signature` dt tag.
+
+    First the signatures under the `start_signature` are included;
+    if fewer than 2 are found, tags above the start signature are added to the result if any are present.
+    """
+    signatures = []
+    for element in (
+            *reversed(_find_previous_siblings_until_tag(start_signature, ("dd",), limit=2)),
+            start_signature,
+            *_find_next_siblings_until_tag(start_signature, ("dd",), limit=2),
+    )[-MAX_SIGNATURE_AMOUNT:]:
+        signature = _UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
+
+        if signature:
+            signatures.append(signature)
+
+    return signatures
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index 46ae33b92..d68f7c8d7 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -5,37 +5,23 @@ import re
 import string
 import textwrap
 from collections import namedtuple
-from functools import partial
-from typing import Callable, Collection, Container, Iterable, Iterator, List, Optional, TYPE_CHECKING, Union
+from typing import Collection, Iterable, Iterator, List, Optional, TYPE_CHECKING, Union
 
 from bs4 import BeautifulSoup
-from bs4.element import NavigableString, PageElement, Tag
+from bs4.element import NavigableString, Tag
 
 from bot.utils.helpers import find_nth_occurrence
-from ._html import Strainer
+from . import MAX_SIGNATURE_AMOUNT
+from ._html import get_dd_description, get_general_description, get_signatures
 from ._markdown import DocMarkdownConverter
 if TYPE_CHECKING:
     from ._cog import DocItem
 
 log = logging.getLogger(__name__)
 
-_MAX_SIGNATURE_AMOUNT = 3
-
-_UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
 _WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
 _PARAMETERS_RE = re.compile(r"\((.+)\)")
 
-_SEARCH_END_TAG_ATTRS = (
-    "data",
-    "function",
-    "class",
-    "exception",
-    "seealso",
-    "section",
-    "rubric",
-    "sphinxsidebar",
-)
-
 _NO_SIGNATURE_GROUPS = {
     "attribute",
     "envvar",
@@ -46,7 +32,7 @@ _NO_SIGNATURE_GROUPS = {
 }
 _EMBED_CODE_BLOCK_LINE_LENGTH = 61
-# _MAX_SIGNATURE_AMOUNT code block wrapped lines with py syntax highlight
-_MAX_SIGNATURES_LENGTH = (_EMBED_CODE_BLOCK_LINE_LENGTH + 8) * _MAX_SIGNATURE_AMOUNT
+# MAX_SIGNATURE_AMOUNT code block wrapped lines with py syntax highlight
+_MAX_SIGNATURES_LENGTH = (_EMBED_CODE_BLOCK_LINE_LENGTH + 8) * MAX_SIGNATURE_AMOUNT
 # Maximum discord message length - signatures on top
 _MAX_DESCRIPTION_LENGTH = 2000 - _MAX_SIGNATURES_LENGTH
 _TRUNCATE_STRIP_CHARACTERS = "!?:;." + string.whitespace
@@ -118,86 +104,6 @@ def _split_parameters(parameters_string: str) -> Iterator[str]:
     yield parameters_string[last_split:]
 
 
-def _find_elements_until_tag(
-        start_element: PageElement,
-        end_tag_filter: Union[Container[str], Callable[[Tag], bool]],
-        *,
-        func: Callable,
-        include_strings: bool = False,
-        limit: int = None,
-) -> List[Union[Tag, NavigableString]]:
-    """
-    Get all elements up to `limit` or until a tag matching `tag_filter` is found.
-
-    `end_tag_filter` can be either a container of string names to check against,
-    or a filtering callable that's applied to tags.
-
-    When `include_strings` is True, `NavigableString`s from the document will be included in the result along `Tag`s.
-
-    `func` takes in a BeautifulSoup unbound method for finding multiple elements, such as `BeautifulSoup.find_all`.
-    The method is then iterated over and all elements until the matching tag or the limit are added to the return list.
-    """
-    use_container_filter = not callable(end_tag_filter)
-    elements = []
-
-    for element in func(start_element, name=Strainer(include_strings=include_strings), limit=limit):
-        if isinstance(element, Tag):
-            if use_container_filter:
-                if element.name in end_tag_filter:
-                    break
-            elif end_tag_filter(element):
-                break
-        elements.append(element)
-
-    return elements
-
-
-_find_next_children_until_tag = partial(_find_elements_until_tag, func=partial(BeautifulSoup.find_all, recursive=False))
-_find_recursive_children_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_all)
-_find_next_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_next_siblings)
-_find_previous_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_previous_siblings)
-
-
-def _get_general_description(start_element: Tag) -> List[Union[Tag, NavigableString]]:
-    """
-    Get page content to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`.
-
-    A headerlink a tag is attempted to be found to skip repeating the symbol information in the description,
-    if it's found it's used as the tag to start the search from instead of the `start_element`.
-    """
-    child_tags = _find_recursive_children_until_tag(start_element, _class_filter_factory(["section"]), limit=100)
-    header = next(filter(_class_filter_factory(["headerlink"]), child_tags), None)
-    start_tag = header.parent if header is not None else start_element
-    return _find_next_siblings_until_tag(start_tag, _class_filter_factory(_SEARCH_END_TAG_ATTRS), include_strings=True)
-
-
-def _get_dd_description(symbol: PageElement) -> List[Union[Tag, NavigableString]]:
-    """Get the contents of the next dd tag, up to a dt or a dl tag."""
-    description_tag = symbol.find_next("dd")
-    return _find_next_children_until_tag(description_tag, ("dt", "dl"), include_strings=True)
-
-
-def _get_signatures(start_signature: PageElement) -> List[str]:
-    """
-    Collect up to `_MAX_SIGNATURE_AMOUNT` signatures from dt tags around the `start_signature` dt tag.
-
-    First the signatures under the `start_signature` are included;
-    if less than 2 are found, tags above the start signature are added to the result if any are present.
-    """
-    signatures = []
-    for element in (
-            *reversed(_find_previous_siblings_until_tag(start_signature, ("dd",), limit=2)),
-            start_signature,
-            *_find_next_siblings_until_tag(start_signature, ("dd",), limit=2),
-    )[-(_MAX_SIGNATURE_AMOUNT):]:
-        signature = _UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
-
-        if signature:
-            signatures.append(signature)
-
-    return signatures
-
-
 def _truncate_signatures(signatures: Collection[str]) -> Union[List[str], Collection[str]]:
     """
     Truncate passed signatures to not exceed `_MAX_SIGNAUTRES_LENGTH`.
@@ -210,7 +116,7 @@ def _truncate_signatures(signatures: Collection[str]) -> Union[List[str], Collec
     if not sum(len(signature) for signature in signatures) > _MAX_SIGNATURES_LENGTH:
         return signatures
 
-    max_signature_length = _EMBED_CODE_BLOCK_LINE_LENGTH * (_MAX_SIGNATURE_AMOUNT + 1 - len(signatures))
+    max_signature_length = _EMBED_CODE_BLOCK_LINE_LENGTH * (MAX_SIGNATURE_AMOUNT + 1 - len(signatures))
     formatted_signatures = []
     for signature in signatures:
         signature = signature.strip()
@@ -317,17 +223,6 @@ def _create_markdown(signatures: Optional[List[str]], description: Iterable[Tag]
     return formatted_markdown
 
 
-def _class_filter_factory(class_names: Iterable[str]) -> Callable[[Tag], bool]:
-    """Create callable that returns True when the passed in tag's class is in `class_names` or when it's is a table."""
-    def match_tag(tag: Tag) -> bool:
-        for attr in class_names:
-            if attr in tag.get("class", ()):
-                return True
-        return tag.name == "table"
-
-    return match_tag
-
-
 def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> Optional[str]:
     """
     Return parsed markdown of the passed symbol using the passed in soup, truncated to 1000 characters.
@@ -342,12 +237,12 @@ def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> Optional[s
     # Modules, doc pages and labels don't point to description list tags but to tags like divs,
     # no special parsing can be done so we only try to include what's under them.
     if symbol_data.group in {"module", "doc", "label"} or symbol_heading.name != "dt":
-        description = _get_general_description(symbol_heading)
+        description = get_general_description(symbol_heading)
 
     elif symbol_data.group in _NO_SIGNATURE_GROUPS:
-        description = _get_dd_description(symbol_heading)
+        description = get_dd_description(symbol_heading)
 
     else:
-        signature = _get_signatures(symbol_heading)
-        description = _get_dd_description(symbol_heading)
+        signature = get_signatures(symbol_heading)
+        description = get_dd_description(symbol_heading)
     return _create_markdown(signature, description, symbol_data.url).replace('¶', '').strip()