diff options
| author | 2020-09-26 17:49:43 +0200 | |
|---|---|---|
| committer | 2020-10-10 15:03:37 +0200 | |
| commit | 730f30197c43cc170aaecde664712f6f4aaea246 (patch) | |
| tree | beaab114653ccf69130c621f6baf53ab39b6e3ce | |
| parent | Use List typehint that has a narrower scope (diff) | |
Collapse signatures between args instead of spaces
The signature length needed more logic and shorter limits
to ensure messages would fit in a discord message in a nice way.
Diffstat (limited to '')
| -rw-r--r-- | bot/cogs/doc/parsing.py | 95 | 
1 files changed, 92 insertions, 3 deletions
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py index 9c82a1c13..7dddadf43 100644 --- a/bot/cogs/doc/parsing.py +++ b/bot/cogs/doc/parsing.py @@ -5,7 +5,7 @@ import re  import string  import textwrap  from functools import partial -from typing import Callable, Iterable, List, Optional, TYPE_CHECKING, Tuple, Union +from typing import Callable, Collection, Iterable, List, Optional, TYPE_CHECKING, Tuple, Union  from bs4 import BeautifulSoup  from bs4.element import NavigableString, PageElement, Tag @@ -19,6 +19,7 @@ log = logging.getLogger(__name__)  _UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")  _WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)") +_PARAMETERS_RE = re.compile(r"\((.+)\)")  _SEARCH_END_TAG_ATTRS = (      "data", @@ -39,8 +40,59 @@ _NO_SIGNATURE_GROUPS = {      "templatetag",      "term",  } -_MAX_DESCRIPTION_LENGTH = 1800 +_EMBED_CODE_BLOCK_LENGTH = 61 +# Three code block wrapped lines with py syntax highlight +_MAX_SIGNATURES_LENGTH = (_EMBED_CODE_BLOCK_LENGTH + 8) * 3 +# Maximum discord message length - signatures on top +_MAX_DESCRIPTION_LENGTH = 2000 - _MAX_SIGNATURES_LENGTH  _TRUNCATE_STRIP_CHARACTERS = "!?:;." + string.whitespace +_BRACKET_PAIRS = { +    "{": "}", +    "(": ")", +    "[": "]", +} + + +def _split_parameters(parameters_string: str) -> List[str]: +    """ +    Split parameters of a signature into individual parameter strings on commas. + +    Long string literals are not accounted for. +    """ +    parameters_list = [] +    last_split = 0 +    depth = 0 +    expected_end = None +    current_search = None +    previous_character = "" + +    for index, character in enumerate(parameters_string): +        if character in _BRACKET_PAIRS: +            if current_search is None: +                current_search = character +                expected_end = _BRACKET_PAIRS[character] +            if character == current_search: +                depth += 1 + +        elif character in {"'", '"'}: +            if depth == 0: +                depth += 1 +            elif not previous_character == "\\": +                depth -= 1 + +        elif character == expected_end: +            depth -= 1 +            if depth == 0: +                current_search = None +                expected_end = None + +        elif depth == 0 and character == ",": +            parameters_list.append(parameters_string[last_split:index]) +            last_split = index + 1 +        previous_character = character + +    parameters_list.append(parameters_string[last_split:]) +    return parameters_list  def _find_elements_until_tag( @@ -121,6 +173,43 @@ def _get_signatures(start_signature: PageElement) -> List[str]:      return signatures +def _truncate_signatures(signatures: Collection[str]) -> Union[List[str], Collection[str]]: +    """ +    Truncate passed signatures to not exceed `_MAX_SIGNAUTRES_LENGTH`. + +    If the signatures need to be truncated, parameters are collapsed until they fit withing the limit. +    Individual signatures can consist of max 1, 2 or 3 lines of text, inversely  proportional to the amount of them. +    A maximum of 3 signatures is assumed to be passed. +    """ +    if not sum(len(signature) for signature in signatures) > _MAX_SIGNATURES_LENGTH: +        return signatures + +    max_signature_length = _EMBED_CODE_BLOCK_LENGTH * (4 - len(signatures)) +    formatted_signatures = [] +    for signature in signatures: +        signature = signature.strip() +        if len(signature) > max_signature_length: +            if (parameters_match := _PARAMETERS_RE.search(signature)) is None: +                formatted_signatures.append(textwrap.shorten(signature, max_signature_length)) +                continue + +            truncated_signature = [] +            parameters_string = parameters_match[1] +            running_length = len(signature) - len(parameters_string) +            for parameter in _split_parameters(parameters_string): +                if (len(parameter) + running_length) <= max_signature_length - 4:  # account for comma and placeholder +                    truncated_signature.append(parameter) +                    running_length += len(parameter) + 1 +                else: +                    truncated_signature.append(" ...") +                    formatted_signatures.append(signature.replace(parameters_string, ",".join(truncated_signature))) +                    break +        else: +            formatted_signatures.append(signature) + +    return formatted_signatures + +  def _get_truncated_description(          elements: Iterable[Union[Tag, NavigableString]],          markdown_converter: DocMarkdownConverter, @@ -174,7 +263,7 @@ def _parse_into_markdown(signatures: Optional[List[str]], description: Iterable[      description = _get_truncated_description(description, DocMarkdownConverter(bullets="•", page_url=url), 750)      description = _WHITESPACE_AFTER_NEWLINES_RE.sub('', description)      if signatures is not None: -        formatted_markdown = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures) +        formatted_markdown = "".join(f"```py\n{signature}```" for signature in _truncate_signatures(signatures))      else:          formatted_markdown = ""      formatted_markdown += f"\n{description}"  |