diff options
| author | 2020-11-14 02:39:07 +0100 | |
|---|---|---|
| committer | 2020-11-15 03:12:58 +0100 | |
| commit | aeac77a08cdafadcc180a400c32ce21732d7d20d (patch) | |
| tree | 8f9eee4ce90f41f9c0196a5f94232cc3737e1237 | |
| parent | Intern relative url paths (diff) | |
Limit newlines in doc descriptions
| -rw-r--r-- | bot/exts/info/doc/_parsing.py | 48 | 
1 file changed, 32 insertions, 16 deletions
| diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py index 72e81982a..418405ca9 100644 --- a/bot/exts/info/doc/_parsing.py +++ b/bot/exts/info/doc/_parsing.py @@ -10,6 +10,7 @@ from typing import Callable, Collection, Container, Iterable, List, Optional, TY  from bs4 import BeautifulSoup  from bs4.element import NavigableString, PageElement, Tag +from bot.utils.helpers import find_nth_occurrence  from ._html import Strainer  from ._markdown import DocMarkdownConverter  if TYPE_CHECKING: @@ -219,21 +220,23 @@ def _get_truncated_description(          elements: Iterable[Union[Tag, NavigableString]],          markdown_converter: DocMarkdownConverter,          max_length: int, +        max_lines: int,  ) -> str:      """ -    Truncate markdown from `elements` to be at most `max_length` characters when rendered. +    Truncate markdown from `elements` to be at most `max_length` characters when rendered or `max_lines` newlines.      `max_length` limits the length of the rendered characters in the string,      with the real string length limited to `_MAX_DESCRIPTION_LENGTH` to accommodate discord length limits      """ +    result = "" +    markdown_element_ends = []      rendered_length = 0 -    real_length = 0 -    result = [] -    shortened = False +    tag_end_index = 0      for element in elements:          is_tag = isinstance(element, Tag)          element_length = len(element.text) if is_tag else len(element) +          if rendered_length + element_length < max_length:              if is_tag:                  element_markdown = markdown_converter.process_tag(element) @@ -241,21 +244,29 @@ def _get_truncated_description(                  element_markdown = markdown_converter.process_text(element)              element_markdown_length = len(element_markdown) -            if real_length + element_markdown_length < _MAX_DESCRIPTION_LENGTH: -                result.append(element_markdown) -            else: -                shortened = True -        
        break -            real_length += element_markdown_length              rendered_length += element_length +            tag_end_index += element_markdown_length + +            if not element_markdown.isspace(): +                markdown_element_ends.append(tag_end_index) +            result += element_markdown          else: -            shortened = True              break -    markdown_string = "".join(result) -    if shortened: -        markdown_string = markdown_string.rstrip(_TRUNCATE_STRIP_CHARACTERS) + "..." -    return markdown_string +    if not markdown_element_ends: +        return "" + +    newline_truncate_index = find_nth_occurrence(result, "\n", max_lines) +    if newline_truncate_index is not None and newline_truncate_index < _MAX_DESCRIPTION_LENGTH: +        truncate_index = newline_truncate_index +    else: +        truncate_index = _MAX_DESCRIPTION_LENGTH + +    if truncate_index >= markdown_element_ends[-1]: +        return result + +    markdown_truncate_index = max(cut for cut in markdown_element_ends if cut < truncate_index) +    return result[:markdown_truncate_index].strip(_TRUNCATE_STRIP_CHARACTERS) + "..."  def _create_markdown(signatures: Optional[List[str]], description: Iterable[Tag], url: str) -> str: @@ -265,7 +276,12 @@ def _create_markdown(signatures: Optional[List[str]], description: Iterable[Tag]      The signatures are wrapped in python codeblocks, separated from the description by a newline.      The result markdown string is max 750 rendered characters for the description with signatures at the start.      
""" -    description = _get_truncated_description(description, DocMarkdownConverter(bullets="•", page_url=url), 750) +    description = _get_truncated_description( +        description, +        markdown_converter=DocMarkdownConverter(bullets="•", page_url=url), +        max_length=750, +        max_lines=13 +    )      description = _WHITESPACE_AFTER_NEWLINES_RE.sub('', description)      if signatures is not None:          formatted_markdown = "".join(f"```py\n{signature}```" for signature in _truncate_signatures(signatures)) | 
