diff options
| author | 2020-11-14 02:39:07 +0100 | |
|---|---|---|
| committer | 2020-11-15 03:12:58 +0100 | |
| commit | aeac77a08cdafadcc180a400c32ce21732d7d20d (patch) | |
| tree | 8f9eee4ce90f41f9c0196a5f94232cc3737e1237 | |
| parent | Intern relative url paths (diff) | |
Limit newlines in doc descriptions
| -rw-r--r-- | bot/exts/info/doc/_parsing.py | 48 |
1 files changed, 32 insertions, 16 deletions
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py index 72e81982a..418405ca9 100644 --- a/bot/exts/info/doc/_parsing.py +++ b/bot/exts/info/doc/_parsing.py @@ -10,6 +10,7 @@ from typing import Callable, Collection, Container, Iterable, List, Optional, TY from bs4 import BeautifulSoup from bs4.element import NavigableString, PageElement, Tag +from bot.utils.helpers import find_nth_occurrence from ._html import Strainer from ._markdown import DocMarkdownConverter if TYPE_CHECKING: @@ -219,21 +220,23 @@ def _get_truncated_description( elements: Iterable[Union[Tag, NavigableString]], markdown_converter: DocMarkdownConverter, max_length: int, + max_lines: int, ) -> str: """ - Truncate markdown from `elements` to be at most `max_length` characters when rendered. + Truncate markdown from `elements` to be at most `max_length` characters when rendered or `max_lines` newlines. `max_length` limits the length of the rendered characters in the string, with the real string length limited to `_MAX_DESCRIPTION_LENGTH` to accommodate discord length limits """ + result = "" + markdown_element_ends = [] rendered_length = 0 - real_length = 0 - result = [] - shortened = False + tag_end_index = 0 for element in elements: is_tag = isinstance(element, Tag) element_length = len(element.text) if is_tag else len(element) + if rendered_length + element_length < max_length: if is_tag: element_markdown = markdown_converter.process_tag(element) @@ -241,21 +244,29 @@ def _get_truncated_description( element_markdown = markdown_converter.process_text(element) element_markdown_length = len(element_markdown) - if real_length + element_markdown_length < _MAX_DESCRIPTION_LENGTH: - result.append(element_markdown) - else: - shortened = True - break - real_length += element_markdown_length rendered_length += element_length + tag_end_index += element_markdown_length + + if not element_markdown.isspace(): + markdown_element_ends.append(tag_end_index) + result += element_markdown else: - shortened = True break - markdown_string = "".join(result) - if shortened: - markdown_string = markdown_string.rstrip(_TRUNCATE_STRIP_CHARACTERS) + "..." - return markdown_string + if not markdown_element_ends: + return "" + + newline_truncate_index = find_nth_occurrence(result, "\n", max_lines) + if newline_truncate_index is not None and newline_truncate_index < _MAX_DESCRIPTION_LENGTH: + truncate_index = newline_truncate_index + else: + truncate_index = _MAX_DESCRIPTION_LENGTH + + if truncate_index >= markdown_element_ends[-1]: + return result + + markdown_truncate_index = max(cut for cut in markdown_element_ends if cut < truncate_index) + return result[:markdown_truncate_index].strip(_TRUNCATE_STRIP_CHARACTERS) + "..." def _create_markdown(signatures: Optional[List[str]], description: Iterable[Tag], url: str) -> str: @@ -265,7 +276,12 @@ def _create_markdown(signatures: Optional[List[str]], description: Iterable[Tag] The signatures are wrapped in python codeblocks, separated from the description by a newline. The result markdown string is max 750 rendered characters for the description with signatures at the start. """ - description = _get_truncated_description(description, DocMarkdownConverter(bullets="•", page_url=url), 750) + description = _get_truncated_description( + description, + markdown_converter=DocMarkdownConverter(bullets="•", page_url=url), + max_length=750, + max_lines=13 + ) description = _WHITESPACE_AFTER_NEWLINES_RE.sub('', description) if signatures is not None: formatted_markdown = "".join(f"```py\n{signature}```" for signature in _truncate_signatures(signatures)) |