aboutsummaryrefslogtreecommitdiffstats
path: root/pydis_core/utils/regex.py
blob: 1ccc0813acb2eba2df9bfa25d54d142678fa5db3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
"""Common regular expressions."""

import re

DISCORD_INVITE = re.compile(
    # Optional scheme and optional "www." prefix.
    r"(https?://)?"
    r"(www\.)?"
    # Host part. In every alternative the dot may be written as ".", "," or the word "dot".
    r"("
    r"discord([.,]|dot)gg"          # discord.gg
    r"|discord([.,]|dot)com"        # discord.com
    r"|discordapp([.,]|dot)com"     # discordapp.com
    r"|discord([.,]|dot)me"         # discord.me
    r"|discord([.,]|dot)li"         # discord.li
    r"|discord([.,]|dot)io"         # discord.io
    r"|((?<!\w)([.,]|dot))gg"       # bare .gg, only when no word character precedes the dot
    r")"
    # Optional "invite" path segment, introduced by "/", a backslash or the word "slash".
    r"((/|slash|\\)(invite))?"
    # Mandatory separator ("/", a backslash or "slash") in front of the code.
    r"(/|slash|\\)"
    # The invite code: every non-whitespace character up to the next whitespace.
    r"(?P<invite>\S+)",
    re.IGNORECASE,
)
"""
Regex for Discord server invites.

.. warning::
    This regex pattern will capture everything up to the next whitespace character. If you use the
    'invite' capture group in any HTTP requests or similar, please ensure you sanitise the output
    using something such as :func:`urllib.parse.quote`.

:meta hide-value:
"""

FORMATTED_CODE_REGEX = re.compile(
    # Opening delimiter of one, two or three backticks; the "block" group is set only for three.
    r"(?P<delim>(?P<block>```)|``?)"
    # Inside a block only: an optional language tag made of letters, ended by a newline.
    r"(?(block)"
    r"(?:(?P<lang>[a-z]+)\n)?"
    r")"
    # Skip leading lines that are empty or hold only spaces/tabs.
    r"(?:[ \t]*\n)*"
    # The code itself, matched lazily so it ends at the earliest possible closing delimiter.
    r"(?P<code>.*?)"
    # Whitespace between the code and the closing delimiter is kept out of "code".
    r"\s*"
    # Closing delimiter: a backreference to exactly what opened the markup.
    r"(?P=delim)",
    # DOTALL lets "." cross newlines; IGNORECASE lets the language tag be upper case.
    re.IGNORECASE | re.DOTALL,
)
"""
Regex for formatted code, using Discord's code blocks.

:meta hide-value:
"""

RAW_CODE_REGEX = re.compile(
    # From the start of the string, skip lines that are empty or hold only spaces/tabs.
    r"^(?:[ \t]*\n)*"
    # Everything after that is the code, matched lazily ...
    r"(?P<code>.*?)"
    # ... so that whitespace running up to the end of the string is excluded from it.
    r"\s*$",
    # DOTALL: "." has to match newlines for multi-line code.
    re.DOTALL,
)
"""
Regex for raw code, *not* using Discord's code blocks.

:meta hide-value:
"""