# Provenance: recovered from a cgit (v1.2.3) patch view.
#   commit:  962968fecedca3bef33ba9524d87ffedf815f16d
#   author:  Chris Lovering
#   date:    Sat, 5 Nov 2022 13:39:52 +0000
#   subject: Rename package due to naming conflict
#   change:  new file pydis_core/utils/regex.py (54 insertions), blob de82a1ed
#
# NOTE(review): the patch view had every "<...>" span stripped, which removed
# the regex group names. ``invite``, ``delim`` and ``block`` are confirmed by
# the docstring, the ``(?P=delim)`` backreference and the ``(?(block)...)``
# conditional. The names ``lang`` and ``code``, the bare ".gg" alternative and
# the "/ or slash" separator were reconstructed -- confirm against upstream.
"""Common regular expressions."""

import re

DISCORD_INVITE = re.compile(
    r"(https?://)?(www\.)?"                      # Optional http(s) and www.
    r"(discord([.,]|dot)gg|"                     # Could be discord.gg/
    r"discord([.,]|dot)com(/|slash)invite|"      # or discord.com/invite/
    r"discordapp([.,]|dot)com(/|slash)invite|"   # or discordapp.com/invite/
    r"discord([.,]|dot)me|"                      # or discord.me
    r"discord([.,]|dot)li|"                      # or discord.li
    r"discord([.,]|dot)io|"                      # or discord.io.
    r"((?<!\w)([.,]|dot))gg"                     # or a bare .gg/ not preceded by a word character
    r")(/|slash)"                                # the separator: "/" or the word "slash"
    r"(?P<invite>\S+)",                          # the invite code itself
    flags=re.IGNORECASE
)
"""
Regex for Discord server invites.

.. warning::
    The 'invite' capture group captures everything up to the next whitespace character.
    If you use it in HTTP requests or similar, sanitise it first, for example with
    :func:`urllib.parse.quote`.

:meta hide-value:
"""

FORMATTED_CODE_REGEX = re.compile(
    r"(?P<delim>(?P<block>```)|``?)"        # code delimiter: 1-3 backticks; `block` is only set for triple backticks
    r"(?(block)(?:(?P<lang>[a-z]+)\n)?)"    # if we're in a block, match optional language (only letters plus newline)
    r"(?:[ \t]*\n)*"                        # any blank (empty or tabs/spaces only) lines before the code
    r"(?P<code>.*?)"                        # extract all code inside the markup
    r"\s*"                                  # any more whitespace before the end of the code markup
    r"(?P=delim)",                          # match the exact same delimiter from the start again
    flags=re.DOTALL | re.IGNORECASE         # "." also matches newlines, case insensitive
)
"""
Regex for formatted code, using Discord's code blocks.

:meta hide-value:
"""

RAW_CODE_REGEX = re.compile(
    r"^(?:[ \t]*\n)*"                   # any blank (empty or tabs/spaces only) lines before the code
    r"(?P<code>.*?)"                    # extract all the rest as code
    r"\s*$",                            # any trailing whitespace until the end of the string
    flags=re.DOTALL                     # "." also matches newlines
)
"""
Regex for raw code, *not* using Discord's code blocks.

:meta hide-value:
"""