diff options
| -rw-r--r-- | Pipfile | 1 | ||||
| -rw-r--r-- | Pipfile.lock | 89 | ||||
| -rw-r--r-- | bot/exts/filters/filtering.py | 22 | 
3 files changed, 95 insertions, 17 deletions
| @@ -25,6 +25,7 @@ more_itertools = "~=8.2"  python-dateutil = "~=2.8"  python-frontmatter = "~=1.0.0"  pyyaml = "~=5.1" +regex = "==2021.4.4"  requests = "~=2.22"  sentry-sdk = "~=0.19"  sphinx = "~=2.2" diff --git a/Pipfile.lock b/Pipfile.lock index cbec48ef0..d6792ac35 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@  {      "_meta": {          "hash": { -            "sha256": "91b5639198b35740611e7ac923cfc262e5897b8cbc3ca243dc98335705804ba7" +            "sha256": "fc3421fc4c95d73b620f2b8b0a7dea288d4fc559e0d288ed4ad6cf4eb312f630"          },          "pipfile-spec": 6,          "requires": { @@ -221,6 +221,7 @@                  "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b",                  "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"              ], +            "index": "pypi",              "markers": "sys_platform == 'win32'",              "version": "==0.4.4"          }, @@ -250,11 +251,11 @@          },          "docutils": {              "hashes": [ -                "sha256:0c5b78adfbf7762415433f5515cd5c9e762339e23369dbe8000d84a4bf4ab3af", -                "sha256:c2de3a60e9e7d07be26b7f2b00ca0309c207e06c100f9cc2a94931fc75a478fc" +                "sha256:a71042bb7207c03d5647f280427f14bfbd1a65c9eb84f4b341d85fafb6bb4bdf", +                "sha256:e2ffeea817964356ba4470efba7c2f42b6b0de0b04e66378507e3e2504bbff4c"              ],              "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", -            "version": "==0.16" +            "version": "==0.17"          },          "emoji": {              "hashes": [ @@ -605,6 +606,15 @@              "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",              "version": "==2.4.7"          }, +        "pyreadline": { +            "hashes": [ +                "sha256:4530592fc2e85b25b1a9f79664433da09237c1a270e4d78ea5aa3a2c7229e2d1", +                "sha256:65540c21bfe14405a3a77e4c085ecfce88724743a4ead47c66b84defcf82c32e", +                "sha256:9ce5fa65b8992dfa373bddc5b6e0864ead8f291c94fbfec05fbd5c836162e67b" +            ], +            "markers": "sys_platform == 'win32'", +            "version": "==2.1" +        },          "python-dateutil": {              "hashes": [                  "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c", @@ -671,6 +681,53 @@              "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",              "version": "==3.5.3"          }, +        "regex": { +            "hashes": [ +                "sha256:01afaf2ec48e196ba91b37451aa353cb7eda77efe518e481707e0515025f0cd5", +                "sha256:11d773d75fa650cd36f68d7ca936e3c7afaae41b863b8c387a22aaa78d3c5c79", +                "sha256:18c071c3eb09c30a264879f0d310d37fe5d3a3111662438889ae2eb6fc570c31", +                "sha256:1e1c20e29358165242928c2de1482fb2cf4ea54a6a6dea2bd7a0e0d8ee321500", +                "sha256:281d2fd05555079448537fe108d79eb031b403dac622621c78944c235f3fcf11", +                "sha256:314d66636c494ed9c148a42731b3834496cc9a2c4251b1661e40936814542b14", +                "sha256:32e65442138b7b76dd8173ffa2cf67356b7bc1768851dded39a7a13bf9223da3", +                "sha256:339456e7d8c06dd36a22e451d58ef72cef293112b559010db3d054d5560ef439", +                "sha256:3916d08be28a1149fb97f7728fca1f7c15d309a9f9682d89d79db75d5e52091c", +                "sha256:3a9cd17e6e5c7eb328517969e0cb0c3d31fd329298dd0c04af99ebf42e904f82", +                "sha256:47bf5bf60cf04d72bf6055ae5927a0bd9016096bf3d742fa50d9bf9f45aa0711", +                "sha256:4c46e22a0933dd783467cf32b3516299fb98cfebd895817d685130cc50cd1093", +                "sha256:4c557a7b470908b1712fe27fb1ef20772b78079808c87d20a90d051660b1d69a", +                "sha256:52ba3d3f9b942c49d7e4bc105bb28551c44065f139a65062ab7912bef10c9afb", +                "sha256:563085e55b0d4fb8f746f6a335893bda5c2cef43b2f0258fe1020ab1dd874df8", +                "sha256:598585c9f0af8374c28edd609eb291b5726d7cbce16be6a8b95aa074d252ee17", +                "sha256:619d71c59a78b84d7f18891fe914446d07edd48dc8328c8e149cbe0929b4e000", +                "sha256:67bdb9702427ceddc6ef3dc382455e90f785af4c13d495f9626861763ee13f9d", +                "sha256:6d1b01031dedf2503631d0903cb563743f397ccaf6607a5e3b19a3d76fc10480", +                "sha256:741a9647fcf2e45f3a1cf0e24f5e17febf3efe8d4ba1281dcc3aa0459ef424dc", +                "sha256:7c2a1af393fcc09e898beba5dd59196edaa3116191cc7257f9224beaed3e1aa0", +                "sha256:7d9884d86dd4dd489e981d94a65cd30d6f07203d90e98f6f657f05170f6324c9", +                "sha256:90f11ff637fe8798933fb29f5ae1148c978cccb0452005bf4c69e13db951e765", +                "sha256:919859aa909429fb5aa9cf8807f6045592c85ef56fdd30a9a3747e513db2536e", +                "sha256:96fcd1888ab4d03adfc9303a7b3c0bd78c5412b2bfbe76db5b56d9eae004907a", +                "sha256:97f29f57d5b84e73fbaf99ab3e26134e6687348e95ef6b48cfd2c06807005a07", +                "sha256:980d7be47c84979d9136328d882f67ec5e50008681d94ecc8afa8a65ed1f4a6f", +                "sha256:a91aa8619b23b79bcbeb37abe286f2f408d2f2d6f29a17237afda55bb54e7aac", +                "sha256:ade17eb5d643b7fead300a1641e9f45401c98eee23763e9ed66a43f92f20b4a7", +                "sha256:b9c3db21af35e3b3c05764461b262d6f05bbca08a71a7849fd79d47ba7bc33ed", +                "sha256:bd28bc2e3a772acbb07787c6308e00d9626ff89e3bfcdebe87fa5afbfdedf968", +                "sha256:bf5824bfac591ddb2c1f0a5f4ab72da28994548c708d2191e3b87dd207eb3ad7", +                "sha256:c0502c0fadef0d23b128605d69b58edb2c681c25d44574fc673b0e52dce71ee2", +                "sha256:c38c71df845e2aabb7fb0b920d11a1b5ac8526005e533a8920aea97efb8ec6a4", +                "sha256:ce15b6d103daff8e9fee13cf7f0add05245a05d866e73926c358e871221eae87", +                "sha256:d3029c340cfbb3ac0a71798100ccc13b97dddf373a4ae56b6a72cf70dfd53bc8", +                "sha256:e512d8ef5ad7b898cdb2d8ee1cb09a8339e4f8be706d27eaa180c2f177248a10", +                "sha256:e8e5b509d5c2ff12f8418006d5a90e9436766133b564db0abaec92fd27fcee29", +                "sha256:ee54ff27bf0afaf4c3b3a62bcd016c12c3fdb4ec4f413391a90bd38bc3624605", +                "sha256:fa4537fb4a98fe8fde99626e4681cc644bdcf2a795038533f9f711513a862ae6", +                "sha256:fd45ff9293d9274c5008a2054ecef86a9bfe819a67c7be1afb65e69b405b3042" +            ], +            "index": "pypi", +            "version": "==2021.4.4" +        },          "requests": {              "hashes": [                  "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", @@ -976,11 +1033,11 @@          },          "flake8-annotations": {              "hashes": [ -                "sha256:40a4d504cdf64126ea0bdca39edab1608bc6d515e96569b7e7c3c59c84f66c36", -                "sha256:eabbfb2dd59ae0e9835f509f930e79cd99fa4ff1026fe6ca073503a57407037c" +                "sha256:0d6cd2e770b5095f09689c9d84cc054c51b929c41a68969ea1beb4b825cac515", +                "sha256:d10c4638231f8a50c0a597c4efce42bd7b7d85df4f620a0ddaca526138936a4f"              ],              "index": "pypi", -            "version": "==2.6.1" +            "version": "==2.6.2"          },          "flake8-bugbear": {              "hashes": [ @@ -1038,11 +1095,11 @@          },          "identify": {              "hashes": [ -                "sha256:43cb1965e84cdd247e875dec6d13332ef5be355ddc16776396d98089b9053d87", -                "sha256:c7c0f590526008911ccc5ceee6ed7b085cbc92f7b6591d0ee5913a130ad64034" +                "sha256:398cb92a7599da0b433c65301a1b62b9b1f4bb8248719b84736af6c0b22289d6", +                "sha256:4537474817e0bbb8cea3e5b7504b7de6d44e3f169a90846cbc6adb0fc8294502"              ],              "markers": "python_full_version >= '3.6.1'", -            "version": "==2.2.2" +            "version": "==2.2.3"          },          "idna": {              "hashes": [ @@ -1061,10 +1118,10 @@          },          "nodeenv": {              "hashes": [ -                "sha256:5304d424c529c997bc888453aeaa6362d242b6b4631e90f3d4bf1b290f1c84a9", -                "sha256:ab45090ae383b716c4ef89e690c41ff8c2b257b85b309f01f3654df3d084bd7c" +                "sha256:3ef13ff90291ba2a4a7a4ff9a979b63ffdd00a464dbe04acf0ea6471517a4c2b", +                "sha256:621e6b7076565ddcacd2db0294c0381e01fd28945ab36bcf00f41c5daf63bef7"              ], -            "version": "==1.5.0" +            "version": "==1.6.0"          },          "pep8-naming": {              "hashes": [ @@ -1076,11 +1133,11 @@          },          "pre-commit": {              "hashes": [ -                "sha256:94c82f1bf5899d56edb1d926732f4e75a7df29a0c8c092559c77420c9d62428b", -                "sha256:de55c5c72ce80d79106e48beb1b54104d16495ce7f95b0c7b13d4784193a00af" +                "sha256:029d53cb83c241fe7d66eeee1e24db426f42c858f15a38d20bcefd8d8e05c9da", +                "sha256:46b6ffbab37986c47d0a35e40906ae029376deed89a0eb2e446fb6e67b220427"              ],              "index": "pypi", -            "version": "==2.11.1" +            "version": "==2.12.0"          },          "pycodestyle": {              "hashes": [ diff --git a/bot/exts/filters/filtering.py b/bot/exts/filters/filtering.py index c90b18dcb..464732453 100644 --- a/bot/exts/filters/filtering.py +++ b/bot/exts/filters/filtering.py @@ -6,6 +6,7 @@ from typing import Any, Dict, List, Mapping, NamedTuple, Optional, Tuple, Union  import dateutil  import discord.errors +import regex  from async_rediscache import RedisCache  from dateutil.relativedelta import relativedelta  from discord import Colour, HTTPException, Member, Message, NotFound, TextChannel @@ -34,7 +35,11 @@ CODE_BLOCK_RE = re.compile(  EVERYONE_PING_RE = re.compile(rf"@everyone|<@&{Guild.id}>|@here")  SPOILER_RE = re.compile(r"(\|\|.+?\|\|)", re.DOTALL)  URL_RE = re.compile(r"(https?://[^\s]+)", flags=re.IGNORECASE) -ZALGO_RE = re.compile(r"[\u0300-\u036F\u0489]") + +# Exclude variation selectors from zalgo because they're actually invisible. +VARIATION_SELECTORS = r"\uFE00-\uFE0F\U000E0100-\U000E01EF" +INVISIBLE_RE = regex.compile(rf"[{VARIATION_SELECTORS}\p{{UNASSIGNED}}\p{{FORMAT}}\p{{CONTROL}}--\s]", regex.V1) +ZALGO_RE = regex.compile(rf"[\p{{NONSPACING MARK}}\p{{ENCLOSING MARK}}--[{VARIATION_SELECTORS}]]", regex.V1)  # Other constants.  DAYS_BETWEEN_ALERTS = 3 @@ -178,6 +183,7 @@ class Filtering(Cog):      def get_name_matches(self, name: str) -> List[re.Match]:          """Check bad words from passed string (name). Return list of matches.""" +        name = self.clean_input(name)          matches = []          watchlist_patterns = self._get_filterlist_items('filter_token', allowed=False)          for pattern in watchlist_patterns: @@ -444,6 +450,8 @@ class Filtering(Cog):          if SPOILER_RE.search(text):              text = self._expand_spoilers(text) +        text = self.clean_input(text) +          # Make sure it's not a URL          if URL_RE.search(text):              return False, None @@ -462,6 +470,7 @@ class Filtering(Cog):          Second return value is a reason of URL blacklisting (can be None).          """ +        text = self.clean_input(text)          if not URL_RE.search(text):              return False, None @@ -492,6 +501,8 @@ class Filtering(Cog):          Attempts to catch some of common ways to try to cheat the system.          """ +        text = self.clean_input(text) +          # Remove backslashes to prevent escape character aroundfuckery like          # discord\.gg/gdudes-pony-farm          text = text.replace("\\", "") @@ -628,6 +639,15 @@ class Filtering(Cog):          await self.bot.api_client.delete(f'bot/offensive-messages/{msg["id"]}')          log.info(f"Deleted the offensive message with id {msg['id']}.") +    @staticmethod +    def clean_input(string: str) -> str: +        """Remove zalgo and invisible characters from `string`.""" +        # For future consideration: remove characters in the Mc, Sk, and Lm categories too. +        # Can be normalised with form C to merge char + combining char into a single char to avoid +        # removing legit diacritics, but this would open up a way to bypass filters. +        no_zalgo = ZALGO_RE.sub("", string) +        return INVISIBLE_RE.sub("", no_zalgo) +  def setup(bot: Bot) -> None:      """Load the Filtering cog.""" | 
