aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--LICENSE-THIRD-PARTY30
-rw-r--r--Pipfile4
-rw-r--r--Pipfile.lock241
-rw-r--r--bot/converters.py44
-rw-r--r--bot/decorators.py12
-rw-r--r--bot/exts/info/doc.py485
-rw-r--r--bot/exts/info/doc/__init__.py16
-rw-r--r--bot/exts/info/doc/_batch_parser.py186
-rw-r--r--bot/exts/info/doc/_cog.py442
-rw-r--r--bot/exts/info/doc/_html.py136
-rw-r--r--bot/exts/info/doc/_inventory_parser.py126
-rw-r--r--bot/exts/info/doc/_markdown.py58
-rw-r--r--bot/exts/info/doc/_parsing.py256
-rw-r--r--bot/exts/info/doc/_redis_cache.py70
-rw-r--r--bot/exts/info/source.py3
-rw-r--r--bot/pagination.py36
-rw-r--r--bot/utils/function.py72
-rw-r--r--bot/utils/lock.py37
-rw-r--r--bot/utils/messages.py70
-rw-r--r--bot/utils/scheduling.py10
-rw-r--r--tests/bot/exts/info/doc/__init__.py0
-rw-r--r--tests/bot/exts/info/doc/test_parsing.py66
-rw-r--r--tests/bot/test_converters.py21
23 files changed, 1629 insertions, 792 deletions
diff --git a/LICENSE-THIRD-PARTY b/LICENSE-THIRD-PARTY
index eacd9b952..ab715630d 100644
--- a/LICENSE-THIRD-PARTY
+++ b/LICENSE-THIRD-PARTY
@@ -35,6 +35,36 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
---------------------------------------------------------------------------------------------------
+ BSD 2-Clause License
+Applies to:
+ - Copyright (c) 2007-2020 by the Sphinx team (see AUTHORS file). All rights reserved.
+ - bot/exts/info/doc/_inventory_parser.py: _load_v1, _load_v2 and ZlibStreamReader.__aiter__.
+---------------------------------------------------------------------------------------------------
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+---------------------------------------------------------------------------------------------------
PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
Applies to:
- Copyright © 2001-2020 Python Software Foundation. All rights reserved.
diff --git a/Pipfile b/Pipfile
index 2ac5645dd..e924f5ddb 100644
--- a/Pipfile
+++ b/Pipfile
@@ -20,15 +20,13 @@ emoji = "~=0.6"
feedparser = "~=5.2"
fuzzywuzzy = "~=0.17"
lxml = "~=4.4"
-markdownify = "==0.5.3"
+markdownify = "==0.6.1"
more_itertools = "~=8.2"
python-dateutil = "~=2.8"
python-frontmatter = "~=1.0.0"
pyyaml = "~=5.1"
regex = "==2021.4.4"
-requests = "~=2.22"
sentry-sdk = "~=0.19"
-sphinx = "~=2.2"
statsd = "~=3.3"
[dev-packages]
diff --git a/Pipfile.lock b/Pipfile.lock
index d6792ac35..1e1a8167b 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
- "sha256": "fc3421fc4c95d73b620f2b8b0a7dea288d4fc559e0d288ed4ad6cf4eb312f630"
+ "sha256": "e35c9bad81b01152ad3e10b85f1abf5866aa87b9d87e03bc30bdb9d37668ccae"
},
"pipfile-spec": 6,
"requires": {
@@ -99,13 +99,6 @@
"markers": "python_version >= '3.6'",
"version": "==3.3.1"
},
- "alabaster": {
- "hashes": [
- "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359",
- "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"
- ],
- "version": "==0.7.12"
- },
"arrow": {
"hashes": [
"sha256:3515630f11a15c61dcb4cdd245883270dd334c83f3e639824e65a4b79cc48543",
@@ -142,14 +135,6 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==20.3.0"
},
- "babel": {
- "hashes": [
- "sha256:9d35c22fcc79893c3ecc85ac4a56cde1ecf3f19c540bba0922308a6c06ca6fa5",
- "sha256:da031ab54472314f210b0adcff1588ee5d1d1d0ba4dbd07b94dba82bde791e05"
- ],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==2.9.0"
- },
"beautifulsoup4": {
"hashes": [
"sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35",
@@ -221,7 +206,6 @@
"sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b",
"sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"
],
- "index": "pypi",
"markers": "sys_platform == 'win32'",
"version": "==0.4.4"
},
@@ -249,14 +233,6 @@
"index": "pypi",
"version": "==1.6.0"
},
- "docutils": {
- "hashes": [
- "sha256:a71042bb7207c03d5647f280427f14bfbd1a65c9eb84f4b341d85fafb6bb4bdf",
- "sha256:e2ffeea817964356ba4470efba7c2f42b6b0de0b04e66378507e3e2504bbff4c"
- ],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
- "version": "==0.17"
- },
"emoji": {
"hashes": [
"sha256:e42da4f8d648f8ef10691bc246f682a1ec6b18373abfd9be10ec0b398823bd11"
@@ -345,27 +321,11 @@
},
"idna": {
"hashes": [
- "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
- "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
+ "sha256:5205d03e7bcbb919cc9c19885f9920d622ca52448306f2377daede5cf3faac16",
+ "sha256:c5b02147e01ea9920e6b0a3f1f7bb833612d507592c837a6c49552768f4054e1"
],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==2.10"
- },
- "imagesize": {
- "hashes": [
- "sha256:6965f19a6a2039c7d48bca7dba2473069ff854c36ae6f19d2cde309d998228a1",
- "sha256:b1f6b5a4eab1f73479a50fb79fcf729514a900c341d8503d62a62dbc4127a2b1"
- ],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==1.2.0"
- },
- "jinja2": {
- "hashes": [
- "sha256:03e47ad063331dd6a3f04a43eddca8a966a26ba0c5b7207a9a9e4e08f1b29419",
- "sha256:a6d58433de0ae800347cab1fa3043cebbabe8baa9d29e668f1c768cb87a333c6"
- ],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
- "version": "==2.11.3"
+ "markers": "python_version >= '3.4'",
+ "version": "==3.1"
},
"lxml": {
"hashes": [
@@ -411,69 +371,11 @@
},
"markdownify": {
"hashes": [
- "sha256:30be8340724e706c9e811c27fe8c1542cf74a15b46827924fff5c54b40dd9b0d",
- "sha256:a69588194fd76634f0139d6801b820fd652dc5eeba9530e90d323dfdc0155252"
+ "sha256:31d7c13ac2ada8bfc7535a25fee6622ca720e1b5f2d4a9cbc429d167c21f886d",
+ "sha256:7489fd5c601536996a376c4afbcd1dd034db7690af807120681461e82fbc0acc"
],
"index": "pypi",
- "version": "==0.5.3"
- },
- "markupsafe": {
- "hashes": [
- "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473",
- "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161",
- "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235",
- "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5",
- "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42",
- "sha256:195d7d2c4fbb0ee8139a6cf67194f3973a6b3042d742ebe0a9ed36d8b6f0c07f",
- "sha256:22c178a091fc6630d0d045bdb5992d2dfe14e3259760e713c490da5323866c39",
- "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff",
- "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b",
- "sha256:2beec1e0de6924ea551859edb9e7679da6e4870d32cb766240ce17e0a0ba2014",
- "sha256:3b8a6499709d29c2e2399569d96719a1b21dcd94410a586a18526b143ec8470f",
- "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1",
- "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e",
- "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183",
- "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66",
- "sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b",
- "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1",
- "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15",
- "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1",
- "sha256:6f1e273a344928347c1290119b493a1f0303c52f5a5eae5f16d74f48c15d4a85",
- "sha256:6fffc775d90dcc9aed1b89219549b329a9250d918fd0b8fa8d93d154918422e1",
- "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e",
- "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b",
- "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905",
- "sha256:7fed13866cf14bba33e7176717346713881f56d9d2bcebab207f7a036f41b850",
- "sha256:84dee80c15f1b560d55bcfe6d47b27d070b4681c699c572af2e3c7cc90a3b8e0",
- "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735",
- "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d",
- "sha256:98bae9582248d6cf62321dcb52aaf5d9adf0bad3b40582925ef7c7f0ed85fceb",
- "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e",
- "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d",
- "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c",
- "sha256:a6a744282b7718a2a62d2ed9d993cad6f5f585605ad352c11de459f4108df0a1",
- "sha256:acf08ac40292838b3cbbb06cfe9b2cb9ec78fce8baca31ddb87aaac2e2dc3bc2",
- "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21",
- "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2",
- "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5",
- "sha256:b1dba4527182c95a0db8b6060cc98ac49b9e2f5e64320e2b56e47cb2831978c7",
- "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b",
- "sha256:b7d644ddb4dbd407d31ffb699f1d140bc35478da613b441c582aeb7c43838dd8",
- "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6",
- "sha256:bf5aa3cbcfdf57fa2ee9cd1822c862ef23037f5c832ad09cfea57fa846dec193",
- "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f",
- "sha256:caabedc8323f1e93231b52fc32bdcde6db817623d33e100708d9a68e1f53b26b",
- "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f",
- "sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2",
- "sha256:d53bc011414228441014aa71dbec320c66468c1030aae3a6e29778a3382d96e5",
- "sha256:d73a845f227b0bfe8a7455ee623525ee656a9e2e749e4742706d80a6065d5e2c",
- "sha256:d9be0ba6c527163cbed5e0857c451fcd092ce83947944d6c14bc95441203f032",
- "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7",
- "sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be",
- "sha256:feb7b34d6325451ef96bc0e36e1a6c0c1c64bc1fbec4b854f4529e51887b1621"
- ],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==1.1.1"
+ "version": "==0.6.1"
},
"more-itertools": {
"hashes": [
@@ -533,14 +435,6 @@
"markers": "python_version >= '3.5'",
"version": "==4.0.2"
},
- "packaging": {
- "hashes": [
- "sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5",
- "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a"
- ],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==20.9"
- },
"pamqp": {
"hashes": [
"sha256:2f81b5c186f668a67f165193925b6bfd83db4363a6222f599517f29ecee60b02",
@@ -590,31 +484,6 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.20"
},
- "pygments": {
- "hashes": [
- "sha256:2656e1a6edcdabf4275f9a3640db59fd5de107d88e8663c5d4e9a0fa62f77f94",
- "sha256:534ef71d539ae97d4c3a4cf7d6f110f214b0e687e92f9cb9d2a3b0d3101289c8"
- ],
- "markers": "python_version >= '3.5'",
- "version": "==2.8.1"
- },
- "pyparsing": {
- "hashes": [
- "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1",
- "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"
- ],
- "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==2.4.7"
- },
- "pyreadline": {
- "hashes": [
- "sha256:4530592fc2e85b25b1a9f79664433da09237c1a270e4d78ea5aa3a2c7229e2d1",
- "sha256:65540c21bfe14405a3a77e4c085ecfce88724743a4ead47c66b84defcf82c32e",
- "sha256:9ce5fa65b8992dfa373bddc5b6e0864ead8f291c94fbfec05fbd5c836162e67b"
- ],
- "markers": "sys_platform == 'win32'",
- "version": "==2.1"
- },
"python-dateutil": {
"hashes": [
"sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c",
@@ -631,13 +500,6 @@
"index": "pypi",
"version": "==1.0.0"
},
- "pytz": {
- "hashes": [
- "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da",
- "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"
- ],
- "version": "==2021.1"
- },
"pyyaml": {
"hashes": [
"sha256:08682f6b72c722394747bddaf0aa62277e02557c0fd1c42cb853016a38f8dedf",
@@ -728,14 +590,6 @@
"index": "pypi",
"version": "==2021.4.4"
},
- "requests": {
- "hashes": [
- "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804",
- "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"
- ],
- "index": "pypi",
- "version": "==2.25.1"
- },
"sentry-sdk": {
"hashes": [
"sha256:4ae8d1ced6c67f1c8ea51d82a16721c166c489b76876c9f2c202b8a50334b237",
@@ -749,16 +603,9 @@
"sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
"sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
"version": "==1.15.0"
},
- "snowballstemmer": {
- "hashes": [
- "sha256:b51b447bea85f9968c13b650126a888aabd4cb4463fca868ec596826325dedc2",
- "sha256:e997baa4f2e9139951b6f4c631bad912dfd3c792467e2f03d7239464af90e914"
- ],
- "version": "==2.1.0"
- },
"sortedcontainers": {
"hashes": [
"sha256:37257a32add0a3ee490bb170b599e93095eed89a55da91fa9f48753ea12fd73f",
@@ -774,62 +621,6 @@
"markers": "python_version >= '3.0'",
"version": "==2.2.1"
},
- "sphinx": {
- "hashes": [
- "sha256:b4c750d546ab6d7e05bdff6ac24db8ae3e8b8253a3569b754e445110a0a12b66",
- "sha256:fc312670b56cb54920d6cc2ced455a22a547910de10b3142276495ced49231cb"
- ],
- "index": "pypi",
- "version": "==2.4.4"
- },
- "sphinxcontrib-applehelp": {
- "hashes": [
- "sha256:806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a",
- "sha256:a072735ec80e7675e3f432fcae8610ecf509c5f1869d17e2eecff44389cdbc58"
- ],
- "markers": "python_version >= '3.5'",
- "version": "==1.0.2"
- },
- "sphinxcontrib-devhelp": {
- "hashes": [
- "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e",
- "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4"
- ],
- "markers": "python_version >= '3.5'",
- "version": "==1.0.2"
- },
- "sphinxcontrib-htmlhelp": {
- "hashes": [
- "sha256:3c0bc24a2c41e340ac37c85ced6dafc879ab485c095b1d65d2461ac2f7cca86f",
- "sha256:e8f5bb7e31b2dbb25b9cc435c8ab7a79787ebf7f906155729338f3156d93659b"
- ],
- "markers": "python_version >= '3.5'",
- "version": "==1.0.3"
- },
- "sphinxcontrib-jsmath": {
- "hashes": [
- "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178",
- "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"
- ],
- "markers": "python_version >= '3.5'",
- "version": "==1.0.1"
- },
- "sphinxcontrib-qthelp": {
- "hashes": [
- "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72",
- "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6"
- ],
- "markers": "python_version >= '3.5'",
- "version": "==1.0.3"
- },
- "sphinxcontrib-serializinghtml": {
- "hashes": [
- "sha256:eaa0eccc86e982a9b939b2b82d12cc5d013385ba5eadcc7e4fed23f4405f77bc",
- "sha256:f242a81d423f59617a8e5cf16f5d4d74e28ee9a66f9e5b637a18082991db5a9a"
- ],
- "markers": "python_version >= '3.5'",
- "version": "==1.1.4"
- },
"statsd": {
"hashes": [
"sha256:c610fb80347fca0ef62666d241bce64184bd7cc1efe582f9690e045c25535eaa",
@@ -1103,11 +894,11 @@
},
"idna": {
"hashes": [
- "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
- "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
+ "sha256:5205d03e7bcbb919cc9c19885f9920d622ca52448306f2377daede5cf3faac16",
+ "sha256:c5b02147e01ea9920e6b0a3f1f7bb833612d507592c837a6c49552768f4054e1"
],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==2.10"
+ "markers": "python_version >= '3.4'",
+ "version": "==3.1"
},
"mccabe": {
"hashes": [
@@ -1203,7 +994,7 @@
"sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804",
"sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"
],
- "index": "pypi",
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==2.25.1"
},
"six": {
@@ -1211,7 +1002,7 @@
"sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
"sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
"version": "==1.15.0"
},
"snowballstemmer": {
@@ -1226,7 +1017,7 @@
"sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b",
"sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"
],
- "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'",
"version": "==0.10.2"
},
"urllib3": {
diff --git a/bot/converters.py b/bot/converters.py
index 67525cd4d..3bf05cfb3 100644
--- a/bot/converters.py
+++ b/bot/converters.py
@@ -15,6 +15,7 @@ from discord.utils import DISCORD_EPOCH, snowflake_time
from bot.api import ResponseCodeError
from bot.constants import URLs
+from bot.exts.info.doc import _inventory_parser
from bot.utils.regex import INVITE_RE
from bot.utils.time import parse_duration_string
@@ -127,22 +128,20 @@ class ValidFilterListType(Converter):
return list_type
-class ValidPythonIdentifier(Converter):
+class PackageName(Converter):
"""
- A converter that checks whether the given string is a valid Python identifier.
+ A converter that checks whether the given string is a valid package name.
- This is used to have package names that correspond to how you would use the package in your
- code, e.g. `import package`.
-
- Raises `BadArgument` if the argument is not a valid Python identifier, and simply passes through
- the given argument otherwise.
+ Package names are used for stats and are restricted to the a-z, 0-9 and _ characters.
"""
- @staticmethod
- async def convert(ctx: Context, argument: str) -> str:
- """Checks whether the given string is a valid Python identifier."""
- if not argument.isidentifier():
- raise BadArgument(f"`{argument}` is not a valid Python identifier")
+ PACKAGE_NAME_RE = re.compile(r"[^a-z0-9_]")
+
+ @classmethod
+ async def convert(cls, ctx: Context, argument: str) -> str:
+ """Checks whether the given string is a valid package name."""
+ if cls.PACKAGE_NAME_RE.search(argument):
+ raise BadArgument("The provided package name is not valid; please only use the _, 0-9, and a-z characters.")
return argument
@@ -178,6 +177,27 @@ class ValidURL(Converter):
return url
+class Inventory(Converter):
+ """
+ Represents an Intersphinx inventory URL.
+
+ This converter checks whether intersphinx accepts the given inventory URL, and raises
+ `BadArgument` if that is not the case or if the url is unreachable.
+
+ Otherwise, it returns the url and the fetched inventory dict in a tuple.
+ """
+
+ @staticmethod
+ async def convert(ctx: Context, url: str) -> t.Tuple[str, _inventory_parser.InventoryDict]:
+ """Convert url to Intersphinx inventory URL."""
+ await ctx.trigger_typing()
+ if (inventory := await _inventory_parser.fetch_inventory(url)) is None:
+ raise BadArgument(
+ f"Failed to fetch inventory file after {_inventory_parser.FAILED_REQUEST_ATTEMPTS} attempts."
+ )
+ return url, inventory
+
+
class Snowflake(IDConverter):
"""
Converts to an int if the argument is a valid Discord snowflake.
diff --git a/bot/decorators.py b/bot/decorators.py
index 0b50cc365..1d30317ef 100644
--- a/bot/decorators.py
+++ b/bot/decorators.py
@@ -1,9 +1,9 @@
import asyncio
import functools
import logging
+import types
import typing as t
from contextlib import suppress
-from functools import wraps
from discord import Member, NotFound
from discord.ext import commands
@@ -12,6 +12,7 @@ from discord.ext.commands import Cog, Context
from bot.constants import Channels, DEBUG_MODE, RedirectOutput
from bot.utils import function
from bot.utils.checks import in_whitelist_check
+from bot.utils.function import command_wraps
log = logging.getLogger(__name__)
@@ -71,8 +72,8 @@ def redirect_output(destination_channel: int, bypass_roles: t.Container[int] = N
This decorator must go before (below) the `command` decorator.
"""
- def wrap(func: t.Callable) -> t.Callable:
- @wraps(func)
+ def wrap(func: types.FunctionType) -> types.FunctionType:
+ @command_wraps(func)
async def inner(self: Cog, ctx: Context, *args, **kwargs) -> None:
if ctx.channel.id == destination_channel:
log.trace(f"Command {ctx.command.name} was invoked in destination_channel, not redirecting")
@@ -106,7 +107,6 @@ def redirect_output(destination_channel: int, bypass_roles: t.Container[int] = N
with suppress(NotFound):
await ctx.message.delete()
log.trace("Redirect output: Deleted invocation message")
-
return inner
return wrap
@@ -123,8 +123,8 @@ def respect_role_hierarchy(member_arg: function.Argument) -> t.Callable:
This decorator must go before (below) the `command` decorator.
"""
- def decorator(func: t.Callable) -> t.Callable:
- @wraps(func)
+ def decorator(func: types.FunctionType) -> types.FunctionType:
+ @command_wraps(func)
async def wrapper(*args, **kwargs) -> None:
log.trace(f"{func.__name__}: respect role hierarchy decorator called")
diff --git a/bot/exts/info/doc.py b/bot/exts/info/doc.py
deleted file mode 100644
index 9b5bd6504..000000000
--- a/bot/exts/info/doc.py
+++ /dev/null
@@ -1,485 +0,0 @@
-import asyncio
-import functools
-import logging
-import re
-import textwrap
-from contextlib import suppress
-from types import SimpleNamespace
-from typing import Optional, Tuple
-
-import discord
-from bs4 import BeautifulSoup
-from bs4.element import PageElement, Tag
-from discord.errors import NotFound
-from discord.ext import commands
-from markdownify import MarkdownConverter
-from requests import ConnectTimeout, ConnectionError, HTTPError
-from sphinx.ext import intersphinx
-from urllib3.exceptions import ProtocolError
-
-from bot.bot import Bot
-from bot.constants import MODERATION_ROLES, RedirectOutput
-from bot.converters import ValidPythonIdentifier, ValidURL
-from bot.pagination import LinePaginator
-from bot.utils.cache import AsyncCache
-from bot.utils.messages import wait_for_deletion
-
-
-log = logging.getLogger(__name__)
-logging.getLogger('urllib3').setLevel(logging.WARNING)
-
-# Since Intersphinx is intended to be used with Sphinx,
-# we need to mock its configuration.
-SPHINX_MOCK_APP = SimpleNamespace(
- config=SimpleNamespace(
- intersphinx_timeout=3,
- tls_verify=True,
- user_agent="python3:python-discord/bot:1.0.0"
- )
-)
-
-NO_OVERRIDE_GROUPS = (
- "2to3fixer",
- "token",
- "label",
- "pdbcommand",
- "term",
-)
-NO_OVERRIDE_PACKAGES = (
- "python",
-)
-
-SEARCH_END_TAG_ATTRS = (
- "data",
- "function",
- "class",
- "exception",
- "seealso",
- "section",
- "rubric",
- "sphinxsidebar",
-)
-UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
-WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
-
-FAILED_REQUEST_RETRY_AMOUNT = 3
-NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
-
-symbol_cache = AsyncCache()
-
-
-class DocMarkdownConverter(MarkdownConverter):
- """Subclass markdownify's MarkdownCoverter to provide custom conversion methods."""
-
- def convert_code(self, el: PageElement, text: str) -> str:
- """Undo `markdownify`s underscore escaping."""
- return f"`{text}`".replace('\\', '')
-
- def convert_pre(self, el: PageElement, text: str) -> str:
- """Wrap any codeblocks in `py` for syntax highlighting."""
- code = ''.join(el.strings)
- return f"```py\n{code}```"
-
-
-def markdownify(html: str) -> DocMarkdownConverter:
- """Create a DocMarkdownConverter object from the input html."""
- return DocMarkdownConverter(bullets='•').convert(html)
-
-
-class InventoryURL(commands.Converter):
- """
- Represents an Intersphinx inventory URL.
-
- This converter checks whether intersphinx accepts the given inventory URL, and raises
- `BadArgument` if that is not the case.
-
- Otherwise, it simply passes through the given URL.
- """
-
- @staticmethod
- async def convert(ctx: commands.Context, url: str) -> str:
- """Convert url to Intersphinx inventory URL."""
- try:
- intersphinx.fetch_inventory(SPHINX_MOCK_APP, '', url)
- except AttributeError:
- raise commands.BadArgument(f"Failed to fetch Intersphinx inventory from URL `{url}`.")
- except ConnectionError:
- if url.startswith('https'):
- raise commands.BadArgument(
- f"Cannot establish a connection to `{url}`. Does it support HTTPS?"
- )
- raise commands.BadArgument(f"Cannot connect to host with URL `{url}`.")
- except ValueError:
- raise commands.BadArgument(
- f"Failed to read Intersphinx inventory from URL `{url}`. "
- "Are you sure that it's a valid inventory file?"
- )
- return url
-
-
-class Doc(commands.Cog):
- """A set of commands for querying & displaying documentation."""
-
- def __init__(self, bot: Bot):
- self.base_urls = {}
- self.bot = bot
- self.inventories = {}
- self.renamed_symbols = set()
-
- self.bot.loop.create_task(self.init_refresh_inventory())
-
- async def init_refresh_inventory(self) -> None:
- """Refresh documentation inventory on cog initialization."""
- await self.bot.wait_until_guild_available()
- await self.refresh_inventory()
-
- async def update_single(
- self, package_name: str, base_url: str, inventory_url: str
- ) -> None:
- """
- Rebuild the inventory for a single package.
-
- Where:
- * `package_name` is the package name to use, appears in the log
- * `base_url` is the root documentation URL for the specified package, used to build
- absolute paths that link to specific symbols
- * `inventory_url` is the absolute URL to the intersphinx inventory, fetched by running
- `intersphinx.fetch_inventory` in an executor on the bot's event loop
- """
- self.base_urls[package_name] = base_url
-
- package = await self._fetch_inventory(inventory_url)
- if not package:
- return None
-
- for group, value in package.items():
- for symbol, (package_name, _version, relative_doc_url, _) in value.items():
- absolute_doc_url = base_url + relative_doc_url
-
- if symbol in self.inventories:
- group_name = group.split(":")[1]
- symbol_base_url = self.inventories[symbol].split("/", 3)[2]
- if (
- group_name in NO_OVERRIDE_GROUPS
- or any(package in symbol_base_url for package in NO_OVERRIDE_PACKAGES)
- ):
-
- symbol = f"{group_name}.{symbol}"
- # If renamed `symbol` already exists, add library name in front to differentiate between them.
- if symbol in self.renamed_symbols:
- # Split `package_name` because of packages like Pillow that have spaces in them.
- symbol = f"{package_name.split()[0]}.{symbol}"
-
- self.inventories[symbol] = absolute_doc_url
- self.renamed_symbols.add(symbol)
- continue
-
- self.inventories[symbol] = absolute_doc_url
-
- log.trace(f"Fetched inventory for {package_name}.")
-
- async def refresh_inventory(self) -> None:
- """Refresh internal documentation inventory."""
- log.debug("Refreshing documentation inventory...")
-
- # Clear the old base URLS and inventories to ensure
- # that we start from a fresh local dataset.
- # Also, reset the cache used for fetching documentation.
- self.base_urls.clear()
- self.inventories.clear()
- self.renamed_symbols.clear()
- symbol_cache.clear()
-
- # Run all coroutines concurrently - since each of them performs a HTTP
- # request, this speeds up fetching the inventory data heavily.
- coros = [
- self.update_single(
- package["package"], package["base_url"], package["inventory_url"]
- ) for package in await self.bot.api_client.get('bot/documentation-links')
- ]
- await asyncio.gather(*coros)
-
- async def get_symbol_html(self, symbol: str) -> Optional[Tuple[list, str]]:
- """
- Given a Python symbol, return its signature and description.
-
- The first tuple element is the signature of the given symbol as a markup-free string, and
- the second tuple element is the description of the given symbol with HTML markup included.
-
- If the given symbol is a module, returns a tuple `(None, str)`
- else if the symbol could not be found, returns `None`.
- """
- url = self.inventories.get(symbol)
- if url is None:
- return None
-
- async with self.bot.http_session.get(url) as response:
- html = await response.text(encoding='utf-8')
-
- # Find the signature header and parse the relevant parts.
- symbol_id = url.split('#')[-1]
- soup = BeautifulSoup(html, 'lxml')
- symbol_heading = soup.find(id=symbol_id)
- search_html = str(soup)
-
- if symbol_heading is None:
- return None
-
- if symbol_id == f"module-{symbol}":
- # Get page content from the module headerlink to the
- # first tag that has its class in `SEARCH_END_TAG_ATTRS`
- start_tag = symbol_heading.find("a", attrs={"class": "headerlink"})
- if start_tag is None:
- return [], ""
-
- end_tag = start_tag.find_next(self._match_end_tag)
- if end_tag is None:
- return [], ""
-
- description_start_index = search_html.find(str(start_tag.parent)) + len(str(start_tag.parent))
- description_end_index = search_html.find(str(end_tag))
- description = search_html[description_start_index:description_end_index]
- signatures = None
-
- else:
- signatures = []
- description = str(symbol_heading.find_next_sibling("dd"))
- description_pos = search_html.find(description)
- # Get text of up to 3 signatures, remove unwanted symbols
- for element in [symbol_heading] + symbol_heading.find_next_siblings("dt", limit=2):
- signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
- if signature and search_html.find(str(element)) < description_pos:
- signatures.append(signature)
-
- return signatures, description.replace('¶', '')
-
- @symbol_cache(arg_offset=1)
- async def get_symbol_embed(self, symbol: str) -> Optional[discord.Embed]:
- """
- Attempt to scrape and fetch the data for the given `symbol`, and build an embed from its contents.
-
- If the symbol is known, an Embed with documentation about it is returned.
- """
- scraped_html = await self.get_symbol_html(symbol)
- if scraped_html is None:
- return None
-
- signatures = scraped_html[0]
- permalink = self.inventories[symbol]
- description = markdownify(scraped_html[1])
-
- # Truncate the description of the embed to the last occurrence
- # of a double newline (interpreted as a paragraph) before index 1000.
- if len(description) > 1000:
- shortened = description[:1000]
- description_cutoff = shortened.rfind('\n\n', 100)
- if description_cutoff == -1:
- # Search the shortened version for cutoff points in decreasing desirability,
- # cutoff at 1000 if none are found.
- for string in (". ", ", ", ",", " "):
- description_cutoff = shortened.rfind(string)
- if description_cutoff != -1:
- break
- else:
- description_cutoff = 1000
- description = description[:description_cutoff]
-
- # If there is an incomplete code block, cut it out
- if description.count("```") % 2:
- codeblock_start = description.rfind('```py')
- description = description[:codeblock_start].rstrip()
- description += f"... [read more]({permalink})"
-
- description = WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
- if signatures is None:
- # If symbol is a module, don't show signature.
- embed_description = description
-
- elif not signatures:
- # It's some "meta-page", for example:
- # https://docs.djangoproject.com/en/dev/ref/views/#module-django.views
- embed_description = "This appears to be a generic page not tied to a specific symbol."
-
- else:
- embed_description = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures)
- embed_description += f"\n{description}"
-
- embed = discord.Embed(
- title=f'`{symbol}`',
- url=permalink,
- description=embed_description
- )
- # Show all symbols with the same name that were renamed in the footer.
- embed.set_footer(
- text=", ".join(renamed for renamed in self.renamed_symbols - {symbol} if renamed.endswith(f".{symbol}"))
- )
- return embed
-
- @commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
- async def docs_group(self, ctx: commands.Context, symbol: commands.clean_content = None) -> None:
- """Lookup documentation for Python symbols."""
- await self.get_command(ctx, symbol)
-
- @docs_group.command(name='get', aliases=('g',))
- async def get_command(self, ctx: commands.Context, symbol: commands.clean_content = None) -> None:
- """
- Return a documentation embed for a given symbol.
-
- If no symbol is given, return a list of all available inventories.
-
- Examples:
- !docs
- !docs aiohttp
- !docs aiohttp.ClientSession
- !docs get aiohttp.ClientSession
- """
- if symbol is None:
- inventory_embed = discord.Embed(
- title=f"All inventories (`{len(self.base_urls)}` total)",
- colour=discord.Colour.blue()
- )
-
- lines = sorted(f"• [`{name}`]({url})" for name, url in self.base_urls.items())
- if self.base_urls:
- await LinePaginator.paginate(lines, ctx, inventory_embed, max_size=400, empty=False)
-
- else:
- inventory_embed.description = "Hmmm, seems like there's nothing here yet."
- await ctx.send(embed=inventory_embed)
-
- else:
- # Fetching documentation for a symbol (at least for the first time, since
- # caching is used) takes quite some time, so let's send typing to indicate
- # that we got the command, but are still working on it.
- async with ctx.typing():
- doc_embed = await self.get_symbol_embed(symbol)
-
- if doc_embed is None:
- error_embed = discord.Embed(
- description=f"Sorry, I could not find any documentation for `{symbol}`.",
- colour=discord.Colour.red()
- )
- error_message = await ctx.send(embed=error_embed)
- with suppress(NotFound):
- await error_message.delete(delay=NOT_FOUND_DELETE_DELAY)
- await ctx.message.delete(delay=NOT_FOUND_DELETE_DELAY)
- else:
- msg = await ctx.send(embed=doc_embed)
- await wait_for_deletion(msg, (ctx.author.id,))
-
- @docs_group.command(name='set', aliases=('s',))
- @commands.has_any_role(*MODERATION_ROLES)
- async def set_command(
- self, ctx: commands.Context, package_name: ValidPythonIdentifier,
- base_url: ValidURL, inventory_url: InventoryURL
- ) -> None:
- """
- Adds a new documentation metadata object to the site's database.
-
- The database will update the object, should an existing item with the specified `package_name` already exist.
-
- Example:
- !docs set \
- python \
- https://docs.python.org/3/ \
- https://docs.python.org/3/objects.inv
- """
- body = {
- 'package': package_name,
- 'base_url': base_url,
- 'inventory_url': inventory_url
- }
- await self.bot.api_client.post('bot/documentation-links', json=body)
-
- log.info(
- f"User @{ctx.author} ({ctx.author.id}) added a new documentation package:\n"
- f"Package name: {package_name}\n"
- f"Base url: {base_url}\n"
- f"Inventory URL: {inventory_url}"
- )
-
- # Rebuilding the inventory can take some time, so lets send out a
- # typing event to show that the Bot is still working.
- async with ctx.typing():
- await self.refresh_inventory()
- await ctx.send(f"Added package `{package_name}` to database and refreshed inventory.")
-
- @docs_group.command(name='delete', aliases=('remove', 'rm', 'd'))
- @commands.has_any_role(*MODERATION_ROLES)
- async def delete_command(self, ctx: commands.Context, package_name: ValidPythonIdentifier) -> None:
- """
- Removes the specified package from the database.
-
- Examples:
- !docs delete aiohttp
- """
- await self.bot.api_client.delete(f'bot/documentation-links/{package_name}')
-
- async with ctx.typing():
- # Rebuild the inventory to ensure that everything
- # that was from this package is properly deleted.
- await self.refresh_inventory()
- await ctx.send(f"Successfully deleted `{package_name}` and refreshed inventory.")
-
- @docs_group.command(name="refresh", aliases=("rfsh", "r"))
- @commands.has_any_role(*MODERATION_ROLES)
- async def refresh_command(self, ctx: commands.Context) -> None:
- """Refresh inventories and send differences to channel."""
- old_inventories = set(self.base_urls)
- with ctx.typing():
- await self.refresh_inventory()
- # Get differences of added and removed inventories
- added = ', '.join(inv for inv in self.base_urls if inv not in old_inventories)
- if added:
- added = f"+ {added}"
-
- removed = ', '.join(inv for inv in old_inventories if inv not in self.base_urls)
- if removed:
- removed = f"- {removed}"
-
- embed = discord.Embed(
- title="Inventories refreshed",
- description=f"```diff\n{added}\n{removed}```" if added or removed else ""
- )
- await ctx.send(embed=embed)
-
- async def _fetch_inventory(self, inventory_url: str) -> Optional[dict]:
- """Get and return inventory from `inventory_url`. If fetching fails, return None."""
- fetch_func = functools.partial(intersphinx.fetch_inventory, SPHINX_MOCK_APP, '', inventory_url)
- for retry in range(1, FAILED_REQUEST_RETRY_AMOUNT+1):
- try:
- package = await self.bot.loop.run_in_executor(None, fetch_func)
- except ConnectTimeout:
- log.error(
- f"Fetching of inventory {inventory_url} timed out,"
- f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})"
- )
- except ProtocolError:
- log.error(
- f"Connection lost while fetching inventory {inventory_url},"
- f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})"
- )
- except HTTPError as e:
- log.error(f"Fetching of inventory {inventory_url} failed with status code {e.response.status_code}.")
- return None
- except ConnectionError:
- log.error(f"Couldn't establish connection to inventory {inventory_url}.")
- return None
- else:
- return package
- log.error(f"Fetching of inventory {inventory_url} failed.")
- return None
-
- @staticmethod
- def _match_end_tag(tag: Tag) -> bool:
- """Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table."""
- for attr in SEARCH_END_TAG_ATTRS:
- if attr in tag.get("class", ()):
- return True
-
- return tag.name == "table"
-
-
-def setup(bot: Bot) -> None:
- """Load the Doc cog."""
- bot.add_cog(Doc(bot))
diff --git a/bot/exts/info/doc/__init__.py b/bot/exts/info/doc/__init__.py
new file mode 100644
index 000000000..38a8975c0
--- /dev/null
+++ b/bot/exts/info/doc/__init__.py
@@ -0,0 +1,16 @@
+from bot.bot import Bot
+from ._redis_cache import DocRedisCache
+
+MAX_SIGNATURE_AMOUNT = 3
+PRIORITY_PACKAGES = (
+ "python",
+)
+NAMESPACE = "doc"
+
+doc_cache = DocRedisCache(namespace=NAMESPACE)
+
+
+def setup(bot: Bot) -> None:
+ """Load the Doc cog."""
+ from ._cog import DocCog
+ bot.add_cog(DocCog(bot))
diff --git a/bot/exts/info/doc/_batch_parser.py b/bot/exts/info/doc/_batch_parser.py
new file mode 100644
index 000000000..369bb462c
--- /dev/null
+++ b/bot/exts/info/doc/_batch_parser.py
@@ -0,0 +1,186 @@
+from __future__ import annotations
+
+import asyncio
+import collections
+import logging
+from collections import defaultdict
+from contextlib import suppress
+from operator import attrgetter
+from typing import Deque, Dict, List, NamedTuple, Optional, Union
+
+import discord
+from bs4 import BeautifulSoup
+
+import bot
+from bot.constants import Channels
+from bot.utils import scheduling
+from . import _cog, doc_cache
+from ._parsing import get_symbol_markdown
+
+log = logging.getLogger(__name__)
+
+
+class StaleInventoryNotifier:
+ """Handle sending notifications about stale inventories through `DocItem`s to dev log."""
+
+ def __init__(self):
+ self._init_task = bot.instance.loop.create_task(
+ self._init_channel(),
+ name="StaleInventoryNotifier channel init"
+ )
+ self._warned_urls = set()
+
+ async def _init_channel(self) -> None:
+ """Wait for guild and get channel."""
+ await bot.instance.wait_until_guild_available()
+ self._dev_log = bot.instance.get_channel(Channels.dev_log)
+
+ async def send_warning(self, doc_item: _cog.DocItem) -> None:
+ """Send a warning to dev log if one wasn't already sent for `doc_item`'s url."""
+ if doc_item.url not in self._warned_urls:
+ self._warned_urls.add(doc_item.url)
+ await self._init_task
+ embed = discord.Embed(
+ description=f"Doc item `{doc_item.symbol_id=}` present in loaded documentation inventories "
+ f"not found on [site]({doc_item.url}), inventories may need to be refreshed."
+ )
+ await self._dev_log.send(embed=embed)
+
+
+class QueueItem(NamedTuple):
+    """Contains a `DocItem` and the `BeautifulSoup` object needed to parse it."""
+
+    doc_item: _cog.DocItem
+    soup: BeautifulSoup
+
+    def __eq__(self, other: Union[QueueItem, _cog.DocItem]):
+        """
+        Compare equal to a bare `DocItem` matching `doc_item`, otherwise compare as a plain tuple.
+
+        Matching against `DocItem`s allows queue lookups (`in`, `index`) with only the doc item at hand.
+        """
+        if isinstance(other, _cog.DocItem):
+            return self.doc_item == other
+        # `typing.NamedTuple` is only a class factory and never a real base of this class,
+        # so the element-wise comparison has to come from `tuple`, the actual base class.
+        return tuple.__eq__(self, other)
+
+
+class ParseResultFuture(asyncio.Future):
+ """
+ Future with metadata for the parser class.
+
+ `user_requested` is set by the parser when a Future is requested by a user and moved to the front,
+ allowing the futures to only be waited for when clearing if they were user requested.
+ """
+
+ def __init__(self):
+ super().__init__()
+ self.user_requested = False
+
+
+class BatchParser:
+ """
+ Get the Markdown of all symbols on a page and send them to redis when a symbol is requested.
+
+ DocItems are added through the `add_item` method which adds them to the `_page_doc_items` dict.
+ `get_markdown` is used to fetch the Markdown; when this is used for the first time on a page,
+ all of the symbols are queued to be parsed to avoid multiple web requests to the same page.
+ """
+
+ def __init__(self):
+ self._queue: Deque[QueueItem] = collections.deque()
+ self._page_doc_items: Dict[str, List[_cog.DocItem]] = defaultdict(list)
+ self._item_futures: Dict[_cog.DocItem, ParseResultFuture] = defaultdict(ParseResultFuture)
+ self._parse_task = None
+
+ self.stale_inventory_notifier = StaleInventoryNotifier()
+
+    async def get_markdown(self, doc_item: _cog.DocItem) -> Optional[str]:
+        """
+        Get the result Markdown of `doc_item`.
+
+        If no symbols were fetched from `doc_item`s page before,
+        the HTML has to be fetched and then all items from the page are put into the parse queue.
+
+        If fetching the page fails, the error is set on the item's pending future and the future is dropped
+        before the error is re-raised, so nothing is left waiting on a future that can't be resolved.
+
+        Not safe to run while `self.clear` is running.
+        """
+        if doc_item not in self._item_futures and doc_item not in self._queue:
+            self._item_futures[doc_item].user_requested = True
+
+            try:
+                async with bot.instance.http_session.get(doc_item.url) as response:
+                    soup = await bot.instance.loop.run_in_executor(
+                        None,
+                        BeautifulSoup,
+                        await response.text(encoding="utf8"),
+                        "lxml",
+                    )
+            except Exception as e:
+                # The page was never queued from here, so this future would stay pending;
+                # later requests for the item and `clear` would then wait on it forever.
+                failed_future = self._item_futures.pop(doc_item, None)
+                if failed_future is not None and not failed_future.done():
+                    failed_future.set_exception(e)
+                    # Mark the exception as retrieved so asyncio doesn't log it if nobody awaits the future.
+                    failed_future.exception()
+                raise
+
+            self._queue.extendleft(QueueItem(item, soup) for item in self._page_doc_items[doc_item.url])
+            log.debug(f"Added items from {doc_item.url} to the parse queue.")
+
+            if self._parse_task is None:
+                self._parse_task = scheduling.create_task(self._parse_queue(), name="Queue parse")
+        else:
+            self._item_futures[doc_item].user_requested = True
+        with suppress(ValueError):
+            # If the item is not in the queue then the item is already parsed or is being parsed
+            self._move_to_front(doc_item)
+        return await self._item_futures[doc_item]
+
+ async def _parse_queue(self) -> None:
+ """
+ Parse all items from the queue, setting their result Markdown on the futures and sending them to redis.
+
+ The coroutine will run as long as the queue is not empty, resetting `self._parse_task` to None when finished.
+ """
+ log.trace("Starting queue parsing.")
+ try:
+ while self._queue:
+ item, soup = self._queue.pop()
+ markdown = None
+
+ if (future := self._item_futures[item]).done():
+ # Some items are present in the inventories multiple times under different symbol names,
+ # if we already parsed an equal item, we can just skip it.
+ continue
+
+ try:
+ markdown = await bot.instance.loop.run_in_executor(None, get_symbol_markdown, soup, item)
+ if markdown is not None:
+ await doc_cache.set(item, markdown)
+ else:
+ # Don't wait for this coro as the parsing doesn't depend on anything it does.
+ scheduling.create_task(
+ self.stale_inventory_notifier.send_warning(item), name="Stale inventory warning"
+ )
+ except Exception:
+ log.exception(f"Unexpected error when handling {item}")
+ future.set_result(markdown)
+ del self._item_futures[item]
+ await asyncio.sleep(0.1)
+ finally:
+ self._parse_task = None
+ log.trace("Finished parsing queue.")
+
+ def _move_to_front(self, item: Union[QueueItem, _cog.DocItem]) -> None:
+ """Move `item` to the front of the parse queue."""
+ # The parse queue stores soups along with the doc symbols in QueueItem objects,
+ # in case we're moving a DocItem we have to get the associated QueueItem first and then move it.
+ item_index = self._queue.index(item)
+ queue_item = self._queue[item_index]
+ del self._queue[item_index]
+
+ self._queue.append(queue_item)
+ log.trace(f"Moved {item} to the front of the queue.")
+
+ def add_item(self, doc_item: _cog.DocItem) -> None:
+ """Map a DocItem to its page so that the symbol will be parsed once the page is requested."""
+ self._page_doc_items[doc_item.url].append(doc_item)
+
+ async def clear(self) -> None:
+ """
+ Clear all internal symbol data.
+
+ Wait for all user-requested symbols to be parsed before clearing the parser.
+ """
+ for future in filter(attrgetter("user_requested"), self._item_futures.values()):
+ await future
+ if self._parse_task is not None:
+ self._parse_task.cancel()
+ self._queue.clear()
+ self._page_doc_items.clear()
+ self._item_futures.clear()
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
new file mode 100644
index 000000000..2a8016fb8
--- /dev/null
+++ b/bot/exts/info/doc/_cog.py
@@ -0,0 +1,442 @@
+from __future__ import annotations
+
+import asyncio
+import logging
+import sys
+import textwrap
+from collections import defaultdict
+from contextlib import suppress
+from types import SimpleNamespace
+from typing import Dict, NamedTuple, Optional, Tuple, Union
+
+import aiohttp
+import discord
+from discord.ext import commands
+
+from bot.bot import Bot
+from bot.constants import MODERATION_ROLES, RedirectOutput
+from bot.converters import Inventory, PackageName, ValidURL, allowed_strings
+from bot.pagination import LinePaginator
+from bot.utils.lock import SharedEvent, lock
+from bot.utils.messages import send_denial, wait_for_deletion
+from bot.utils.scheduling import Scheduler
+from . import NAMESPACE, PRIORITY_PACKAGES, _batch_parser, doc_cache
+from ._inventory_parser import InventoryDict, fetch_inventory
+
+log = logging.getLogger(__name__)
+
+# symbols with a group contained here will get the group prefixed on duplicates
+FORCE_PREFIX_GROUPS = (
+ "2to3fixer",
+ "token",
+ "label",
+ "pdbcommand",
+ "term",
+)
+NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
+# Delay to wait before trying to reach a rescheduled inventory again, in minutes
+FETCH_RESCHEDULE_DELAY = SimpleNamespace(first=2, repeated=5)
+
+COMMAND_LOCK_SINGLETON = "inventory refresh"
+
+
+class DocItem(NamedTuple):
+ """Holds inventory symbol information."""
+
+ package: str # Name of the package the symbol is from
+ group: str # Intersphinx "role" of the symbol, for example `label` or `method`
+ base_url: str # Absolute path to which the relative path resolves, same for all items with the same package
+ relative_url_path: str # Relative path to the page where the symbol is located
+ symbol_id: str # Fragment id used to locate the symbol on the page
+
+ @property
+ def url(self) -> str:
+ """Return the absolute url to the symbol."""
+ return self.base_url + self.relative_url_path
+
+
+class DocCog(commands.Cog):
+ """A set of commands for querying & displaying documentation."""
+
+ def __init__(self, bot: Bot):
+ # Contains URLs to documentation home pages.
+ # Used to calculate inventory diffs on refreshes and to display all currently stored inventories.
+ self.base_urls = {}
+ self.bot = bot
+ self.doc_symbols: Dict[str, DocItem] = {} # Maps symbol names to objects containing their metadata.
+ self.item_fetcher = _batch_parser.BatchParser()
+ # Maps a conflicting symbol name to a list of the new, disambiguated names created from conflicts with the name.
+ self.renamed_symbols = defaultdict(list)
+
+ self.inventory_scheduler = Scheduler(self.__class__.__name__)
+
+ self.refresh_event = asyncio.Event()
+ self.refresh_event.set()
+ self.symbol_get_event = SharedEvent()
+
+ self.init_refresh_task = self.bot.loop.create_task(
+ self.init_refresh_inventory(),
+ name="Doc inventory init"
+ )
+
+ @lock(NAMESPACE, COMMAND_LOCK_SINGLETON, raise_error=True)
+ async def init_refresh_inventory(self) -> None:
+ """Refresh documentation inventory on cog initialization."""
+ await self.bot.wait_until_guild_available()
+ await self.refresh_inventories()
+
+ def update_single(self, package_name: str, base_url: str, inventory: InventoryDict) -> None:
+ """
+ Build the inventory for a single package.
+
+ Where:
+ * `package_name` is the package name to use in logs and when qualifying symbols
+ * `base_url` is the root documentation URL for the specified package, used to build
+ absolute paths that link to specific symbols
+ * `inventory` is the content of an intersphinx inventory.
+ """
+ self.base_urls[package_name] = base_url
+
+ for group, items in inventory.items():
+ for symbol_name, relative_doc_url in items:
+
+ # e.g. get 'class' from 'py:class'
+ group_name = group.split(":")[1]
+ symbol_name = self.ensure_unique_symbol_name(
+ package_name,
+ group_name,
+ symbol_name,
+ )
+
+ relative_url_path, _, symbol_id = relative_doc_url.partition("#")
+ # Intern fields that have shared content so we're not storing unique strings for every object
+ doc_item = DocItem(
+ package_name,
+ sys.intern(group_name),
+ base_url,
+ sys.intern(relative_url_path),
+ symbol_id,
+ )
+ self.doc_symbols[symbol_name] = doc_item
+ self.item_fetcher.add_item(doc_item)
+
+ log.trace(f"Fetched inventory for {package_name}.")
+
+ async def update_or_reschedule_inventory(
+ self,
+ api_package_name: str,
+ base_url: str,
+ inventory_url: str,
+ ) -> None:
+ """
+ Update the cog's inventories, or reschedule this method to execute again if the remote inventory is unreachable.
+
+ The first attempt is rescheduled to execute in `FETCH_RESCHEDULE_DELAY.first` minutes, the subsequent attempts
+ in `FETCH_RESCHEDULE_DELAY.repeated` minutes.
+ """
+ package = await fetch_inventory(inventory_url)
+
+ if not package:
+ if api_package_name in self.inventory_scheduler:
+ self.inventory_scheduler.cancel(api_package_name)
+ delay = FETCH_RESCHEDULE_DELAY.repeated
+ else:
+ delay = FETCH_RESCHEDULE_DELAY.first
+ log.info(f"Failed to fetch inventory; attempting again in {delay} minutes.")
+ self.inventory_scheduler.schedule_later(
+ delay*60,
+ api_package_name,
+ self.update_or_reschedule_inventory(api_package_name, base_url, inventory_url),
+ )
+ else:
+ self.update_single(api_package_name, base_url, package)
+
+ def ensure_unique_symbol_name(self, package_name: str, group_name: str, symbol_name: str) -> str:
+ """
+ Ensure `symbol_name` doesn't overwrite another symbol in `doc_symbols`.
+
+ For conflicts, rename either the current symbol or the existing symbol with which it conflicts.
+ Store the new name in `renamed_symbols` and return the name to use for the symbol.
+
+ If the existing symbol was renamed or there was no conflict, the returned name is equivalent to `symbol_name`.
+ """
+ if (item := self.doc_symbols.get(symbol_name)) is None:
+ return symbol_name # There's no conflict so it's fine to simply use the given symbol name.
+
+ def rename(prefix: str, *, rename_extant: bool = False) -> str:
+ new_name = f"{prefix}.{symbol_name}"
+ if new_name in self.doc_symbols:
+ # If there's still a conflict, qualify the name further.
+ if rename_extant:
+ new_name = f"{item.package}.{item.group}.{symbol_name}"
+ else:
+ new_name = f"{package_name}.{group_name}.{symbol_name}"
+
+ self.renamed_symbols[symbol_name].append(new_name)
+
+ if rename_extant:
+ # Instead of renaming the current symbol, rename the symbol with which it conflicts.
+ self.doc_symbols[new_name] = self.doc_symbols[symbol_name]
+ return symbol_name
+ else:
+ return new_name
+
+ # Certain groups are added as prefixes to disambiguate the symbols.
+ if group_name in FORCE_PREFIX_GROUPS:
+ return rename(group_name)
+
+ # The existing symbol with which the current symbol conflicts should have a group prefix.
+ # It currently doesn't have the group prefix because it's only added once there's a conflict.
+ elif item.group in FORCE_PREFIX_GROUPS:
+ return rename(item.group, rename_extant=True)
+
+ elif package_name in PRIORITY_PACKAGES:
+ return rename(item.package, rename_extant=True)
+
+ # If we can't specially handle the symbol through its group or package,
+ # fall back to prepending its package name to the front.
+ else:
+ return rename(package_name)
+
+ async def refresh_inventories(self) -> None:
+ """Refresh internal documentation inventories."""
+ self.refresh_event.clear()
+ await self.symbol_get_event.wait()
+ log.debug("Refreshing documentation inventory...")
+ self.inventory_scheduler.cancel_all()
+
+ self.base_urls.clear()
+ self.doc_symbols.clear()
+ self.renamed_symbols.clear()
+ await self.item_fetcher.clear()
+
+ coros = [
+ self.update_or_reschedule_inventory(
+ package["package"], package["base_url"], package["inventory_url"]
+ ) for package in await self.bot.api_client.get("bot/documentation-links")
+ ]
+ await asyncio.gather(*coros)
+ log.debug("Finished inventory refresh.")
+ self.refresh_event.set()
+
+ def get_symbol_item(self, symbol_name: str) -> Tuple[str, Optional[DocItem]]:
+ """
+ Get the `DocItem` and the symbol name used to fetch it from the `doc_symbols` dict.
+
+ If the doc item is not found directly from the passed in name and the name contains a space,
+ the first word of the name will be attempted to be used to get the item.
+ """
+ doc_item = self.doc_symbols.get(symbol_name)
+ if doc_item is None and " " in symbol_name:
+ symbol_name = symbol_name.split(" ", maxsplit=1)[0]
+ doc_item = self.doc_symbols.get(symbol_name)
+
+ return symbol_name, doc_item
+
+ async def get_symbol_markdown(self, doc_item: DocItem) -> str:
+ """
+ Get the Markdown from the symbol `doc_item` refers to.
+
+ First a redis lookup is attempted, if that fails the `item_fetcher`
+ is used to fetch the page and parse the HTML from it into Markdown.
+ """
+ markdown = await doc_cache.get(doc_item)
+
+ if markdown is None:
+ log.debug(f"Redis cache miss with {doc_item}.")
+ try:
+ markdown = await self.item_fetcher.get_markdown(doc_item)
+
+ except aiohttp.ClientError as e:
+ log.warning(f"A network error has occurred when requesting parsing of {doc_item}.", exc_info=e)
+ return "Unable to parse the requested symbol due to a network error."
+
+ except Exception:
+ log.exception(f"An unexpected error has occurred when requesting parsing of {doc_item}.")
+ return "Unable to parse the requested symbol due to an error."
+
+ if markdown is None:
+ return "Unable to parse the requested symbol."
+ return markdown
+
+ async def create_symbol_embed(self, symbol_name: str) -> Optional[discord.Embed]:
+ """
+ Attempt to scrape and fetch the data for the given `symbol_name`, and build an embed from its contents.
+
+ If the symbol is known, an Embed with documentation about it is returned.
+
+ First check the DocRedisCache before querying the cog's `BatchParser`.
+ """
+ log.trace(f"Building embed for symbol `{symbol_name}`")
+ if not self.refresh_event.is_set():
+ log.debug("Waiting for inventories to be refreshed before processing item.")
+ await self.refresh_event.wait()
+ # Ensure a refresh can't run in case of a context switch until the with block is exited
+ with self.symbol_get_event:
+ symbol_name, doc_item = self.get_symbol_item(symbol_name)
+ if doc_item is None:
+ log.debug("Symbol does not exist.")
+ return None
+
+ self.bot.stats.incr(f"doc_fetches.{doc_item.package}")
+
+ # Show all symbols with the same name that were renamed in the footer,
+ # with a max of 200 chars.
+ if symbol_name in self.renamed_symbols:
+ renamed_symbols = ", ".join(self.renamed_symbols[symbol_name])
+ footer_text = textwrap.shorten("Similar names: " + renamed_symbols, 200, placeholder=" ...")
+ else:
+ footer_text = ""
+
+ embed = discord.Embed(
+ title=discord.utils.escape_markdown(symbol_name),
+ url=f"{doc_item.url}#{doc_item.symbol_id}",
+ description=await self.get_symbol_markdown(doc_item)
+ )
+ embed.set_footer(text=footer_text)
+ return embed
+
+ @commands.group(name="docs", aliases=("doc", "d"), invoke_without_command=True)
+ async def docs_group(self, ctx: commands.Context, *, symbol_name: Optional[str]) -> None:
+ """Look up documentation for Python symbols."""
+ await self.get_command(ctx, symbol_name=symbol_name)
+
+ @docs_group.command(name="getdoc", aliases=("g",))
+ async def get_command(self, ctx: commands.Context, *, symbol_name: Optional[str]) -> None:
+ """
+ Return a documentation embed for a given symbol.
+
+ If no symbol is given, return a list of all available inventories.
+
+ Examples:
+ !docs
+ !docs aiohttp
+ !docs aiohttp.ClientSession
+ !docs getdoc aiohttp.ClientSession
+ """
+ if not symbol_name:
+ inventory_embed = discord.Embed(
+ title=f"All inventories (`{len(self.base_urls)}` total)",
+ colour=discord.Colour.blue()
+ )
+
+ lines = sorted(f"• [`{name}`]({url})" for name, url in self.base_urls.items())
+ if self.base_urls:
+ await LinePaginator.paginate(lines, ctx, inventory_embed, max_size=400, empty=False)
+
+ else:
+ inventory_embed.description = "Hmmm, seems like there's nothing here yet."
+ await ctx.send(embed=inventory_embed)
+
+ else:
+ symbol = symbol_name.strip("`")
+ async with ctx.typing():
+ doc_embed = await self.create_symbol_embed(symbol)
+
+ if doc_embed is None:
+ error_message = await send_denial(ctx, "No documentation found for the requested symbol.")
+ await wait_for_deletion(error_message, (ctx.author.id,), timeout=NOT_FOUND_DELETE_DELAY)
+ with suppress(discord.NotFound):
+ await ctx.message.delete()
+ with suppress(discord.NotFound):
+ await error_message.delete()
+ else:
+ msg = await ctx.send(embed=doc_embed)
+ await wait_for_deletion(msg, (ctx.author.id,))
+
+ @docs_group.command(name="setdoc", aliases=("s",))
+ @commands.has_any_role(*MODERATION_ROLES)
+ @lock(NAMESPACE, COMMAND_LOCK_SINGLETON, raise_error=True)
+ async def set_command(
+ self,
+ ctx: commands.Context,
+ package_name: PackageName,
+ base_url: ValidURL,
+ inventory: Inventory,
+ ) -> None:
+ """
+ Adds a new documentation metadata object to the site's database.
+
+ The database will update the object, should an existing item with the specified `package_name` already exist.
+
+ Example:
+ !docs setdoc \
+ python \
+ https://docs.python.org/3/ \
+ https://docs.python.org/3/objects.inv
+ """
+ if not base_url.endswith("/"):
+ raise commands.BadArgument("The base url must end with a slash.")
+ inventory_url, inventory_dict = inventory
+ body = {
+ "package": package_name,
+ "base_url": base_url,
+ "inventory_url": inventory_url
+ }
+ await self.bot.api_client.post("bot/documentation-links", json=body)
+
+ log.info(
+ f"User @{ctx.author} ({ctx.author.id}) added a new documentation package:\n"
+ + "\n".join(f"{key}: {value}" for key, value in body.items())
+ )
+
+ self.update_single(package_name, base_url, inventory_dict)
+ await ctx.send(f"Added the package `{package_name}` to the database and updated the inventories.")
+
+ @docs_group.command(name="deletedoc", aliases=("removedoc", "rm", "d"))
+ @commands.has_any_role(*MODERATION_ROLES)
+ @lock(NAMESPACE, COMMAND_LOCK_SINGLETON, raise_error=True)
+ async def delete_command(self, ctx: commands.Context, package_name: PackageName) -> None:
+ """
+ Removes the specified package from the database.
+
+ Example:
+ !docs deletedoc aiohttp
+ """
+ await self.bot.api_client.delete(f"bot/documentation-links/{package_name}")
+
+ async with ctx.typing():
+ await self.refresh_inventories()
+ await doc_cache.delete(package_name)
+ await ctx.send(f"Successfully deleted `{package_name}` and refreshed the inventories.")
+
+    @docs_group.command(name="refreshdoc", aliases=("rfsh", "r"))
+    @commands.has_any_role(*MODERATION_ROLES)
+    @lock(NAMESPACE, COMMAND_LOCK_SINGLETON, raise_error=True)
+    async def refresh_command(self, ctx: commands.Context) -> None:
+        """Refresh inventories and show the difference."""
+        old_inventories = set(self.base_urls)
+        # Use the asynchronous form of the typing context manager, like the other commands in this cog.
+        async with ctx.typing():
+            await self.refresh_inventories()
+        new_inventories = set(self.base_urls)
+
+        # Sort the names so the reported diff is stable instead of following set iteration order.
+        if added := ", ".join(sorted(new_inventories - old_inventories)):
+            added = "+ " + added
+
+        if removed := ", ".join(sorted(old_inventories - new_inventories)):
+            removed = "- " + removed
+
+        embed = discord.Embed(
+            title="Inventories refreshed",
+            description=f"```diff\n{added}\n{removed}```" if added or removed else ""
+        )
+        await ctx.send(embed=embed)
+
+ @docs_group.command(name="cleardoccache", aliases=("deletedoccache",))
+ @commands.has_any_role(*MODERATION_ROLES)
+ async def clear_cache_command(
+ self,
+ ctx: commands.Context,
+ package_name: Union[PackageName, allowed_strings("*")] # noqa: F722
+ ) -> None:
+ """Clear the persistent redis cache for `package`."""
+ if await doc_cache.delete(package_name):
+ await ctx.send(f"Successfully cleared the cache for `{package_name}`.")
+ else:
+ await ctx.send("No keys matching the package found.")
+
+ def cog_unload(self) -> None:
+ """Clear scheduled inventories, queued symbols and cleanup task on cog unload."""
+ self.inventory_scheduler.cancel_all()
+ self.init_refresh_task.cancel()
+ asyncio.create_task(self.item_fetcher.clear(), name="DocCog.item_fetcher unload clear")
diff --git a/bot/exts/info/doc/_html.py b/bot/exts/info/doc/_html.py
new file mode 100644
index 000000000..94efd81b7
--- /dev/null
+++ b/bot/exts/info/doc/_html.py
@@ -0,0 +1,136 @@
+import logging
+import re
+from functools import partial
+from typing import Callable, Container, Iterable, List, Union
+
+from bs4 import BeautifulSoup
+from bs4.element import NavigableString, PageElement, SoupStrainer, Tag
+
+from . import MAX_SIGNATURE_AMOUNT
+
+log = logging.getLogger(__name__)
+
+# Matches text that is removed from signatures: "[source]", two consecutive backslashes and the pilcrow sign.
+_UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
+# Class names which end the sibling search done in `get_general_description`.
+_SEARCH_END_TAG_ATTRS = (
+    "data",
+    "function",
+    "class",
+    "exception",
+    "seealso",
+    "section",
+    "rubric",
+    "sphinxsidebar",
+)
+
+
+class Strainer(SoupStrainer):
+    """A `SoupStrainer` that is able to let `NavigableString`s through alongside `Tag`s."""
+
+    def __init__(self, *, include_strings: bool, **kwargs):
+        self.include_strings = include_strings
+        # A passed `text` kwarg is always discarded; the warning is only logged when it carried a value.
+        if kwargs.pop("text", None) is not None:
+            log.warning("`text` is not a supported kwarg in the custom strainer.")
+        super().__init__(**kwargs)
+
+    Markup = Union[PageElement, List["Markup"]]
+
+    def search(self, markup: Markup) -> Union[PageElement, str]:
+        """
+        Match `markup` against the strainer, allowing both `Tag`s and `NavigableString`s to match.
+
+        A string is returned unchanged when `include_strings` is set and neither a name nor attrs are filtered on;
+        any other string is rejected by returning None. Everything else is handled by `SoupStrainer.search`.
+        """
+        if not isinstance(markup, str):
+            return super().search(markup)
+
+        has_tag_filters = bool(self.name or self.attrs)
+        if not has_tag_filters and self.include_strings:
+            return markup
+        return None
+
+
+def _find_elements_until_tag(
+    start_element: PageElement,
+    end_tag_filter: Union[Container[str], Callable[[Tag], bool]],
+    *,
+    func: Callable,
+    include_strings: bool = False,
+    limit: int = None,
+) -> List[Union[Tag, NavigableString]]:
+    """
+    Collect the elements found by `func`, stopping at `limit` or at the first tag matching `end_tag_filter`.
+
+    `end_tag_filter` is either a container of tag names, or a callable that is applied to every found tag;
+    the matching tag itself is not included in the result.
+
+    With `include_strings` set to True, `NavigableString`s from the document are included in the result
+    along with the `Tag`s; strings are never checked against the filter.
+
+    `func` is an unbound BeautifulSoup method for finding multiple elements, such as `BeautifulSoup.find_all`.
+    It's called with `start_element`, and its result is iterated over to build the returned list.
+    """
+    if callable(end_tag_filter):
+        is_end_tag = end_tag_filter
+    else:
+        def is_end_tag(tag: Tag) -> bool:
+            return tag.name in end_tag_filter
+
+    found = []
+    strainer = Strainer(include_strings=include_strings)
+    for element in func(start_element, name=strainer, limit=limit):
+        if isinstance(element, Tag) and is_end_tag(element):
+            break
+        found.append(element)
+
+    return found
+
+
+# Variants of `_find_elements_until_tag` with `func` pre-bound to the BeautifulSoup method that supplies the elements:
+# direct children, all descendants, following siblings and preceding siblings respectively.
+_find_next_children_until_tag = partial(_find_elements_until_tag, func=partial(BeautifulSoup.find_all, recursive=False))
+_find_recursive_children_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_all)
+_find_next_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_next_siblings)
+_find_previous_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_previous_siblings)
+
+
+def _class_filter_factory(class_names: Iterable[str]) -> Callable[[Tag], bool]:
+    """Create a predicate that is True for table tags and for tags with any of their classes in `class_names`."""
+    def match_tag(tag: Tag) -> bool:
+        # Tags without a class attribute are treated as having no classes.
+        if any(name in tag.get("class", ()) for name in class_names):
+            return True
+        return tag.name == "table"
+
+    return match_tag
+
+
+def get_general_description(start_element: Tag) -> List[Union[Tag, NavigableString]]:
+    """
+    Get page content to a table or a tag with its class in `_SEARCH_END_TAG_ATTRS`.
+
+    The first 100 descendants of `start_element`, up to a section or a table, are searched for a headerlink tag.
+    When one is found, the sibling search starts from its parent instead of from `start_element`,
+    which skips repeating the symbol information in the description.
+    """
+    is_section = _class_filter_factory(["section"])
+    is_headerlink = _class_filter_factory(["headerlink"])
+    is_search_end = _class_filter_factory(_SEARCH_END_TAG_ATTRS)
+
+    start_tag = start_element
+    for child in _find_recursive_children_until_tag(start_element, is_section, limit=100):
+        if is_headerlink(child):
+            start_tag = child.parent
+            break
+
+    return _find_next_siblings_until_tag(start_tag, is_search_end, include_strings=True)
+
+
+def get_dd_description(symbol: PageElement) -> List[Union[Tag, NavigableString]]:
+    """
+    Get the contents of the next dd tag, up to a dt or a dl tag.
+
+    An empty list is returned when no dd tag follows `symbol`.
+    """
+    description_tag = symbol.find_next("dd")
+    if description_tag is None:
+        # `find_next` found nothing, so there is no tag to collect the children of.
+        return []
+    return _find_next_children_until_tag(description_tag, ("dt", "dl"), include_strings=True)
+
+
+def get_signatures(start_signature: PageElement) -> List[str]:
+    """
+    Collect up to `MAX_SIGNATURE_AMOUNT` signatures from the tags around the `start_signature` tag.
+
+    Up to two siblings before and two siblings after `start_signature` are considered, in document order,
+    with both searches stopping at a dd tag. When there are more candidates than `MAX_SIGNATURE_AMOUNT`,
+    the last ones are kept. Unwanted symbols are removed from the text of every candidate
+    and the candidates that end up empty are left out.
+    """
+    preceding = _find_previous_siblings_until_tag(start_signature, ("dd",), limit=2)
+    following = _find_next_siblings_until_tag(start_signature, ("dd",), limit=2)
+    candidates = [*reversed(preceding), start_signature, *following]
+
+    cleaned_texts = (
+        _UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
+        for element in candidates[-MAX_SIGNATURE_AMOUNT:]
+    )
+    return [signature for signature in cleaned_texts if signature]
diff --git a/bot/exts/info/doc/_inventory_parser.py b/bot/exts/info/doc/_inventory_parser.py
new file mode 100644
index 000000000..80d5841a0
--- /dev/null
+++ b/bot/exts/info/doc/_inventory_parser.py
@@ -0,0 +1,126 @@
+import logging
+import re
+import zlib
+from collections import defaultdict
+from typing import AsyncIterator, DefaultDict, List, Optional, Tuple
+
+import aiohttp
+
+import bot
+
+log = logging.getLogger(__name__)
+
+# Amount of attempts `fetch_inventory` makes before giving up on an url.
+FAILED_REQUEST_ATTEMPTS = 3
+# The groups are unpacked in `_load_v2` as: name, type, priority, location and display name.
+_V2_LINE_RE = re.compile(r'(?x)(.+?)\s+(\S*:\S*)\s+(-?\d+)\s+?(\S*)\s+(.*)')
+
+# Maps a type to the (name, location) pairs of its symbols.
+InventoryDict = DefaultDict[str, List[Tuple[str, str]]]
+
+
+class ZlibStreamReader:
+    """Reader that decompresses the zlib data of a stream and exposes it line by line."""
+
+    READ_CHUNK_SIZE = 16 * 1024
+
+    def __init__(self, stream: aiohttp.StreamReader) -> None:
+        self.stream = stream
+
+    async def _read_compressed_chunks(self) -> AsyncIterator[bytes]:
+        """Yield the decompressed data of the stream, which is read in `READ_CHUNK_SIZE` sized chunks."""
+        inflater = zlib.decompressobj()
+        async for compressed in self.stream.iter_chunked(self.READ_CHUNK_SIZE):
+            yield inflater.decompress(compressed)
+
+        # Emit what the decompressor still holds after the stream was exhausted.
+        yield inflater.flush()
+
+    async def __aiter__(self) -> AsyncIterator[str]:
+        """
+        Yield decoded lines of the decompressed text, without the trailing newlines.
+
+        Data after the last newline is not yielded.
+        """
+        pending = b''
+        async for chunk in self._read_compressed_chunks():
+            # All the pieces except the last one are complete lines; the last one waits for more data.
+            *complete_lines, pending = (pending + chunk).split(b'\n')
+            for line in complete_lines:
+                yield line.decode()
+
+
+async def _load_v1(stream: aiohttp.StreamReader) -> InventoryDict:
+    """Parse the lines of a version 1 inventory from `stream` into an inventory dict."""
+    inventory = defaultdict(list)
+
+    async for raw_line in stream:
+        name, type_, location = raw_line.decode().rstrip().split(maxsplit=2)
+        # version 1 did not add anchors to the location
+        is_module = type_ == "mod"
+        group = "py:module" if is_module else "py:" + type_
+        anchor = "#module-" + name if is_module else "#" + name
+        inventory[group].append((name, location + anchor))
+    return inventory
+
+
+async def _load_v2(stream: aiohttp.StreamReader) -> InventoryDict:
+    """
+    Parse the zlib compressed lines of a version 2 inventory from `stream` into an inventory dict.
+
+    Lines that don't match `_V2_LINE_RE` are skipped.
+    """
+    invdata = defaultdict(list)
+
+    async for line in ZlibStreamReader(stream):
+        m = _V2_LINE_RE.match(line.rstrip())
+        if m is None:
+            # A malformed line would otherwise raise an AttributeError and discard the whole inventory.
+            continue
+        name, type_, _prio, location, _dispname = m.groups()  # ignore the parsed items we don't need
+        if location.endswith("$"):
+            # A trailing "$" is a placeholder for the symbol's name.
+            location = location[:-1] + name
+
+        invdata[type_].append((name, location))
+    return invdata
+
+
+async def _fetch_inventory(url: str) -> InventoryDict:
+    """
+    Fetch, parse and return an intersphinx inventory file from an url.
+
+    A ValueError is raised when the version in the header is not 1 or 2,
+    or when the fourth line of a version 2 inventory doesn't contain "zlib".
+    """
+    request_timeout = aiohttp.ClientTimeout(sock_connect=5, sock_read=5)
+    async with bot.instance.http_session.get(url, timeout=request_timeout, raise_for_status=True) as response:
+        stream = response.content
+
+        header_line = await stream.readline()
+        # The version is the last character of the first line.
+        inventory_version = int(header_line.decode().rstrip()[-1:])
+        for _ in range(2):
+            await stream.readline()  # skip project name and project version
+
+        if inventory_version == 1:
+            return await _load_v1(stream)
+
+        if inventory_version == 2:
+            compression_line = await stream.readline()
+            if b"zlib" in compression_line:
+                return await _load_v2(stream)
+
+        raise ValueError(f"Invalid inventory file at url {url}.")
+
+
+async def fetch_inventory(url: str) -> Optional[InventoryDict]:
+    """
+    Get an inventory dict from `url`, making up to `FAILED_REQUEST_ATTEMPTS` attempts on errors.
+
+    `url` should point at a valid sphinx objects.inv inventory file, which will be parsed into the
+    inventory dict in the format of {"domain:role": [("symbol_name", "relative_url_to_symbol"), ...], ...}
+
+    Every failed attempt is logged; None is returned when none of the attempts succeed.
+    """
+    for attempt in range(1, FAILED_REQUEST_ATTEMPTS+1):
+        retry_notice = f"trying again ({attempt}/{FAILED_REQUEST_ATTEMPTS})."
+        try:
+            return await _fetch_inventory(url)
+        except aiohttp.ClientConnectorError:
+            log.warning(f"Failed to connect to inventory url at {url}; {retry_notice}")
+        except aiohttp.ClientError:
+            log.error(f"Failed to get inventory from {url}; {retry_notice}")
+        except Exception:
+            log.exception(f"An unexpected error has occurred during fetching of {url}; {retry_notice}")
+
+    return None
diff --git a/bot/exts/info/doc/_markdown.py b/bot/exts/info/doc/_markdown.py
new file mode 100644
index 000000000..1b7d8232b
--- /dev/null
+++ b/bot/exts/info/doc/_markdown.py
@@ -0,0 +1,58 @@
+from urllib.parse import urljoin
+
+from bs4.element import PageElement
+from markdownify import MarkdownConverter
+
+
+class DocMarkdownConverter(MarkdownConverter):
+    """Subclass markdownify's MarkdownConverter to provide custom conversion methods."""
+
+    def __init__(self, *, page_url: str, **options):
+        super().__init__(**options)
+        # Url that the relative links are resolved against in `convert_a`.
+        self.page_url = page_url
+
+    def convert_li(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
+        """
+        Fix markdownify's erroneous indexing in ol tags.
+
+        Items of an ol tag are numbered by their position among its direct li children;
+        all other items get a bullet from the `bullets` option selected by the depth of ul nesting.
+        """
+        parent = el.parent
+        if parent is not None and parent.name == "ol":
+            # Only the direct children are numbered, so items of nested lists don't shift the numbers.
+            li_tags = parent.find_all("li", recursive=False)
+            # The item is looked up by identity; tags with equal markup compare equal,
+            # which would make `list.index` number repeated items as the first one of them.
+            position = next((number for number, li in enumerate(li_tags, start=1) if li is el), 1)
+            bullet = f"{position}."
+        else:
+            depth = -1
+            while el:
+                if el.name == "ul":
+                    depth += 1
+                el = el.parent
+            bullets = self.options["bullets"]
+            bullet = bullets[depth % len(bullets)]
+        return f"{bullet} {text}\n"
+
+    def convert_hn(self, _n: int, el: PageElement, text: str, convert_as_inline: bool) -> str:
+        """Convert h tags to bold text with ** instead of adding #."""
+        if convert_as_inline:
+            return text
+        return f"**{text}**\n\n"
+
+    def convert_code(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
+        """Undo `markdownify`s underscore escaping."""
+        # Note that all backslashes are removed from the text, not only the ones in front of underscores.
+        return f"`{text}`".replace("\\", "")
+
+    def convert_pre(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
+        """Wrap any codeblocks in `py` for syntax highlighting."""
+        # The strings of the tag are used directly instead of the already converted `text`.
+        code = "".join(el.strings)
+        return f"```py\n{code}```"
+
+    def convert_a(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
+        """Resolve relative URLs to `self.page_url`; anchors without a href are passed on unchanged."""
+        href = el.get("href")
+        if href is not None:
+            el["href"] = urljoin(self.page_url, href)
+        return super().convert_a(el, text, convert_as_inline)
+
+    def convert_p(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
+        """Include only one newline instead of two when the parent is a li tag."""
+        if convert_as_inline:
+            return text
+
+        parent = el.parent
+        if parent is not None and parent.name == "li":
+            return f"{text}\n"
+        return super().convert_p(el, text, convert_as_inline)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
new file mode 100644
index 000000000..bf840b96f
--- /dev/null
+++ b/bot/exts/info/doc/_parsing.py
@@ -0,0 +1,256 @@
+from __future__ import annotations
+
+import logging
+import re
+import string
+import textwrap
+from collections import namedtuple
+from typing import Collection, Iterable, Iterator, List, Optional, TYPE_CHECKING, Union
+
+from bs4 import BeautifulSoup
+from bs4.element import NavigableString, Tag
+
+from bot.utils.helpers import find_nth_occurrence
+from . import MAX_SIGNATURE_AMOUNT
+from ._html import get_dd_description, get_general_description, get_signatures
+from ._markdown import DocMarkdownConverter
+if TYPE_CHECKING:
+ from ._cog import DocItem
+
+log = logging.getLogger(__name__)
+
+# Matches whitespace that directly follows two newlines; `_create_markdown` removes it from descriptions.
+_WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
+# Captures the text between the first opening and the last closing parenthesis of a signature.
+_PARAMETERS_RE = re.compile(r"\((.+)\)")
+
+# Symbol groups for which `get_symbol_markdown` only fetches a description and no signatures.
+_NO_SIGNATURE_GROUPS = {
+    "attribute",
+    "envvar",
+    "setting",
+    "templatefilter",
+    "templatetag",
+    "term",
+}
+_EMBED_CODE_BLOCK_LINE_LENGTH = 61
+# MAX_SIGNATURE_AMOUNT code block wrapped lines with py syntax highlight
+_MAX_SIGNATURES_LENGTH = (_EMBED_CODE_BLOCK_LINE_LENGTH + 8) * MAX_SIGNATURE_AMOUNT
+# Maximum embed description length - signatures on top
+_MAX_DESCRIPTION_LENGTH = 2048 - _MAX_SIGNATURES_LENGTH
+# Characters stripped from both ends of a truncated description before the ellipsis is appended.
+_TRUNCATE_STRIP_CHARACTERS = "!?:;." + string.whitespace
+
+BracketPair = namedtuple("BracketPair", ["opening_bracket", "closing_bracket"])
+# Bracket pairs keyed by their opening bracket; used by `_split_parameters` to track nesting.
+_BRACKET_PAIRS = {
+    "{": BracketPair("{", "}"),
+    "(": BracketPair("(", ")"),
+    "[": BracketPair("[", "]"),
+    "<": BracketPair("<", ">"),
+}
+
+
+def _split_parameters(parameters_string: str) -> Iterator[str]:
+    """
+    Split parameters of a signature into individual parameter strings on commas.
+
+    Commas inside of string literals and inside of brackets from `_BRACKET_PAIRS` are not split on.
+    Only one kind of bracket is tracked at a time: the kind that opened the current outermost pair.
+    The yielded parameters keep their surrounding whitespace, and the remainder after the last split is always
+    yielded, even when it's empty.
+
+    Long string literals are not accounted for.
+    """
+    last_split = 0
+    depth = 0
+    # The bracket pair that is currently open at the outermost level, None when outside of brackets.
+    current_search: Optional[BracketPair] = None
+
+    # A single iterator is shared with the inner loop so that characters consumed there are skipped here.
+    enumerated_string = enumerate(parameters_string)
+    for index, character in enumerated_string:
+        if character in {"'", '"'}:
+            # Skip everything inside of strings, regardless of the depth.
+            quote_character = character  # The closing quote must equal the opening quote.
+            preceding_backslashes = 0
+            for _, character in enumerated_string:
+                # If an odd number of backslashes precedes the quote, it was escaped.
+                if character == quote_character and not preceding_backslashes % 2:
+                    break
+                if character == "\\":
+                    preceding_backslashes += 1
+                else:
+                    preceding_backslashes = 0
+
+        elif current_search is None:
+            # Outside of brackets: either a bracket pair is opened, or a comma ends the current parameter.
+            if (current_search := _BRACKET_PAIRS.get(character)) is not None:
+                depth = 1
+            elif character == ",":
+                yield parameters_string[last_split:index]
+                last_split = index + 1
+
+        else:
+            # Inside of brackets: only track the nesting of the bracket kind that was opened first.
+            if character == current_search.opening_bracket:
+                depth += 1
+
+            elif character == current_search.closing_bracket:
+                depth -= 1
+                if depth == 0:
+                    current_search = None
+
+    yield parameters_string[last_split:]
+
+
+def _truncate_signatures(signatures: Collection[str]) -> Union[List[str], Collection[str]]:
+    """
+    Truncate passed signatures to not exceed `_MAX_SIGNATURES_LENGTH`.
+
+    If the signatures need to be truncated, parameters are collapsed until they fit within the limit.
+    Individual signatures can consist of max 1, 2, ..., `MAX_SIGNATURE_AMOUNT` lines of text,
+    inversely proportional to the amount of signatures.
+    A maximum of `MAX_SIGNATURE_AMOUNT` signatures is assumed to be passed.
+
+    The passed collection itself is returned when no truncation is needed, otherwise a new list is returned.
+    """
+    if sum(len(signature) for signature in signatures) <= _MAX_SIGNATURES_LENGTH:
+        # Total length of signatures is under the length limit; no truncation needed.
+        return signatures
+
+    # The fewer signatures there are, the more lines each of them may take.
+    max_signature_length = _EMBED_CODE_BLOCK_LINE_LENGTH * (MAX_SIGNATURE_AMOUNT + 1 - len(signatures))
+    formatted_signatures = []
+    for signature in signatures:
+        signature = signature.strip()
+        if len(signature) > max_signature_length:
+            if (parameters_match := _PARAMETERS_RE.search(signature)) is None:
+                # The signature has no parameters or the regex failed; perform a simple truncation of the text.
+                formatted_signatures.append(textwrap.shorten(signature, max_signature_length, placeholder="..."))
+                continue
+
+            truncated_signature = []
+            parameters_string = parameters_match[1]
+            # Start from the length of everything outside of the parameters, and add the kept parameters to it.
+            running_length = len(signature) - len(parameters_string)
+            for parameter in _split_parameters(parameters_string):
+                # Check if including this parameter would still be within the maximum length.
+                if (len(parameter) + running_length) <= max_signature_length - 5:  # account for comma and placeholder
+                    truncated_signature.append(parameter)
+                    running_length += len(parameter) + 1
+                else:
+                    # There's no more room for this parameter. Truncate the parameter list and put it in the signature.
+                    truncated_signature.append(" ...")
+                    formatted_signatures.append(signature.replace(parameters_string, ",".join(truncated_signature)))
+                    break
+        else:
+            # The current signature is under the length limit; no truncation needed.
+            formatted_signatures.append(signature)
+
+    return formatted_signatures
+
+
+def _get_truncated_description(
+    elements: Iterable[Union[Tag, NavigableString]],
+    markdown_converter: DocMarkdownConverter,
+    max_length: int,
+    max_lines: int,
+) -> str:
+    """
+    Truncate the Markdown from `elements` to be at most `max_length` characters when rendered or `max_lines` newlines.
+
+    `max_length` limits the length of the rendered characters in the string,
+    with the real string length limited to `_MAX_DESCRIPTION_LENGTH` to accommodate discord length limits.
+
+    An empty string is returned when none of the converted elements contain non-whitespace text.
+    A result that had to be truncated ends with an ellipsis.
+    """
+    result = ""
+    markdown_element_ends = []  # Stores indices into `result` which point to the end boundary of each Markdown element.
+    rendered_length = 0
+
+    tag_end_index = 0
+    for element in elements:
+        is_tag = isinstance(element, Tag)
+        # The rendered length is measured on the text of the element, not on its Markdown.
+        element_length = len(element.text) if is_tag else len(element)
+
+        if rendered_length + element_length < max_length:
+            if is_tag:
+                element_markdown = markdown_converter.process_tag(element, convert_as_inline=False)
+            else:
+                element_markdown = markdown_converter.process_text(element)
+
+            rendered_length += element_length
+            tag_end_index += len(element_markdown)
+
+            # Whitespace only elements are added to the result, but are not used as truncation points.
+            if not element_markdown.isspace():
+                markdown_element_ends.append(tag_end_index)
+            result += element_markdown
+        else:
+            # The first element that doesn't fit ends the conversion, later elements are not considered.
+            break
+
+    if not markdown_element_ends:
+        return ""
+
+    # Determine the "hard" truncation index. Account for the ellipsis placeholder for the max length.
+    newline_truncate_index = find_nth_occurrence(result, "\n", max_lines)
+    if newline_truncate_index is not None and newline_truncate_index < _MAX_DESCRIPTION_LENGTH - 3:
+        # Truncate based on maximum lines if there are more than the maximum number of lines.
+        truncate_index = newline_truncate_index
+    else:
+        # There are less than the maximum number of lines; truncate based on the max char length.
+        truncate_index = _MAX_DESCRIPTION_LENGTH - 3
+
+    # Nothing needs to be truncated if the last element ends before the truncation index.
+    if truncate_index >= markdown_element_ends[-1]:
+        return result
+
+    # Determine the actual truncation index.
+    possible_truncation_indices = [cut for cut in markdown_element_ends if cut < truncate_index]
+    if not possible_truncation_indices:
+        # In case there is no Markdown element ending before the truncation index, try to find a good cutoff point.
+        force_truncated = result[:truncate_index]
+        # If there is an incomplete codeblock, cut it out.
+        if force_truncated.count("```") % 2:
+            force_truncated = force_truncated[:force_truncated.rfind("```")]
+        # Search for substrings to truncate at, with decreasing desirability.
+        for string_ in ("\n\n", "\n", ". ", ", ", ",", " "):
+            cutoff = force_truncated.rfind(string_)
+
+            if cutoff != -1:
+                truncated_result = force_truncated[:cutoff]
+                break
+        else:
+            # None of the substrings were found; keep the whole force truncated text.
+            truncated_result = force_truncated
+
+    else:
+        # Truncate at the last Markdown element that comes before the truncation index.
+        markdown_truncate_index = possible_truncation_indices[-1]
+        truncated_result = result[:markdown_truncate_index]
+
+    return truncated_result.strip(_TRUNCATE_STRIP_CHARACTERS) + "..."
+
+
+def _create_markdown(signatures: Optional[List[str]], description: Iterable[Tag], url: str) -> str:
+    """
+    Build the Markdown of a symbol from its signatures and from the html elements of its description.
+
+    The description is converted to Markdown truncated to 750 rendered characters and 13 lines.
+    When `signatures` is not None, each of the truncated signatures is wrapped in a python codeblock
+    and the codeblocks are placed above the description, separated from it by a newline.
+    """
+    converter = DocMarkdownConverter(bullets="•", page_url=url)
+    truncated_description = _get_truncated_description(
+        description,
+        markdown_converter=converter,
+        max_length=750,
+        max_lines=13
+    )
+    body = _WHITESPACE_AFTER_NEWLINES_RE.sub("", truncated_description)
+    if signatures is None:
+        return body
+
+    code_blocks = [f"```py\n{signature}```" for signature in _truncate_signatures(signatures)]
+    return "".join(code_blocks) + f"\n{body}"
+
+
+def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> Optional[str]:
+    """
+    Return parsed Markdown of the passed item using the passed in soup, truncated to fit within a discord message.
+
+    The method of parsing and what information gets included depends on the symbol's group.
+    None is returned when no tag with the symbol's id exists in the soup.
+    """
+    symbol_heading = soup.find(id=symbol_data.symbol_id)
+    if symbol_heading is None:
+        return None
+
+    signatures = None
+    if symbol_heading.name == "dt":
+        # Signatures are only looked for when the symbol's group is expected to have them.
+        if symbol_data.group not in _NO_SIGNATURE_GROUPS:
+            signatures = get_signatures(symbol_heading)
+        description = get_dd_description(symbol_heading)
+    else:
+        # Modules, doc pages and labels don't point to description list tags but to tags like divs,
+        # no special parsing can be done so we only try to include what's under them.
+        description = get_general_description(symbol_heading)
+
+    markdown = _create_markdown(signatures, description, symbol_data.url)
+    return markdown.replace("¶", "").strip()
diff --git a/bot/exts/info/doc/_redis_cache.py b/bot/exts/info/doc/_redis_cache.py
new file mode 100644
index 000000000..ad764816f
--- /dev/null
+++ b/bot/exts/info/doc/_redis_cache.py
@@ -0,0 +1,70 @@
+from __future__ import annotations
+
+import datetime
+from typing import Optional, TYPE_CHECKING
+
+from async_rediscache.types.base import RedisObject, namespace_lock
+if TYPE_CHECKING:
+ from ._cog import DocItem
+
+# Expiry passed to redis in `DocRedisCache.set`; `total_seconds` makes this a float.
+WEEK_SECONDS = datetime.timedelta(weeks=1).total_seconds()
+
+
+class DocRedisCache(RedisObject):
+    """Interface for redis functionality needed by the Doc cog."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Redis keys for which this instance already decided whether an expire has to be set.
+        self._set_expires = set()
+
+    @namespace_lock
+    async def set(self, item: DocItem, value: str) -> None:
+        """
+        Set the Markdown `value` for the symbol `item`.
+
+        All keys from a single page are stored together, expiring a week after the first set.
+        """
+        url_key = remove_suffix(item.relative_url_path, ".html")
+        redis_key = f"{self.namespace}:{item.package}:{url_key}"
+        needs_expire = False
+
+        with await self._get_pool_connection() as connection:
+            if redis_key not in self._set_expires:
+                # An expire is only set if the key didn't exist before.
+                # If this is the first time setting values for this key check if it exists and add it to
+                # `_set_expires` to prevent redundant checks for subsequent uses with items from the same page.
+                self._set_expires.add(redis_key)
+                needs_expire = not await connection.exists(redis_key)
+
+            await connection.hset(redis_key, item.symbol_id, value)
+            if needs_expire:
+                await connection.expire(redis_key, WEEK_SECONDS)
+
+    @namespace_lock
+    async def get(self, item: DocItem) -> Optional[str]:
+        """Return the Markdown content of the symbol `item` if it exists."""
+        url_key = remove_suffix(item.relative_url_path, ".html")
+
+        with await self._get_pool_connection() as connection:
+            return await connection.hget(f"{self.namespace}:{item.package}:{url_key}", item.symbol_id, encoding="utf8")
+
+    @namespace_lock
+    async def delete(self, package: str) -> bool:
+        """
+        Remove all values for `package`; return True if at least one key was deleted, False otherwise.
+
+        The keys of the package are also forgotten from `_set_expires`,
+        so that values set after the deletion get an expire again.
+        """
+        import fnmatch  # Only needed here; imported locally to keep the block self-contained.
+
+        pattern = f"{self.namespace}:{package}:*"
+        with await self._get_pool_connection() as connection:
+            package_keys = [
+                package_key async for package_key in connection.iscan(match=pattern)
+            ]
+            if package_keys:
+                await connection.delete(*package_keys)
+
+        # Without this, `set` would consider the expire of a re-created key as already handled
+        # and the key would be stored without any expire.
+        self._set_expires = {key for key in self._set_expires if not fnmatch.fnmatchcase(key, pattern)}
+        return bool(package_keys)
+
+
+def remove_suffix(string: str, suffix: str) -> str:
+    """
+    Remove `suffix` from end of `string`.
+
+    The string is returned unchanged when it doesn't end with `suffix` or when `suffix` is empty.
+    """
+    # TODO replace usages with str.removesuffix on 3.9
+    # An empty suffix has to be excluded, because `string[:-0]` evaluates to an empty string.
+    if suffix and string.endswith(suffix):
+        return string[:-len(suffix)]
+    return string
diff --git a/bot/exts/info/source.py b/bot/exts/info/source.py
index dc452d21d..ef07c77a1 100644
--- a/bot/exts/info/source.py
+++ b/bot/exts/info/source.py
@@ -69,7 +69,8 @@ class BotSource(commands.Cog):
Raise BadArgument if `source_item` is a dynamically-created object (e.g. via internal eval).
"""
if isinstance(source_item, commands.Command):
- src = source_item.callback.__code__
+ source_item = inspect.unwrap(source_item.callback)
+ src = source_item.__code__
filename = src.co_filename
elif isinstance(source_item, str):
tags_cog = self.bot.get_cog("Tags")
diff --git a/bot/pagination.py b/bot/pagination.py
index 3b16cc9ff..c5c84afd9 100644
--- a/bot/pagination.py
+++ b/bot/pagination.py
@@ -2,14 +2,14 @@ import asyncio
import logging
import typing as t
from contextlib import suppress
+from functools import partial
import discord
-from discord import Member
from discord.abc import User
from discord.ext.commands import Context, Paginator
from bot import constants
-from bot.constants import MODERATION_ROLES
+from bot.utils import messages
FIRST_EMOJI = "\u23EE" # [:track_previous:]
LEFT_EMOJI = "\u2B05" # [:arrow_left:]
@@ -220,29 +220,6 @@ class LinePaginator(Paginator):
>>> embed.set_author(name="Some Operation", url=url, icon_url=icon)
>>> await LinePaginator.paginate([line for line in lines], ctx, embed)
"""
- def event_check(reaction_: discord.Reaction, user_: discord.Member) -> bool:
- """Make sure that this reaction is what we want to operate on."""
- no_restrictions = (
- # The reaction was by a whitelisted user
- user_.id == restrict_to_user.id
- # The reaction was by a moderator
- or isinstance(user_, Member) and any(role.id in MODERATION_ROLES for role in user_.roles)
- )
-
- return (
- # Conditions for a successful pagination:
- all((
- # Reaction is on this message
- reaction_.message.id == message.id,
- # Reaction is one of the pagination emotes
- str(reaction_.emoji) in PAGINATION_EMOJI,
- # Reaction was not made by the Bot
- user_.id != ctx.bot.user.id,
- # There were no restrictions
- no_restrictions
- ))
- )
-
paginator = cls(prefix=prefix, suffix=suffix, max_size=max_size, max_lines=max_lines,
scale_to_size=scale_to_size)
current_page = 0
@@ -303,9 +280,16 @@ class LinePaginator(Paginator):
log.trace(f"Adding reaction: {repr(emoji)}")
await message.add_reaction(emoji)
+ check = partial(
+ messages.reaction_check,
+ message_id=message.id,
+ allowed_emoji=PAGINATION_EMOJI,
+ allowed_users=(restrict_to_user.id,),
+ )
+
while True:
try:
- reaction, user = await ctx.bot.wait_for("reaction_add", timeout=timeout, check=event_check)
+ reaction, user = await ctx.bot.wait_for("reaction_add", timeout=timeout, check=check)
log.trace(f"Got reaction: {reaction}")
except asyncio.TimeoutError:
log.debug("Timed out waiting for a reaction")
diff --git a/bot/utils/function.py b/bot/utils/function.py
index 3ab32fe3c..9bc44e753 100644
--- a/bot/utils/function.py
+++ b/bot/utils/function.py
@@ -1,14 +1,23 @@
"""Utilities for interaction with functions."""
+import functools
import inspect
+import logging
+import types
import typing as t
+log = logging.getLogger(__name__)
+
Argument = t.Union[int, str]
BoundArgs = t.OrderedDict[str, t.Any]
Decorator = t.Callable[[t.Callable], t.Callable]
ArgValGetter = t.Callable[[BoundArgs], t.Any]
+class GlobalNameConflictError(Exception):
+ """Raised when there's a conflict between the globals used to resolve annotations of wrapped and its wrapper."""
+
+
def get_arg_value(name_or_pos: Argument, arguments: BoundArgs) -> t.Any:
"""
Return a value from `arguments` based on a name or position.
@@ -73,3 +82,66 @@ def get_bound_args(func: t.Callable, args: t.Tuple, kwargs: t.Dict[str, t.Any])
bound_args.apply_defaults()
return bound_args.arguments
+
+
+def update_wrapper_globals(
+    wrapper: types.FunctionType,
+    wrapped: types.FunctionType,
+    *,
+    ignored_conflict_names: t.Set[str] = frozenset(),
+) -> types.FunctionType:
+    """
+    Update globals of `wrapper` with the globals from `wrapped`.
+
+    For forwardrefs in command annotations discordpy uses the __globals__ attribute of the function
+    to resolve their values, with decorators that replace the function this breaks because they have
+    their own globals.
+
+    This function creates a new function functionally identical to `wrapper`, which has the globals replaced with
+    a merge of `wrapped`s globals and the `wrapper`s globals. Names used by the code of `wrapper` always keep
+    the value from `wrapper`s globals. The positional defaults, keyword-only defaults and closure of `wrapper`
+    are carried over to the new function.
+
+    An exception will be raised in case `wrapper` and `wrapped` share a global name that is used by
+    `wrapped`'s typehints and is not in `ignored_conflict_names`,
+    as this can cause incorrect objects being used by discordpy's converters.
+    """
+    # Only string annotations are considered; for dotted names the first part is the name looked up in globals.
+    annotation_global_names = (
+        ann.split(".", maxsplit=1)[0] for ann in wrapped.__annotations__.values() if isinstance(ann, str)
+    )
+    # Conflicting globals from both functions' modules that are also used in the wrapper and in wrapped's annotations.
+    shared_globals = set(wrapper.__code__.co_names) & set(annotation_global_names)
+    shared_globals &= set(wrapped.__globals__) & set(wrapper.__globals__) - ignored_conflict_names
+    if shared_globals:
+        raise GlobalNameConflictError(
+            f"wrapper and the wrapped function share the following "
+            f"global names used by annotations: {', '.join(shared_globals)}. Resolve the conflicts or add "
+            f"the name to the `ignored_conflict_names` set to suppress this error if this is intentional."
+        )
+
+    new_globals = wrapper.__globals__.copy()
+    new_globals.update((k, v) for k, v in wrapped.__globals__.items() if k not in wrapper.__code__.co_names)
+    new_wrapper = types.FunctionType(
+        code=wrapper.__code__,
+        globals=new_globals,
+        name=wrapper.__name__,
+        argdefs=wrapper.__defaults__,
+        closure=wrapper.__closure__,
+    )
+    # The keyword-only defaults have to be copied manually,
+    # otherwise calling the new function without them raises a TypeError.
+    if wrapper.__kwdefaults__ is not None:
+        new_wrapper.__kwdefaults__ = dict(wrapper.__kwdefaults__)
+    return new_wrapper
+
+
+def command_wraps(
+    wrapped: types.FunctionType,
+    assigned: t.Sequence[str] = functools.WRAPPER_ASSIGNMENTS,
+    updated: t.Sequence[str] = functools.WRAPPER_UPDATES,
+    *,
+    ignored_conflict_names: t.Set[str] = frozenset(),
+) -> t.Callable[[types.FunctionType], types.FunctionType]:
+    """
+    Update the decorated function to look like `wrapped` and update globals for discordpy forwardref evaluation.
+
+    The decorated function is first rebuilt by `update_wrapper_globals`, and the result is then
+    passed to `functools.update_wrapper` along with `wrapped`, `assigned` and `updated`.
+    """
+    def decorator(wrapper: types.FunctionType) -> types.FunctionType:
+        with_merged_globals = update_wrapper_globals(
+            wrapper,
+            wrapped,
+            ignored_conflict_names=ignored_conflict_names,
+        )
+        return functools.update_wrapper(with_merged_globals, wrapped, assigned, updated)
+
+    return decorator
diff --git a/bot/utils/lock.py b/bot/utils/lock.py
index e44776340..ec6f92cd4 100644
--- a/bot/utils/lock.py
+++ b/bot/utils/lock.py
@@ -1,13 +1,15 @@
import asyncio
import inspect
import logging
+import types
from collections import defaultdict
-from functools import partial, wraps
+from functools import partial
from typing import Any, Awaitable, Callable, Hashable, Union
from weakref import WeakValueDictionary
from bot.errors import LockedResourceError
from bot.utils import function
+from bot.utils.function import command_wraps
log = logging.getLogger(__name__)
__lock_dicts = defaultdict(WeakValueDictionary)
@@ -17,6 +19,35 @@ _IdCallable = Callable[[function.BoundArgs], _IdCallableReturn]
ResourceId = Union[Hashable, _IdCallable]
+class SharedEvent:
+    """
+    Context manager which keeps an internal event cleared while it has any active holders.
+
+    The event starts out set; entering the context clears it, and it's set again
+    when the last of the holders exits. The `wait` coroutine waits for the event.
+    """
+
+    def __init__(self):
+        self._active_count = 0
+        self._event = asyncio.Event()
+        self._event.set()
+
+    def __enter__(self):
+        """Clear the internal event and increment the count of the active holders."""
+        self._event.clear()
+        self._active_count += 1
+
+    def __exit__(self, _exc_type, _exc_val, _exc_tb):  # noqa: ANN001
+        """Decrement the count of the active holders and set the internal event when no holders are left."""
+        self._active_count -= 1
+        if self._active_count == 0:
+            self._event.set()
+
+    async def wait(self) -> None:
+        """Wait until the internal event is set, which is when there are no active holders."""
+        await self._event.wait()
+
def lock(
namespace: Hashable,
resource_id: ResourceId,
@@ -41,10 +72,10 @@ def lock(
If decorating a command, this decorator must go before (below) the `command` decorator.
"""
- def decorator(func: Callable) -> Callable:
+ def decorator(func: types.FunctionType) -> types.FunctionType:
name = func.__name__
- @wraps(func)
+ @command_wraps(func)
async def wrapper(*args, **kwargs) -> Any:
log.trace(f"{name}: mutually exclusive decorator called")
diff --git a/bot/utils/messages.py b/bot/utils/messages.py
index 077dd9569..2beead6af 100644
--- a/bot/utils/messages.py
+++ b/bot/utils/messages.py
@@ -3,6 +3,7 @@ import contextlib
import logging
import random
import re
+from functools import partial
from io import BytesIO
from typing import List, Optional, Sequence, Union
@@ -12,24 +13,66 @@ from discord.ext.commands import Context
import bot
from bot.constants import Emojis, MODERATION_ROLES, NEGATIVE_REPLIES
+from bot.utils import scheduling
log = logging.getLogger(__name__)
+def reaction_check(
+ reaction: discord.Reaction,
+ user: discord.abc.User,
+ *,
+ message_id: int,
+ allowed_emoji: Sequence[str],
+ allowed_users: Sequence[int],
+ allow_mods: bool = True,
+) -> bool:
+ """
+ Check if a reaction's emoji and author are allowed and the reaction's message has the ID `message_id`.
+
+ If the user is not allowed, remove the reaction. Ignore reactions made by the bot.
+ If `allow_mods` is True, allow users with moderator roles even if they're not in `allowed_users`.
+ """
+ right_reaction = (
+ user != bot.instance.user
+ and reaction.message.id == message_id
+ and str(reaction.emoji) in allowed_emoji
+ )
+ if not right_reaction:
+ return False
+
+ is_moderator = (
+ allow_mods
+ and any(role.id in MODERATION_ROLES for role in getattr(user, "roles", []))
+ )
+
+ if user.id in allowed_users or is_moderator:
+ log.trace(f"Allowed reaction {reaction} by {user} on {reaction.message.id}.")
+ return True
+ else:
+ log.trace(f"Removing reaction {reaction} by {user} on {reaction.message.id}: disallowed user.")
+ scheduling.create_task(
+ reaction.message.remove_reaction(reaction.emoji, user),
+ HTTPException, # Suppress the HTTPException if removing the reaction fails
+ name=f"remove_reaction-{reaction}-{reaction.message.id}-{user}"
+ )
+ return False
+
+
async def wait_for_deletion(
message: discord.Message,
- user_ids: Sequence[discord.abc.Snowflake],
+ user_ids: Sequence[int],
deletion_emojis: Sequence[str] = (Emojis.trashcan,),
timeout: float = 60 * 5,
attach_emojis: bool = True,
- allow_moderation_roles: bool = True
+ allow_mods: bool = True
) -> None:
"""
Wait for up to `timeout` seconds for a reaction by any of the specified `user_ids` to delete the message.
An `attach_emojis` bool may be specified to determine whether to attach the given
`deletion_emojis` to the message in the given `context`.
- An `allow_moderation_roles` bool may also be specified to allow anyone with a role in `MODERATION_ROLES` to delete
+ An `allow_mods` bool may also be specified to allow anyone with a role in `MODERATION_ROLES` to delete
the message.
"""
if message.guild is None:
@@ -43,16 +86,13 @@ async def wait_for_deletion(
log.trace(f"Aborting wait_for_deletion: message {message.id} deleted prematurely.")
return
- def check(reaction: discord.Reaction, user: discord.Member) -> bool:
- """Check that the deletion emoji is reacted by the appropriate user."""
- return (
- reaction.message.id == message.id
- and str(reaction.emoji) in deletion_emojis
- and (
- user.id in user_ids
- or allow_moderation_roles and any(role.id in MODERATION_ROLES for role in user.roles)
- )
- )
+ check = partial(
+ reaction_check,
+ message_id=message.id,
+ allowed_emoji=deletion_emojis,
+ allowed_users=user_ids,
+ allow_mods=allow_mods,
+ )
with contextlib.suppress(asyncio.TimeoutError):
await bot.instance.wait_for('reaction_add', check=check, timeout=timeout)
@@ -141,14 +181,14 @@ def sub_clyde(username: Optional[str]) -> Optional[str]:
return username # Empty string or None
-async def send_denial(ctx: Context, reason: str) -> None:
+async def send_denial(ctx: Context, reason: str) -> discord.Message:
"""Send an embed denying the user with the given reason."""
embed = discord.Embed()
embed.colour = discord.Colour.red()
embed.title = random.choice(NEGATIVE_REPLIES)
embed.description = reason
- await ctx.send(embed=embed)
+ return await ctx.send(embed=embed)
def format_user(user: discord.abc.User) -> str:
diff --git a/bot/utils/scheduling.py b/bot/utils/scheduling.py
index 6843bae88..2dc485f24 100644
--- a/bot/utils/scheduling.py
+++ b/bot/utils/scheduling.py
@@ -161,18 +161,18 @@ class Scheduler:
self._log.error(f"Error in task #{task_id} {id(done_task)}!", exc_info=exception)
-def create_task(*args, **kwargs) -> asyncio.Task:
+def create_task(coro: t.Awaitable, *suppressed_exceptions: t.Type[Exception], **kwargs) -> asyncio.Task:
"""Wrapper for `asyncio.create_task` which logs exceptions raised in the task."""
- task = asyncio.create_task(*args, **kwargs)
- task.add_done_callback(_log_task_exception)
+ task = asyncio.create_task(coro, **kwargs)
+ task.add_done_callback(partial(_log_task_exception, suppressed_exceptions=suppressed_exceptions))
return task
-def _log_task_exception(task: asyncio.Task) -> None:
+def _log_task_exception(task: asyncio.Task, *, suppressed_exceptions: t.Tuple[t.Type[Exception]]) -> None:
"""Retrieve and log the exception raised in `task` if one exists."""
with contextlib.suppress(asyncio.CancelledError):
exception = task.exception()
# Log the exception if one exists.
- if exception:
+ if exception and not isinstance(exception, suppressed_exceptions):
log = logging.getLogger(__name__)
log.error(f"Error in task {task.get_name()} {id(task)}!", exc_info=exception)
diff --git a/tests/bot/exts/info/doc/__init__.py b/tests/bot/exts/info/doc/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/bot/exts/info/doc/__init__.py
diff --git a/tests/bot/exts/info/doc/test_parsing.py b/tests/bot/exts/info/doc/test_parsing.py
new file mode 100644
index 000000000..1663d8491
--- /dev/null
+++ b/tests/bot/exts/info/doc/test_parsing.py
@@ -0,0 +1,66 @@
+from unittest import TestCase
+
+from bot.exts.info.doc import _parsing as parsing
+
+
+class SignatureSplitter(TestCase):
+
+ def test_basic_split(self):
+ test_cases = (
+ ("0,0,0", ["0", "0", "0"]),
+ ("0,a=0,a=0", ["0", "a=0", "a=0"]),
+ )
+ self._run_tests(test_cases)
+
+ def test_commas_ignored_in_brackets(self):
+ test_cases = (
+ ("0,[0,0],0,[0,0],0", ["0", "[0,0]", "0", "[0,0]", "0"]),
+ ("(0,),0,(0,(0,),0),0", ["(0,)", "0", "(0,(0,),0)", "0"]),
+ )
+ self._run_tests(test_cases)
+
+ def test_mixed_brackets(self):
+ tests_cases = (
+ ("[0,{0},0],0,{0:0},0", ["[0,{0},0]", "0", "{0:0}", "0"]),
+ ("([0],0,0),0,(0,0),0", ["([0],0,0)", "0", "(0,0)", "0"]),
+ ("([(0,),(0,)],0),0", ["([(0,),(0,)],0)", "0"]),
+ )
+ self._run_tests(tests_cases)
+
+ def test_string_contents_ignored(self):
+ test_cases = (
+ ("'0,0',0,',',0", ["'0,0'", "0", "','", "0"]),
+ ("0,[']',0],0", ["0", "[']',0]", "0"]),
+ ("{0,0,'}}',0,'{'},0", ["{0,0,'}}',0,'{'}", "0"]),
+ )
+ self._run_tests(test_cases)
+
+ def test_mixed_quotes(self):
+ test_cases = (
+ ("\"0',0',\",'0,0',0", ["\"0',0',\"", "'0,0'", "0"]),
+ ("\",',\",'\",',0", ["\",',\"", "'\",'", "0"]),
+ )
+ self._run_tests(test_cases)
+
+ def test_quote_escaped(self):
+ test_cases = (
+ (r"'\',','\\',0", [r"'\','", r"'\\'", "0"]),
+ (r"'0\',0\\\'\\',0", [r"'0\',0\\\'\\'", "0"]),
+ )
+ self._run_tests(test_cases)
+
+ def test_real_signatures(self):
+ test_cases = (
+ ("start, stop[, step]", ["start", " stop[, step]"]),
+ ("object=b'', encoding='utf-8', errors='strict'", ["object=b''", " encoding='utf-8'", " errors='strict'"]),
+ (
+ "typename, field_names, *, rename=False, defaults=None, module=None",
+ ["typename", " field_names", " *", " rename=False", " defaults=None", " module=None"]
+ ),
+ )
+ self._run_tests(test_cases)
+
+ def _run_tests(self, test_cases):
+ for input_string, expected_output in test_cases:
+ with self.subTest(input_string=input_string):
+ self.assertEqual(list(parsing._split_parameters(input_string)), expected_output)
diff --git a/tests/bot/test_converters.py b/tests/bot/test_converters.py
index c42111f3f..4af84dde5 100644
--- a/tests/bot/test_converters.py
+++ b/tests/bot/test_converters.py
@@ -10,9 +10,9 @@ from bot.converters import (
Duration,
HushDurationConverter,
ISODateTime,
+ PackageName,
TagContentConverter,
TagNameConverter,
- ValidPythonIdentifier,
)
@@ -78,24 +78,23 @@ class ConverterTests(unittest.IsolatedAsyncioTestCase):
with self.assertRaisesRegex(BadArgument, re.escape(exception_message)):
await TagNameConverter.convert(self.context, invalid_name)
- async def test_valid_python_identifier_for_valid(self):
- """ValidPythonIdentifier returns valid identifiers unchanged."""
- test_values = ('foo', 'lemon')
+ async def test_package_name_for_valid(self):
+ """PackageName returns valid package names unchanged."""
+ test_values = ('foo', 'le_mon', 'num83r')
for name in test_values:
with self.subTest(identifier=name):
- conversion = await ValidPythonIdentifier.convert(self.context, name)
+ conversion = await PackageName.convert(self.context, name)
self.assertEqual(name, conversion)
- async def test_valid_python_identifier_for_invalid(self):
- """ValidPythonIdentifier raises the proper exception for invalid identifiers."""
- test_values = ('nested.stuff', '#####')
+ async def test_package_name_for_invalid(self):
+ """PackageName raises the proper exception for invalid package names."""
+ test_values = ('text_with_a_dot.', 'UpperCaseName', 'dashed-name')
for name in test_values:
with self.subTest(identifier=name):
- exception_message = f'`{name}` is not a valid Python identifier'
- with self.assertRaisesRegex(BadArgument, re.escape(exception_message)):
- await ValidPythonIdentifier.convert(self.context, name)
+ with self.assertRaises(BadArgument):
+ await PackageName.convert(self.context, name)
async def test_duration_converter_for_valid(self):
"""Duration returns the correct `datetime` for valid duration strings."""