2 files changed, 117 insertions, 0 deletions
diff --git a/LICENSE-THIRD-PARTY b/LICENSE-THIRD-PARTY
new file mode 100644
index 000000000..f78491fc1
--- /dev/null
+++ b/LICENSE-THIRD-PARTY
@@ -0,0 +1,30 @@
+License for Sphinx
+Applies to:
+    - bot/cogs/doc/inventory_parser.py: _load_v1, _load_v2 and ZlibStreamReader.__aiter__.
+==================
+
+Copyright (c) 2007-2020 by the Sphinx team (see AUTHORS file).
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/bot/cogs/doc/inventory_parser.py b/bot/cogs/doc/inventory_parser.py
new file mode 100644
index 000000000..6c2b63d5e
--- /dev/null
+++ b/bot/cogs/doc/inventory_parser.py
@@ -0,0 +1,87 @@
+import re
+import zlib
+from collections import defaultdict
+from typing import AsyncIterator, DefaultDict, List, Tuple
+
+import aiohttp
+
+_V2_LINE_RE = re.compile(r'(?x)(.+?)\s+(\S*:\S*)\s+(-?\d+)\s+?(\S*)\s+(.*)')  # name, type, prio, location, dispname
+
+
+class ZlibStreamReader:
+    """Decompress a zlib-compressed stream and iterate over its content line by line."""
+
+    READ_CHUNK_SIZE = 16 * 1024  # size passed to `iter_chunked` for every read of compressed data
+
+    def __init__(self, stream: aiohttp.StreamReader) -> None:
+        self.stream = stream
+
+    async def _read_compressed_chunks(self) -> AsyncIterator[bytes]:
+        """Yield the decompressed form of each chunk read from the stream, then the decompressor's remainder."""
+        inflater = zlib.decompressobj()
+        async for compressed in self.stream.iter_chunked(self.READ_CHUNK_SIZE):
+            yield inflater.decompress(compressed)
+        yield inflater.flush()  # emit whatever the decompressor still holds once the stream is exhausted
+
+    async def __aiter__(self) -> AsyncIterator[str]:
+        """Yield each newline-terminated line of the decompressed data as `str`, without its newline.
+
+        Bytes that follow the last newline are never yielded.
+        """
+        pending = b''
+        async for chunk in self._read_compressed_chunks():
+            # Every piece but the last ended in a newline; the last one is kept until more data arrives.
+            *lines, pending = (pending + chunk).split(b'\n')
+            for line in lines:
+                yield line.decode()
+
+
+async def _load_v1(stream: aiohttp.StreamReader) -> DefaultDict[str, List[Tuple[str, str]]]:
+    """Parse a version 1 inventory body into a mapping of `py:` prefixed type to (name, location) pairs."""
+    symbols = defaultdict(list)
+
+    async for raw_line in stream:
+        name, kind, page = raw_line.decode().rstrip().split(maxsplit=2)
+        # Version 1 locations come without an anchor, so one is derived from the symbol name.
+        if kind == 'mod':
+            group, anchor = 'py:module', '#module-' + name
+        else:
+            group, anchor = 'py:' + kind, '#' + name
+        symbols[group].append((name, page + anchor))
+
+    return symbols
+
+
+async def _load_v2(stream: aiohttp.StreamReader) -> DefaultDict[str, List[Tuple[str, str]]]:
+    """Parse a zlib-compressed version 2 inventory body into a mapping of type to (name, location) pairs."""
+    invdata = defaultdict(list)
+    async for line in ZlibStreamReader(stream):
+        m = _V2_LINE_RE.match(line.rstrip())
+        if m is not None:  # a line that isn't an inventory entry is skipped instead of failing on `None.groups()`
+            name, type_, _prio, location, _dispname = m.groups()  # ignore the parsed items we don't need
+            if location.endswith('$'):
+                location = location[:-1] + name  # a trailing `$` is replaced by the entry's name
+            invdata[type_].append((name, location))
+    return invdata
+
+
+async def fetch_inventory(client_session: aiohttp.ClientSession, url: str) -> DefaultDict[str, List[Tuple[str, str]]]:
+    """Fetch, parse and return an intersphinx inventory file from an url.
+
+    Raise `ValueError` if the file is neither a version 1 nor a zlib-compressed version 2 inventory.
+    """
+    timeout = aiohttp.ClientTimeout(sock_connect=5, sock_read=5)
+    async with client_session.get(url, timeout=timeout, raise_for_status=True) as response:
+        stream = response.content
+
+        inventory_header = (await stream.readline()).decode().rstrip()
+        await stream.readline()  # skip project name
+        await stream.readline()  # skip project version
+
+        # The header's last character is the version; checking it as text means a header that is empty or doesn't
+        # end in a digit reaches the descriptive error below instead of the bare `ValueError` `int()` would raise.
+        if inventory_header.endswith("1"):
+            return await _load_v1(stream)
+        if inventory_header.endswith("2") and b"zlib" in await stream.readline():
+            return await _load_v2(stream)
+        raise ValueError(f"Invalid inventory file at url {url}.")