diff options
| -rw-r--r-- | LICENSE-THIRD-PARTY | 30 | ||||
| -rw-r--r-- | bot/cogs/doc/inventory_parser.py | 87 |
2 files changed, 117 insertions, 0 deletions
diff --git a/LICENSE-THIRD-PARTY b/LICENSE-THIRD-PARTY new file mode 100644 index 000000000..f78491fc1 --- /dev/null +++ b/LICENSE-THIRD-PARTY @@ -0,0 +1,30 @@ +License for Sphinx +Applies to: + - bot/cogs/doc/inventory_parser.py: _load_v1, _load_v2 and ZlibStreamReader.__aiter__. +================== + +Copyright (c) 2007-2020 by the Sphinx team (see AUTHORS file). +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""
Fetching and parsing of intersphinx inventory files (versions 1 and 2).

Source path: bot/cogs/doc/inventory_parser.py

`_load_v1`, `_load_v2` and `ZlibStreamReader.__aiter__` are covered by the
Sphinx license reproduced in LICENSE-THIRD-PARTY.
"""
import re
import zlib
from collections import defaultdict
from typing import AsyncIterator, DefaultDict, List, Tuple

import aiohttp

# Groups, in order: name, type, priority, location, display name.
_V2_LINE_RE = re.compile(r'(?x)(.+?)\s+(\S*:\S*)\s+(-?\d+)\s+?(\S*)\s+(.*)')


class ZlibStreamReader:
    """Class used for decoding zlib data of a stream line by line."""

    READ_CHUNK_SIZE = 16 * 1024

    def __init__(self, stream: aiohttp.StreamReader) -> None:
        self.stream = stream

    async def _read_compressed_chunks(self) -> AsyncIterator[bytes]:
        """Read zlib data in `READ_CHUNK_SIZE` sized chunks and decompress."""
        decompressor = zlib.decompressobj()
        async for chunk in self.stream.iter_chunked(self.READ_CHUNK_SIZE):
            yield decompressor.decompress(chunk)

        # Emit whatever the decompressor is still holding once the stream ends.
        yield decompressor.flush()

    async def __aiter__(self) -> AsyncIterator[str]:
        """
        Yield lines of decompressed text, without their trailing newline.

        A final line that is not newline-terminated is yielded as well
        instead of being silently dropped.
        """
        buf = b''
        async for chunk in self._read_compressed_chunks():
            buf += chunk
            # Everything before the last newline is a complete line; the last
            # piece is an incomplete line carried over to the next chunk.
            *lines, buf = buf.split(b'\n')
            for line in lines:
                yield line.decode()

        if buf:
            yield buf.decode()


async def _load_v1(stream: aiohttp.StreamReader) -> DefaultDict[str, List[Tuple[str, str]]]:
    """
    Parse the body of a version 1 inventory from `stream`.

    Return a mapping of type to a list of `(name, location)` tuples.
    Lines that do not contain the three expected fields are skipped.
    """
    invdata = defaultdict(list)

    async for line in stream:
        fields = line.decode().rstrip().split(maxsplit=2)
        if len(fields) != 3:
            # Blank or malformed line; skip it rather than abort the whole parse.
            continue
        name, type_, location = fields

        # version 1 did not add anchors to the location
        if type_ == 'mod':
            type_ = 'py:module'
            location += '#module-' + name
        else:
            type_ = 'py:' + type_
            location += '#' + name
        invdata[type_].append((name, location))
    return invdata


async def _load_v2(stream: aiohttp.StreamReader) -> DefaultDict[str, List[Tuple[str, str]]]:
    """
    Parse the zlib compressed body of a version 2 inventory from `stream`.

    Return a mapping of type to a list of `(name, location)` tuples.
    Lines that do not match `_V2_LINE_RE` are skipped.
    """
    invdata = defaultdict(list)

    async for line in ZlibStreamReader(stream):
        m = _V2_LINE_RE.match(line.rstrip())
        if m is None:
            # Not an inventory entry (e.g. blank or malformed line).
            continue

        name, type_, _prio, location, _dispname = m.groups()  # ignore the parsed items we don't need
        if location.endswith('$'):
            # A trailing `$` stands in for the name of the entry.
            location = location[:-1] + name

        invdata[type_].append((name, location))
    return invdata


async def fetch_inventory(
    client_session: aiohttp.ClientSession,
    url: str
) -> DefaultDict[str, List[Tuple[str, str]]]:
    """
    Fetch, parse and return an intersphinx inventory file from an url.

    Raise `ValueError` if the version in the header is missing or unsupported,
    or if a version 2 inventory does not declare zlib compression.
    Non-successful HTTP responses raise through `raise_for_status=True`.
    """
    timeout = aiohttp.ClientTimeout(sock_connect=5, sock_read=5)
    async with client_session.get(url, timeout=timeout, raise_for_status=True) as response:
        stream = response.content

        # The version is read from the last character of the first header line.
        inventory_header = (await stream.readline()).decode().rstrip()
        try:
            inventory_version = int(inventory_header[-1:])
        except ValueError as err:
            raise ValueError(f"Invalid inventory file at url {url}.") from err

        await stream.readline()  # skip project name
        await stream.readline()  # skip project version

        if inventory_version == 1:
            return await _load_v1(stream)

        elif inventory_version == 2:
            if b"zlib" not in await stream.readline():
                raise ValueError(f"Invalid inventory file at url {url}.")
            return await _load_v2(stream)

        raise ValueError(f"Invalid inventory file at url {url}.")