diff options
| -rw-r--r-- | LICENSE-THIRD-PARTY | 30 | ||||
| -rw-r--r-- | bot/cogs/doc/inventory_parser.py | 87 | 
2 files changed, 117 insertions, 0 deletions
| diff --git a/LICENSE-THIRD-PARTY b/LICENSE-THIRD-PARTY new file mode 100644 index 000000000..f78491fc1 --- /dev/null +++ b/LICENSE-THIRD-PARTY @@ -0,0 +1,30 @@ +License for Sphinx +Applies to: +    - bot/cogs/doc/inventory_parser.py: _load_v1, _load_v2 and ZlibStreamReader.__aiter__. +================== + +Copyright (c) 2007-2020 by the Sphinx team (see AUTHORS file). +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright +  notice, this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright +  notice, this list of conditions and the following disclaimer in the +  documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# diff --git a/bot/cogs/doc/inventory_parser.py b/bot/cogs/doc/inventory_parser.py new file mode 100644 index 000000000..6c2b63d5e --- /dev/null +++ b/bot/cogs/doc/inventory_parser.py @@ -0,0 +1,87 @@
"""Asynchronous fetching and parsing of intersphinx inventory files."""
import re
import zlib
from collections import defaultdict
from typing import AsyncIterator, DefaultDict, List, Tuple

import aiohttp

# Groups, in order: name, type, priority, location, display name.
_V2_LINE_RE = re.compile(r'(?x)(.+?)\s+(\S*:\S*)\s+(-?\d+)\s+?(\S*)\s+(.*)')


class ZlibStreamReader:
    """Class used for decoding zlib data of a stream line by line."""

    READ_CHUNK_SIZE = 16 * 1024

    def __init__(self, stream: aiohttp.StreamReader) -> None:
        self.stream = stream

    async def _read_compressed_chunks(self) -> AsyncIterator[bytes]:
        """Read zlib data in `READ_CHUNK_SIZE` sized chunks and decompress."""
        decompressor = zlib.decompressobj()
        async for chunk in self.stream.iter_chunked(self.READ_CHUNK_SIZE):
            yield decompressor.decompress(chunk)

        # Emit whatever the decompressor is still holding once the stream ends.
        yield decompressor.flush()

    async def __aiter__(self) -> AsyncIterator[str]:
        """
        Yield lines of decompressed text, without their trailing newline.

        A final line that is not terminated by a newline is yielded as well.
        """
        pending = b''
        async for chunk in self._read_compressed_chunks():
            # Everything before the last newline is complete; the remainder (possibly empty)
            # is carried over and prepended to the next chunk.
            *lines, pending = (pending + chunk).split(b'\n')
            for line in lines:
                yield line.decode()

        if pending:
            yield pending.decode()


async def _load_v1(stream: aiohttp.StreamReader) -> DefaultDict[str, List[Tuple[str, str]]]:
    """
    Parse the body of a version 1 inventory from `stream`.

    Return a mapping of type names to lists of `(name, location)` tuples.
    Lines that don't consist of a name, a type and a location are skipped.
    """
    invdata: DefaultDict[str, List[Tuple[str, str]]] = defaultdict(list)

    async for line in stream:
        fields = line.decode().rstrip().split(maxsplit=2)
        if len(fields) != 3:
            # Blank or malformed line; ignore it instead of aborting the whole parse.
            continue
        name, type_, location = fields
        # version 1 did not add anchors to the location
        if type_ == 'mod':
            type_ = 'py:module'
            location += '#module-' + name
        else:
            type_ = 'py:' + type_
            location += '#' + name
        invdata[type_].append((name, location))
    return invdata


async def _load_v2(stream: aiohttp.StreamReader) -> DefaultDict[str, List[Tuple[str, str]]]:
    """
    Parse the zlib compressed body of a version 2 inventory from `stream`.

    Return a mapping of type names to lists of `(name, location)` tuples.
    Lines that don't match `_V2_LINE_RE` are skipped.
    """
    invdata: DefaultDict[str, List[Tuple[str, str]]] = defaultdict(list)

    async for line in ZlibStreamReader(stream):
        m = _V2_LINE_RE.match(line.rstrip())
        if m is None:
            # Not an inventory entry; skip it rather than failing on `None.groups()`.
            continue
        name, type_, _prio, location, _dispname = m.groups()  # ignore the parsed items we don't need
        # A trailing `$` in the location is shorthand for the symbol's name.
        if location.endswith('$'):
            location = location[:-1] + name

        invdata[type_].append((name, location))
    return invdata


async def fetch_inventory(client_session: aiohttp.ClientSession, url: str) -> DefaultDict[str, List[Tuple[str, str]]]:
    """
    Fetch, parse and return an intersphinx inventory file from a URL.

    The inventory version is read from the last character of the first header line.

    Raise ValueError if the version is missing or unsupported, or if a version 2
    inventory does not declare zlib compression in its header.
    """
    timeout = aiohttp.ClientTimeout(sock_connect=5, sock_read=5)
    async with client_session.get(url, timeout=timeout, raise_for_status=True) as response:
        stream = response.content

        inventory_header = (await stream.readline()).decode().rstrip()
        try:
            inventory_version = int(inventory_header[-1:])
        except ValueError:
            # Empty header or one not ending in a digit: report it the same way as any other bad file.
            raise ValueError(f"Invalid inventory file at url {url}.") from None
        await stream.readline()  # skip project name
        await stream.readline()  # skip project version

        if inventory_version == 1:
            return await _load_v1(stream)

        if inventory_version == 2:
            # The fourth header line of a version 2 inventory announces the compression used.
            if b"zlib" not in await stream.readline():
                raise ValueError(f"Invalid inventory file at url {url}.")
            return await _load_v2(stream)

        raise ValueError(f"Invalid inventory file at url {url}.")
