diff options
Diffstat (limited to 'pysite/rst')
| -rw-r--r-- | pysite/rst/__init__.py | 70 | 
1 files changed, 69 insertions, 1 deletions
# coding=utf-8
import re

from docutils.core import publish_parts
from docutils.parsers.rst.roles import register_canonical_role

from pysite.rst.roles import icon_role, page_role, url_for_role

# Every document is rendered with a ``contents`` directive prepended so that
# the generated table of contents can be pulled back out of the HTML.
RST_TEMPLATE = """.. contents::

{0}"""

CONTENTS_REGEX = re.compile(r"""<div class=\"contents topic\" id=\"contents\">(.*?)</div>""", re.DOTALL)
HREF_REGEX = re.compile(r"""<a class=\"reference internal\" href=\"(.*?)\".*?>(.*?)</a>""")

# Only top-level entries and their direct children are reported; anchors
# nested deeper than this are skipped.
_MAX_HEADER_DEPTH = 2


def _parse_headers(contents_html: str) -> list:
    """Turn the inner HTML of the contents block into a list of header dicts.

    Each returned dict describes one top-level entry with the keys ``"id"``
    (the anchor's ``href`` value) and ``"title"`` (the anchor's inner HTML).
    Entries that have second-level children also carry ``"sub_headers"``, a
    list of dicts with the same ``"id"``/``"title"`` keys.

    Anchors that ``HREF_REGEX`` cannot parse are skipped, and a top-level
    list item that produced no data at all is not added to the result.
    """
    # Put every <li>/<p> boundary on its own line so the markup can be
    # walked line by line.
    toc = contents_html
    toc = toc.replace("<li>", "<li>\n")
    toc = toc.replace("</li>", "\n</li>")
    toc = toc.replace("<p>", "<p>\n")
    toc = toc.replace("</p>", "\n</p>")

    headers = []
    current_header = {}
    depth = 0  # How many <li> elements are currently open

    for raw_line in toc.split("\n"):
        line = raw_line.strip()

        if line.startswith("<li>"):
            # Always count the nesting, even below the levels we report;
            # otherwise the matching </li> would unbalance the counter.
            depth += 1
        elif line.startswith("</li>"):
            if depth == 1 and current_header:
                # A whole top-level entry (and its children) is complete
                headers.append(current_header)
                current_header = {}

            depth = max(depth - 1, 0)  # Never drop below zero on stray closers
        elif line.startswith("<a") and 1 <= depth <= _MAX_HEADER_DEPTH:
            link = HREF_REGEX.match(line)

            if link is None:
                # Not a TOC reference in the expected shape; nothing to record
                continue

            entry = {"id": link.group(1), "title": link.group(2)}

            if depth == 1:
                current_header.update(entry)
            else:
                current_header.setdefault("sub_headers", []).append(entry)

    return headers


def render(rst: str) -> dict:
    """Render reStructuredText to HTML and extract its table of contents.

    The source is wrapped in ``RST_TEMPLATE`` and published with the
    ``html5`` writer. If the resulting HTML contains the generated contents
    block, that block is removed from the HTML and parsed into headers.

    :param rst: The reStructuredText source to render.
    :return: A dict with ``"html"`` (the rendered body, without the contents
        block when one was found) and ``"headers"`` (a list of header dicts
        as produced by ``_parse_headers``; empty when no contents block was
        found).

    NOTE(review): ``halt_level`` is set to 2, so docutils presumably raises
    on markup problems at or above that level - confirm against callers.
    """
    rst = RST_TEMPLATE.format(rst)
    html = publish_parts(
        source=rst, writer_name="html5", settings_overrides={"halt_level": 2, "syntax_highlight": "short"}
    )["html_body"]

    contents = CONTENTS_REGEX.search(html)  # Find the contents HTML

    if contents is None:
        return {"html": html, "headers": []}

    return {
        "html": html.replace(contents.group(0), ""),  # Remove the contents from the document HTML
        "headers": _parse_headers(contents.group(1)),
    }


register_canonical_role("icon", icon_role)
register_canonical_role("page", page_role)