import re

from docutils.core import publish_parts
from docutils.parsers.rst.roles import register_canonical_role

from pysite.rst.roles import icon_role, page_role, url_for_role

# NOTE(review): this module reached review with its line breaks collapsed and every HTML tag stripped out of
# its string literals. The markup in the constants and helpers below was reconstructed from the surrounding
# logic and from the table-of-contents markup the docutils html5 writer is expected to emit - confirm the
# exact literals against version control before merging.

# The blank line matters: text on the same line as the directive would be taken as the title of the contents.
RST_TEMPLATE = """.. contents::

{0}"""

# Captures the inner HTML of the generated table-of-contents block.
CONTENTS_REGEX = re.compile(r"""<div class="contents topic" id="contents">(.*?)</div>""", re.DOTALL)

# Captures (href, inner HTML) of a single table-of-contents link.
HREF_REGEX = re.compile(r"""<a class="reference internal" href="(.*?)".*?>(.*?)</a>""")


def _pad_icon_title(title: str) -> str:
    """Return `title` with extra spacing after a leading icon, or unchanged if it has no leading icon."""
    if title.startswith("<i"):  # We've found an icon, which needs to have a space after it
        return title.replace("</i> ", "</i> &nbsp;")

    return title


def _parse_headers(contents_html: str) -> list:
    """
    Turn the inner HTML of the generated contents block into a list of header dicts.

    Each top-level entry becomes `{"id": ..., "title": ...}`; entries nested below it (at any depth) are
    collected, in document order, under an optional `"sub_headers"` list of `{"id": ..., "title": ...}` dicts.
    """

    # Sanitize the output so we can more easily parse it: make sure every tag we care about starts its own line
    for old, new in (("<li>", "<li>\n"), ("</li>", "\n</li>"), ("<p>", "<p>\n"), ("</p>", "\n</p>")):
        contents_html = contents_html.replace(old, new)

    headers = []
    current_header = {}
    depth = 0  # How many list items we're currently nested inside

    for line in contents_html.split("\n"):
        line = line.strip()  # Remove excess whitespace

        if not line:  # Nothing to process
            continue

        if line.startswith("<li>"):
            # We've found a header, or the start of a header group. Every level is counted so that closing
            # tags always pair up with opening ones, however deeply the document is nested.
            depth += 1
        elif line.startswith("</li>"):
            # That's the end of a header or header group
            if depth == 1:
                # We just dealt with an entire header group, so store it and start a fresh one
                headers.append(current_header)
                current_header = {}

            if depth > 0:  # Never go negative on malformed input
                depth -= 1
        elif line.startswith("<a"):
            # We've found an actual URL
            link = HREF_REGEX.match(line)  # Parse the line for the ID and header title

            if link is None:  # Not a contents link we understand, so skip it instead of crashing
                continue

            header_id = link.group(1)
            title = _pad_icon_title(link.group(2))

            if depth == 1:  # Top-level header, so just store it in the current header
                current_header["id"] = header_id
                current_header["title"] = title
            else:  # Second-level (or deeper) header, stored in a list of sub-headers under the current one
                current_header.setdefault("sub_headers", []).append({
                    "id": header_id,
                    "title": title
                })

    return headers


def render(rst: str) -> dict:
    """
    Render a reStructuredText document to HTML and extract its table of contents.

    The document is prefixed with a `contents` directive and rendered with the docutils `html5` writer. If the
    generated contents block is found in the output, it is removed from the HTML and parsed into a header list.

    :param rst: The reStructuredText source to render
    :return: A dict with the keys `"html"` (the rendered body, without the contents block) and `"headers"`
             (a list of header dicts, empty when no contents block was generated)

    NOTE(review): `halt_level` 2 presumably makes docutils raise on warnings rather than render them inline -
    confirm against the docutils configuration reference; callers should expect that exception to propagate.
    """

    html = publish_parts(
        source=RST_TEMPLATE.format(rst),
        writer_name="html5",
        settings_overrides={
            "halt_level": 2,
            "syntax_highlight": "short",
            "initial_header_level": 3
        }
    )["html_body"]

    data = {
        "html": html,
        "headers": []
    }

    contents = CONTENTS_REGEX.search(html)  # Find the contents HTML

    if contents:
        data["html"] = html.replace(contents.group(0), "")  # Remove the contents from the document HTML
        data["headers"] = _parse_headers(contents.group(1))

    return data


register_canonical_role("icon", icon_role)
register_canonical_role("page", page_role)
register_canonical_role("url_for", url_for_role)