# coding=utf-8 import re from docutils.core import publish_parts from docutils.parsers.rst.roles import register_canonical_role from pysite.rst.roles import icon_role, page_role, url_for_role RST_TEMPLATE = """.. contents:: {0}""" CONTENTS_REGEX = re.compile(r"""

(.*?)

""", re.DOTALL) HREF_REGEX = re.compile(r"""(.*?)""") def render(rst: str): rst = RST_TEMPLATE.format(rst) html = publish_parts( source=rst, writer_name="html5", settings_overrides={"halt_level": 2, "syntax_highlight": "short"} )["html_body"] data = { "html": html, "headers": [] } match = CONTENTS_REGEX.search(html) # Find the contents HTML if match: data["html"] = html.replace(match.group(0), "") # Remove the contents from the document HTML depth = 0 headers = [] current_header = {} group = match.group(1) # Sanitize the output so we can more easily parse it group = group.replace("

", "

\n") group = group.replace("

", "\n") group = group.replace("

", "

\n") group = group.replace("

", "\n

") for line in group.split("\n"): line = line.strip() # Remove excess whitespace if not line: # Nothing to process continue if line.startswith("

") and depth <= 2: # We've found a header, or the start of a header group depth += 1 elif line.startswith("

") and depth >= 0: # That's the end of a header or header group if depth == 1: # We just dealt with an entire header group, so store it headers.append(current_header.copy()) # Store a copy, since we're clearing the dict current_header.clear() depth -= 1 elif line.startswith("