import re
from docutils.core import publish_parts
from docutils.parsers.rst.roles import register_canonical_role
from pysite.rst.roles import icon_role, page_role, url_for_role
# Wrapper that places a table-of-contents directive ahead of the caller's
# document. The blank line after the directive is required: reStructuredText
# expects explicit markup to be followed by a blank line before body text,
# and render() runs docutils with halt_level=2, so a warning there is fatal.
RST_TEMPLATE = """.. contents::

{0}"""

# Matches the table-of-contents container in the rendered HTML; group 1 is
# the container's inner markup.
# NOTE(review): this markup is specific to the docutils writer/version in
# use -- confirm it against the installed docutils release.
CONTENTS_REGEX = re.compile(
    r'<div class="contents topic" id="contents">(.*?)</div>', re.DOTALL
)

# Matches one table-of-contents link; group 1 is the href target and
# group 2 is the link's inner HTML (the header title).
HREF_REGEX = re.compile(
    r'<a class="reference internal" href="(.*?)" id=".*?">(.*?)</a>'
)


def _space_after_icon(title: str) -> str:
    """Add extra spacing after a leading icon tag so it does not crowd the title text."""
    if title.startswith("<i"):
        return title.replace("</i> ", "</i> &nbsp;")
    return title


def _parse_contents(markup: str) -> list:
    """Turn the inner markup of the contents container into a list of header dicts."""
    # Put every list-item and paragraph tag on its own line so that the
    # markup can be scanned line by line.
    markup = markup.replace("<li>", "<li>\n")
    markup = markup.replace("</li>", "\n</li>")
    markup = markup.replace("<p>", "<p>\n")
    markup = markup.replace("</p>", "\n</p>")

    depth = 0
    headers = []
    current_header = {}

    for line in markup.split("\n"):
        line = line.strip()  # Remove excess whitespace

        if not line:  # Nothing to process
            continue

        if line.startswith("<li>") and depth <= 2:
            # We've found a header, or the start of a header group
            depth += 1
        elif line.startswith("</li>") and depth >= 0:
            # That's the end of a header or header group
            if depth == 1:
                # An entire top-level group is complete; store a copy
                # because the working dict is cleared for the next group.
                headers.append(current_header.copy())
                current_header.clear()
            depth -= 1
        elif line.startswith("<a") and depth <= 2:
            # We've found an actual link to a header
            link = HREF_REGEX.match(line)
            if link is None:
                # Not a contents link in the expected shape; skip it
                # rather than fail on a missing match object.
                continue

            header_id = link.group(1)
            title = _space_after_icon(link.group(2))

            if depth == 1:
                # Top-level header: store it on the current header itself
                current_header["id"] = header_id
                current_header["title"] = title
            else:
                # Second-level header: store it in the list of sub-headers
                # under the current top-level header
                current_header.setdefault("sub_headers", []).append({
                    "id": header_id,
                    "title": title
                })

    return headers


def render(rst: str) -> dict:
    """Render reStructuredText to HTML and extract its table of contents.

    A ``.. contents::`` directive is prepended to ``rst`` so that docutils
    generates a table of contents. That generated block is then removed
    from the HTML and parsed into a header structure instead.

    :param rst: The reStructuredText source to render.
    :return: A dict with two keys. ``"html"`` is the rendered body HTML,
        with the contents block removed when one was found. ``"headers"``
        is a list of dicts with ``"id"`` and ``"title"`` keys, plus a
        ``"sub_headers"`` list of the same shape for headers that have
        nested entries. It is empty when no contents block was found.

    docutils is run with ``halt_level`` 2, so it aborts on problems at
    that level or above instead of returning partial output.
    """
    html = publish_parts(
        source=RST_TEMPLATE.format(rst), writer_name="html5", settings_overrides={
            "halt_level": 2, "syntax_highlight": "short", "initial_header_level": 3
        }
    )["html_body"]

    data = {
        "html": html,
        "headers": []
    }

    contents = CONTENTS_REGEX.search(html)  # Find the contents HTML

    if contents:
        # Remove the contents block from the document HTML
        data["html"] = html.replace(contents.group(0), "")
        data["headers"] = _parse_contents(contents.group(1))

    return data


# Register the roles imported from pysite.rst.roles with docutils under
# their canonical names, so documents passed to the renderer can use them.
for _role_name, _role_function in (
    ("icon", icon_role),
    ("page", page_role),
    ("url_for", url_for_role),
):
    register_canonical_role(_role_name, _role_function)
del _role_name, _role_function  # keep the loop variables out of the module namespace