#!/usr/bin/env python3
"""Convert PEM Fortran banner headers to Doxygen comments at parse time.

This filter is used by Doxygen via FILTER_PATTERNS. It does not modify source
files on disk; it only transforms the stream Doxygen reads.
"""

import re
import sys
from typing import Dict, List, Optional, Tuple

# Opening line of a MODULE or SUBROUTINE (case-insensitive); group 1 is the
# keyword, group 2 the symbol name.
DECL_RE = re.compile(r"^\s*(MODULE|SUBROUTINE)\s+([A-Za-z_][A-Za-z0-9_]*)\b", re.IGNORECASE)
# Closing "END MODULE" / "END SUBROUTINE" line.
END_DECL_RE = re.compile(r"^\s*END\s+(MODULE|SUBROUTINE)\b", re.IGNORECASE)
# SUBROUTINE statement with a parenthesised dummy-argument list; group 1 is the
# raw text between the parentheses.
SUB_DECL_ARGS_RE = re.compile(r"^\s*SUBROUTINE\s+[A-Za-z_][A-Za-z0-9_]*\s*\(([^)]*)\)", re.IGNORECASE)
# "intent(in|out|inout)" attribute; group 1 is the direction. "inout" is listed
# first so it is not shadowed by the shorter "in" alternative.
INTENT_RE = re.compile(r"intent\s*\(\s*(inout|in|out)\s*\)", re.IGNORECASE)
# A string that consists of exactly one identifier.
VAR_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
# Banner delimiter: a comment line made only of "!" followed by 10+ dashes.
DELIM_RE = re.compile(r"^\s*!-{10,}\s*$")
# Any comment line; group 1 is the text after "!" and one optional whitespace
# character.
COMMENT_RE = re.compile(r"^\s*!\s?(.*)$")
# "name : text", "name = text" or "name - text" inside a comment; group 1 is the
# name, group 2 the description.
ARG_COMMENT_DESC_RE = re.compile(r"^([A-Za-z_][A-Za-z0-9_]*)\s*[:=-]\s*(.+)$")

# Banner section headings recognised by parse_header(). A comment line is a
# heading when its stripped, upper-cased text equals one of these entries.
# build_doxygen_block() reads every section except "NAME".
SECTION_NAMES = {
    "NAME",
    "DESCRIPTION",
    "AUTHORS & DATE",
    "NOTES",
}


def parse_header(lines: List[str], start: int) -> Tuple[Optional[Dict[str, List[str]]], int]:
    """Read a delimiter-enclosed banner beginning at ``lines[start]``.

    The banner must open with a delimiter line, contain only comment lines and
    close with another delimiter line.

    Returns:
        ``(sections, next_index)`` on success, where ``sections`` maps every
        known section heading to its collected (stripped) text lines and
        ``next_index`` is the line after the closing delimiter.
        ``(None, start)`` when there is no well-formed banner at ``start``.
    """
    total = len(lines)
    if start >= total or DELIM_RE.match(lines[start]) is None:
        return None, start

    sections: Dict[str, List[str]] = {name: [] for name in SECTION_NAMES}
    active: Optional[str] = None

    for idx in range(start + 1, total):
        row = lines[idx].rstrip("\n")

        # Closing delimiter: the banner is complete.
        if DELIM_RE.match(row):
            return sections, idx + 1

        found = COMMENT_RE.match(row)
        if found is None:
            # A non-comment line inside the banner means it is not a banner.
            return None, start

        body = found.group(1).strip()
        heading = body.upper()

        if heading in SECTION_NAMES:
            active = heading
            continue
        if active is None:
            # Text before the first recognised heading is ignored.
            continue

        bucket = sections[active]
        if body:
            bucket.append(body)
        elif bucket and bucket[-1] != "":
            # Keep one blank as a paragraph break; never lead with a blank or
            # stack several in a row.
            bucket.append("")

    # Ran off the end of the input without seeing the closing delimiter.
    return None, start


def collapse_blanks(items: List[str]) -> List[str]:
    """Return *items* without leading/trailing blanks and without blank runs.

    An item counts as blank when it is empty or whitespace-only. Consecutive
    blank items are reduced to the first one of the run; non-blank items are
    passed through unchanged.
    """
    kept: List[str] = []
    for entry in items:
        if entry.strip():
            kept.append(entry)
        elif kept and kept[-1].strip():
            # A blank is kept only directly after a non-blank item.
            kept.append(entry)

    # At most one blank can be left dangling at the end.
    if kept and not kept[-1].strip():
        kept.pop()
    return kept


def _split_code_and_comment(raw: str, quote: Optional[str] = None) -> Tuple[str, str, Optional[str]]:
    """Split one source line into its code part and its trailing comment text.

    A ``!`` starts a comment only when it is outside a character literal.

    Args:
        raw: The line, without its trailing newline.
        quote: Delimiter (``'`` or ``"``) of a literal that is still open from
            the previous line of a continued statement, or ``None``.

    Returns:
        ``(code, comment, open_quote)``; ``comment`` excludes the ``!`` and
        ``open_quote`` is the delimiter still open at the end of the line.
    """
    for pos, ch in enumerate(raw):
        if quote is not None:
            # Inside a literal: only the matching delimiter is significant. A
            # doubled delimiter closes and immediately reopens the literal.
            if ch == quote:
                quote = None
        elif ch in "'\"":
            quote = ch
        elif ch == "!":
            return raw[:pos], raw[pos + 1:], None
    return raw, "", quote


def collect_continued_statement(lines: List[str], start: int, end: int) -> Tuple[str, str, int]:
    """Join a statement that may span several ``&``-continued lines.

    Scanning starts at ``lines[start]`` and never reads ``lines[end]`` or
    beyond. A line continues the statement when the previous code line ends
    with ``&`` or when it starts with ``&``. Comment-only and blank lines that
    sit between two continuation lines are skipped instead of ending the
    statement, and ``!`` characters inside character literals are not treated
    as comment starts.

    Returns:
        ``(statement, comment, last_index)`` where ``statement`` is the code of
        all parts joined with single spaces (continuation markers removed),
        ``comment`` is the space-joined text of every trailing/interleaved
        comment, and ``last_index`` is the index of the last line belonging to
        the statement (``end`` if the input stops while a continuation is still
        pending).
    """
    parts: List[str] = []
    comment_chunks: List[str] = []
    quote: Optional[str] = None
    continuing = False
    i = start

    while i < end:
        raw = lines[i].rstrip("\n")
        code, comment, quote = _split_code_and_comment(raw, quote)
        if comment.strip():
            comment_chunks.append(comment.strip())

        # A comment-only or blank line in the middle of a continued statement
        # does not terminate it; keep looking for the next code line.
        if continuing and not code.strip():
            i += 1
            continue

        has_trailing_amp = code.rstrip().endswith("&")

        if parts:
            code = re.sub(r"^\s*&\s*", "", code)
        code = re.sub(r"&\s*$", "", code).rstrip()
        parts.append(code)

        next_starts_amp = i + 1 < end and re.match(r"^\s*&", lines[i + 1]) is not None

        if not has_trailing_amp and not next_starts_amp:
            break

        continuing = has_trailing_amp
        i += 1

    statement = " ".join(part.strip() for part in parts if part.strip())
    merged_comment = " ".join(comment_chunks).strip()
    return statement, merged_comment, i


def parse_signature_args(lines: List[str], start: int) -> List[str]:
    """Return the dummy-argument names of the SUBROUTINE starting at ``start``.

    The (possibly continued) statement is joined first; an empty list is
    returned when it has no parenthesised argument list. Entries that are not
    plain identifiers are dropped.
    """
    joined = collect_continued_statement(lines, start, len(lines))[0]
    found = SUB_DECL_ARGS_RE.match(joined)
    if found is None:
        return []

    # Stray continuation markers are removed from each candidate name.
    candidates = (piece.strip().replace("&", "") for piece in found.group(1).split(","))
    return [candidate for candidate in candidates if VAR_RE.match(candidate)]


# Free-form Fortran allows the blank between END and SUBROUTINE to be omitted,
# so both "END SUBROUTINE" and "ENDSUBROUTINE" close a subroutine.
_END_SUBROUTINE_RE = re.compile(r"^\s*END\s*SUBROUTINE\b", re.IGNORECASE)


def find_subroutine_end(lines: List[str], start: int) -> int:
    """Return the index of the first subroutine-closing line at or after ``start``.

    Both ``END SUBROUTINE`` and ``ENDSUBROUTINE`` (any letter case) are
    recognised. ``len(lines)`` is returned when no such line exists.
    """
    for index in range(start, len(lines)):
        if _END_SUBROUTINE_RE.match(lines[index]):
            return index
    return len(lines)


def looks_like_section_title(text: str) -> bool:
    """Tell whether comment text reads like an upper-case section heading.

    After stripping, the text must start with an upper-case ASCII letter and
    contain only upper-case letters, digits, spaces and the characters
    ``_ & / ( ) -``. Empty text and dash-only rules are never headings.
    """
    candidate = text.strip()
    if not candidate:
        return False
    if set(candidate) == {"-"}:
        return False
    return re.match(r"^[A-Z][A-Z0-9 _&/()-]*$", candidate) is not None


# Leading identifier of one entity in a declaration list. It must be followed
# by an array/coarray spec, an initialiser or pointer assignment, a character
# length ("*"), or the end of the entity.
_DECL_ENTITY_RE = re.compile(r"^\s*([A-Za-z_][A-Za-z0-9_]*)\s*(?:[(\[=*]|$)")


def _split_top_level_commas(text: str) -> List[str]:
    """Split *text* on commas that are not nested inside ``()`` or ``[]``.

    This keeps array specs such as ``a(n,m)`` in one piece.
    """
    pieces: List[str] = []
    current: List[str] = []
    depth = 0
    for ch in text:
        if ch in "([":
            depth += 1
        elif ch in ")]":
            # Clamp so unbalanced input cannot drive the depth negative.
            depth = max(depth - 1, 0)
        if ch == "," and depth == 0:
            pieces.append("".join(current))
            current = []
        else:
            current.append(ch)
    pieces.append("".join(current))
    return pieces


def parse_argument_metadata(lines: List[str], start: int, end: int) -> Dict[str, Dict[str, str]]:
    """Collect intent and description for variables declared under ARGUMENTS.

    Lines ``start`` .. ``end - 1`` are scanned. Nothing is recorded until a
    comment whose text is ``ARGUMENTS`` (any case) is seen; scanning stops at
    the next comment that looks like a section title. Inside that region:

    * a comment of the form ``name : text`` / ``name = text`` / ``name - text``
      supplies a description for ``name``;
    * a declaration containing ``::`` supplies the ``intent(...)`` found on its
      left-hand side and, as description, the comment(s) attached to the
      statement, for every entity it declares. Entities with multi-dimensional
      array specs such as ``a(n,m)`` are handled.

    The first intent and the first description seen for a name win.

    Returns:
        Mapping from lower-cased variable name to ``{"intent": ..., "desc": ...}``
        (either value may be the empty string).
    """
    metadata: Dict[str, Dict[str, str]] = {}
    in_arguments = False
    i = start

    while i < end:
        raw = lines[i].rstrip("\n")
        comment = COMMENT_RE.match(raw)

        if comment is not None:
            text = comment.group(1).strip()
            if text.upper() == "ARGUMENTS":
                in_arguments = True
            elif in_arguments:
                if looks_like_section_title(text):
                    # The next banner-style heading ends the ARGUMENTS region.
                    break
                m = ARG_COMMENT_DESC_RE.match(text)
                if m is not None:
                    entry = metadata.setdefault(m.group(1).lower(), {"intent": "", "desc": ""})
                    if entry["desc"] == "":
                        entry["desc"] = m.group(2).strip()
            i += 1
            continue

        if not in_arguments or raw.strip() == "":
            i += 1
            continue

        statement, description, last_i = collect_continued_statement(lines, i, end)
        i = last_i + 1
        if "::" not in statement:
            continue

        left, right = statement.split("::", 1)
        intent_match = INTENT_RE.search(left)
        intent = intent_match.group(1).lower() if intent_match else ""

        # Split only on top-level commas so "a(n,m), b" yields "a(n,m)" and "b".
        for token in _split_top_level_commas(right):
            entity = _DECL_ENTITY_RE.match(token)
            if entity is None:
                continue

            entry = metadata.setdefault(entity.group(1).lower(), {"intent": "", "desc": ""})
            if intent and entry["intent"] == "":
                entry["intent"] = intent
            if description and entry["desc"] == "":
                entry["desc"] = description

    return metadata


def default_param_desc(intent: str) -> str:
    """Return the placeholder description used for an undocumented argument.

    The text depends on the (lower-case) intent; any other value, including
    the empty string, yields the generic ``"Argument."``.
    """
    known = (
        ("in", "Input argument."),
        ("out", "Output argument."),
        ("inout", "Input/output argument."),
    )
    for direction, wording in known:
        if intent == direction:
            return wording
    return "Argument."


def normalize_desc(desc: str) -> str:
    """Trim a description and make sure it ends with sentence punctuation.

    Blank input becomes ``"Argument."``; otherwise a full stop is appended
    unless the text already ends with ``.``, ``!`` or ``?``.
    """
    trimmed = desc.strip()
    if not trimmed:
        return "Argument."
    if trimmed.endswith((".", "!", "?")):
        return trimmed
    return trimmed + "."


def build_param_docs(signature_args: List[str], metadata: Dict[str, Dict[str, str]]) -> List[Tuple[str, str]]:
    """Pair every signature argument with its final description text.

    Metadata is looked up by lower-cased name. An argument without a recorded
    description falls back to the intent-based default; every description is
    normalised before being returned. Order follows ``signature_args``.
    """

    def describe(argument: str) -> str:
        record = metadata.get(argument.lower(), {"intent": "", "desc": ""})
        wording = record.get("desc", "")
        if not wording:
            wording = default_param_desc(record.get("intent", ""))
        return normalize_desc(wording)

    return [(argument, describe(argument)) for argument in signature_args]


def build_doxygen_block(
    kind: str,
    symbol: str,
    sections: Dict[str, List[str]],
    params: Optional[List[Tuple[str, str]]] = None,
) -> List[str]:
    """Render parsed banner sections as a Fortran Doxygen comment block.

    The first DESCRIPTION line becomes ``\\brief`` (or ``"<Kind> <symbol>."``
    when there is no description); the remaining description lines become
    ``\\details`` paragraphs. Then follow one ``\\param`` line per entry in
    *params*, one ``\\author`` line per AUTHORS & DATE line, and the NOTES
    text as ``\\note`` paragraphs. Each group is preceded by a bare ``!!``
    separator line.

    Returns:
        The block as a list of newline-terminated lines.
    """
    description = collapse_blanks(sections.get("DESCRIPTION", []))
    author_lines = collapse_blanks(sections.get("AUTHORS & DATE", []))
    note_lines = collapse_blanks(sections.get("NOTES", []))

    def tagged_paragraphs(tag: str, texts: List[str]) -> List[str]:
        # One separator, then the text; the first line of every paragraph
        # carries the tag and a blank entry starts a new paragraph.
        emitted = ["!!\n"]
        paragraph_start = True
        for text in texts:
            if text.strip() == "":
                emitted.append("!!\n")
                paragraph_start = True
            elif paragraph_start:
                emitted.append(f"!! {tag} {text}\n")
                paragraph_start = False
            else:
                emitted.append(f"!! {text}\n")
        return emitted

    brief = description[0] if description else f"{kind.title()} {symbol}."
    block: List[str] = [f"!> \\brief {brief}\n"]

    details = description[1:]
    if details:
        block.extend(tagged_paragraphs("\\details", details))

    if params:
        block.append("!!\n")
        block.extend(f"!! \\param {name} {wording}\n" for name, wording in params)

    if author_lines:
        block.append("!!\n")
        block.extend(f"!! \\author {author}\n" for author in author_lines if author.strip())

    if note_lines:
        block.extend(tagged_paragraphs("\\note", note_lines))

    return block


def transform(lines: List[str]) -> List[str]:
    """Rewrite banner headers that follow MODULE/SUBROUTINE statements.

    For every MODULE or SUBROUTINE statement that is immediately followed by a
    well-formed banner, the output contains the generated Doxygen block, then
    the (possibly continued) declaration lines; the banner itself is dropped.
    All other lines are copied through unchanged.

    NOTE(review): the generated block usually has a different number of lines
    than the banner it replaces, so output line numbers do not match the
    input; check whether that matters for the Doxygen features in use.
    """
    result: List[str] = []
    total = len(lines)
    pos = 0

    while pos < total:
        current = lines[pos]
        decl = DECL_RE.match(current)
        if decl is None or END_DECL_RE.match(current) is not None:
            result.append(current)
            pos += 1
            continue

        kind = decl.group(1).upper()
        symbol = decl.group(2)
        is_subroutine = kind == "SUBROUTINE"

        # Only SUBROUTINE statements are followed across continuation lines.
        last_decl_line = pos
        if is_subroutine:
            last_decl_line = collect_continued_statement(lines, pos, total)[2]

        sections, resume_at = parse_header(lines, last_decl_line + 1)
        if sections is None:
            # No banner right after the declaration: pass the line through.
            result.append(current)
            pos += 1
            continue

        params: List[Tuple[str, str]] = []
        if is_subroutine:
            arg_names = parse_signature_args(lines, pos)
            if arg_names:
                body_end = find_subroutine_end(lines, resume_at)
                arg_info = parse_argument_metadata(lines, resume_at, body_end)
                params = build_param_docs(arg_names, arg_info)

        # Doxygen block first, then the declaration it documents.
        result.extend(build_doxygen_block(kind, symbol, sections, params=params))
        result.extend(lines[pos:last_decl_line + 1])
        pos = resume_at

    return result


def main() -> None:
    """Filter the file named by ``argv[1]`` (or stdin) and write it to stdout.

    Input and output are both treated as UTF-8 with undecodable/unencodable
    characters replaced, so the stdin path behaves like the file path and a
    replacement character can always be written regardless of the locale.
    """
    # reconfigure() exists on the standard text streams from Python 3.7; skip
    # silently when the stream has been swapped for an object without it.
    reconfigure_out = getattr(sys.stdout, "reconfigure", None)
    if reconfigure_out is not None:
        reconfigure_out(encoding="utf-8", errors="replace")

    if len(sys.argv) > 1:
        with open(sys.argv[1], "r", encoding="utf-8", errors="replace") as handle:
            content = handle.read().splitlines(True)
    else:
        reconfigure_in = getattr(sys.stdin, "reconfigure", None)
        if reconfigure_in is not None:
            reconfigure_in(encoding="utf-8", errors="replace")
        content = sys.stdin.read().splitlines(True)

    sys.stdout.writelines(transform(content))


# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
