"""Deterministic dictation grading: align a blind transcription against a
known reference and emit a strict JSON report.

See specs/ocr.md. The VLM transcription step lives elsewhere; this module
never sees the image and is fully testable with a fixed transcription string.
"""

import re
import unicodedata
from difflib import SequenceMatcher
from typing import Literal, NamedTuple

from pydantic import BaseModel

# The four verdicts a graded token can receive. Used as the type of
# ``Word.status`` and as the tally keys in ``_summarize``.
Status = Literal["correct", "misspelled", "missing", "extra"]

# A word is a maximal run of word characters (Unicode letters/digits/_);
# any other non-space character is a standalone punctuation token.
# Whitespace matches neither alternative, so it only separates tokens.
# NOTE(review): ``\w`` does not appear to cover combining-mark characters
# that survive NFC normalization, so this pattern yields them as separate
# single-character matches — confirm for the languages being graded.
_TOKEN_RE = re.compile(r"\w+|[^\w\s]", re.UNICODE)


class Token(NamedTuple):
    """One lexical unit produced by ``tokenize``."""

    # Surface form of the token, taken from the NFC-normalized input.
    text: str
    # True when the token starts with a word character, False for punctuation.
    is_word: bool


class GradeOptions(NamedTuple):
    """Pedagogy knobs for grading (see specs/ocr.md §8). Defaults match the
    spec: case matters, punctuation is reported but not counted, diacritics
    are graded."""

    # When False, comparison keys ignore letter case ("Haus" matches "haus").
    case_sensitive: bool = True
    # When True, punctuation tokens are aligned and tallied alongside words.
    # NOTE(review): when False, ``_word_texts`` drops punctuation before
    # alignment, so it does not appear in the word list at all — confirm this
    # is what the spec means by "reported but not counted".
    grade_punctuation: bool = False
    # When False, combining marks are stripped from comparison keys, so an
    # accent-only difference does not make two words differ.
    grade_diacritics: bool = True


def comparison_key(word: str, options: GradeOptions) -> str:
    """Map a surface word to the key used for alignment/equality, applying the
    active grading options. The surface form is preserved for display; only
    this key decides whether two words count as 'the same'.

    The key is always NFC-normalized. With ``case_sensitive`` off it is
    lower-cased; with ``grade_diacritics`` off, combining marks are stripped
    after canonical decomposition (so ``é`` compares equal to ``e``).

    Never maps ß→ss or ö→oe — diacritic *grading* only strips combining marks
    (accents), and ß is not a combining mark, so it always stays distinct.
    For the same reason case is ignored with ``str.lower()`` rather than
    ``str.casefold()``: full case folding expands ß to ``ss`` (and ligatures
    such as ``ﬁ`` to ``fi``), which would silently accept ``Strasse`` for
    ``Straße`` whenever case-insensitive grading is enabled."""
    key = unicodedata.normalize("NFC", word)
    if not options.case_sensitive:
        # lower() keeps ß as ß (and maps capital ẞ to ß); casefold() would
        # rewrite it to "ss" and break the guarantee documented above.
        key = key.lower()
    if not options.grade_diacritics:
        decomposed = unicodedata.normalize("NFD", key)
        key = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    # Recompose once at the end so every code path returns an NFC key.
    return unicodedata.normalize("NFC", key)


def tokenize(text: str) -> list[Token]:
    """NFC-normalize then split into word and punctuation tokens.

    The regex word class covers letters, digits and ``_`` but not Unicode
    mark characters (general category ``M*``). Marks that survive NFC — e.g.
    Devanagari or Thai vowel signs, or accents with no precomposed form —
    would therefore cut a word into fragments and surface as punctuation.
    To keep such words whole, a mark that directly follows a word token (no
    gap in the text) is appended to it, and so is a word run that directly
    follows the result. Marks with no preceding word stay standalone
    punctuation tokens, and text without marks tokenizes exactly as the
    pattern alone would."""
    text = unicodedata.normalize("NFC", text)
    tokens: list[Token] = []
    prev_end = -1  # end offset of the previous match; equal start == no gap
    for match in _TOKEN_RE.finditer(text):
        piece = match.group()
        is_word = re.match(r"\w", piece) is not None
        is_mark = not is_word and all(
            unicodedata.category(ch).startswith("M") for ch in piece
        )
        follows_word = (
            bool(tokens) and tokens[-1].is_word and match.start() == prev_end
        )
        if follows_word and (is_word or is_mark):
            # Same word continuing across a mark: extend instead of splitting.
            tokens[-1] = Token(tokens[-1].text + piece, is_word=True)
        else:
            tokens.append(Token(piece, is_word=is_word))
        prev_end = match.end()
    return tokens


def graphemes(text: str) -> list[str]:
    """Split a string into grapheme clusters (a base char plus any trailing
    combining marks), so combining diacritics don't desync a character diff.

    A character counts as a trailing mark when its Unicode general category
    is ``Mn``, ``Mc`` or ``Me``. This is deliberately broader than a non-zero
    canonical combining class: many marks (spacing vowel signs, enclosing
    marks, several Thai and Indic signs) have class 0 and would otherwise be
    split from their base. A mark with nothing before it starts its own
    cluster. This is an approximation of full grapheme segmentation, not an
    implementation of it (no joiner or emoji-sequence handling)."""
    clusters: list[str] = []
    for ch in text:
        if clusters and unicodedata.category(ch).startswith("M"):
            clusters[-1] += ch
        else:
            clusters.append(ch)
    return clusters


def char_diff(expected: str, read: str) -> str:
    """Human-readable grapheme-level diff describing how ``read`` deviates from
    ``expected`` (e.g. ``ß→ss``, ``-t``, ``+e``). Empty string if identical.

    Both inputs are NFC-normalized first, so canonically equivalent spellings
    (precomposed vs. decomposed accents) count as identical instead of
    producing a diff that looks like ``é→é``. Edits are listed left to right
    and joined with ``", "``: ``a→b`` for a substitution, ``-a`` for a
    grapheme missing from ``read``, ``+b`` for one added in ``read``."""
    a = graphemes(unicodedata.normalize("NFC", expected))
    b = graphemes(unicodedata.normalize("NFC", read))
    # autojunk=False: the default heuristic ignores frequent items once a
    # sequence reaches 200 elements, which would coarsen diffs of long tokens.
    matcher = SequenceMatcher(a=a, b=b, autojunk=False)
    parts: list[str] = []
    for op, i1, i2, j1, j2 in matcher.get_opcodes():
        if op == "equal":
            continue
        exp_chunk, read_chunk = "".join(a[i1:i2]), "".join(b[j1:j2])
        if op == "replace":
            parts.append(f"{exp_chunk}→{read_chunk}")
        elif op == "delete":
            parts.append(f"-{exp_chunk}")
        elif op == "insert":
            parts.append(f"+{read_chunk}")
    return ", ".join(parts)


class Word(BaseModel):
    """One graded word in the report (specs/ocr.md §7).

    ``expected`` is None for ``extra``; ``read`` is None for ``missing``;
    ``diff`` is present only for ``misspelled``."""

    # Zero-based position of this entry in the aligned output list.
    index: int
    # Surface form from the reference text, or None when there is none.
    expected: str | None
    # Surface form from the transcription, or None when there is none.
    read: str | None
    # Verdict for this slot; one of the ``Status`` literals.
    status: Status
    # Grapheme-level description of the deviation (see ``char_diff``).
    diff: str | None = None


def _word_texts(text: str, options: GradeOptions) -> list[str]:
    """Surface forms that take part in alignment, in text order.

    Word tokens are always kept; punctuation tokens are kept only when
    ``options.grade_punctuation`` is set."""
    tokens = tokenize(text)
    if options.grade_punctuation:
        # Everything the tokenizer produced is graded.
        return [token.text for token in tokens]
    return [token.text for token in tokens if token.is_word]


def align_words(
    reference: str, transcription: str, options: GradeOptions | None = None
) -> list[Word]:
    """Align the blind transcription against the reference at word level and
    classify each token (specs/ocr.md stages 3-4). Deterministic, no model.

    Both texts are tokenized and reduced to comparison keys under ``options``
    (defaults apply when it is None). The key sequences are diffed; equality
    is decided on keys, while the returned entries carry the original surface
    forms. Matching runs become ``correct``; a replaced run is paired up
    position by position as ``misspelled`` with any surplus on either side
    reported as ``missing`` (reference only) or ``extra`` (transcription
    only); pure deletions and insertions become ``missing`` and ``extra``.
    Entries are returned in alignment order, indexed from 0."""
    options = options or GradeOptions()
    ref = _word_texts(reference, options)
    read = _word_texts(transcription, options)
    ref_keys = [comparison_key(w, options) for w in ref]
    read_keys = [comparison_key(w, options) for w in read]

    words: list[Word] = []

    def emit(expected: str | None, got: str | None, status: Status) -> None:
        # Only misspellings carry a diff; an empty diff is stored as None.
        diff = char_diff(expected, got) if status == "misspelled" else None
        words.append(
            Word(
                index=len(words),
                expected=expected,
                read=got,
                status=status,
                diff=diff or None,
            )
        )

    # autojunk=False is essential here. With the default heuristic, once the
    # transcription reaches 200 tokens every key that makes up more than 1% of
    # it (articles, "and", ...) is treated as "popular" and no longer anchors
    # matches, so correctly written common words can land inside a replace
    # block and be reported as misspelled.
    matcher = SequenceMatcher(a=ref_keys, b=read_keys, autojunk=False)
    for op, i1, i2, j1, j2 in matcher.get_opcodes():
        if op == "equal":
            for i, j in zip(range(i1, i2), range(j1, j2)):
                emit(ref[i], read[j], "correct")
        elif op == "replace":
            # Pair up as misspellings; leftovers are missing/extra.
            paired = min(i2 - i1, j2 - j1)
            for k in range(paired):
                emit(ref[i1 + k], read[j1 + k], "misspelled")
            for i in range(i1 + paired, i2):
                emit(ref[i], None, "missing")
            for j in range(j1 + paired, j2):
                emit(None, read[j], "extra")
        elif op == "delete":
            for i in range(i1, i2):
                emit(ref[i], None, "missing")
        elif op == "insert":
            for j in range(j1, j2):
                emit(None, read[j], "extra")

    return words


class Summary(BaseModel):
    """Tally over the graded words. ``total`` counts reference words only
    (correct + misspelled + missing); extras don't inflate it."""

    # correct + misspelled + missing.
    total: int
    # Number of entries per status.
    correct: int
    misspelled: int
    missing: int
    extra: int
    # correct / total rounded to 4 decimals; 0.0 when total is 0.
    accuracy: float


class GradeReport(BaseModel):
    """The strict JSON grading report (specs/ocr.md §7)."""

    # Language tag, passed through unchanged by ``grade``.
    lang: str
    # Reference text as stored by ``grade`` (NFC-normalized).
    reference: str
    # Transcription text as stored by ``grade`` (NFC-normalized).
    transcription: str
    # Per-token verdicts in alignment order.
    words: list[Word]
    # Counts and accuracy derived from ``words``.
    summary: Summary


def _summarize(words: list[Word]) -> Summary:
    """Count the graded words per status and derive the accuracy.

    ``total`` covers reference words only (correct, misspelled, missing).
    Accuracy is the correct share of that total rounded to four decimals,
    or 0.0 when there are no reference words."""
    tally = dict.fromkeys(("correct", "misspelled", "missing", "extra"), 0)
    for graded in words:
        tally[graded.status] += 1

    reference_total = tally["correct"] + tally["misspelled"] + tally["missing"]
    if reference_total:
        accuracy = round(tally["correct"] / reference_total, 4)
    else:
        accuracy = 0.0

    return Summary(
        total=reference_total,
        correct=tally["correct"],
        misspelled=tally["misspelled"],
        missing=tally["missing"],
        extra=tally["extra"],
        accuracy=accuracy,
    )


def grade(
    reference: str,
    transcription: str,
    lang: str,
    options: GradeOptions | None = None,
) -> GradeReport:
    """Produce the validated grading report for one transcription
    (specs/ocr.md stages 3-5). Works on text only; no image or model call.

    The words are aligned under ``options`` (defaults when omitted), then
    summarized. The report stores both texts in NFC form and passes ``lang``
    through unchanged."""
    active = options if options else GradeOptions()
    graded_words = align_words(reference, transcription, active)
    normalized_reference, normalized_transcription = (
        unicodedata.normalize("NFC", raw) for raw in (reference, transcription)
    )
    summary = _summarize(graded_words)
    return GradeReport(
        lang=lang,
        reference=normalized_reference,
        transcription=normalized_transcription,
        words=graded_words,
        summary=summary,
    )


# One-character marker per status, used by ``format_text_report`` as the
# prefix of each word line.
_MARKS = {"correct": "✓", "misspelled": "✗", "missing": "·", "extra": "+"}


def format_text_report(report: GradeReport) -> str:
    """Turn a GradeReport into plain text for humans, using nothing but the
    report's own fields (specs/ocr.md §2).

    The first line is a header with language, score and per-status counts;
    each following line is one graded word prefixed by its status marker."""

    def describe(entry: Word) -> str:
        # Text that follows the marker on a word line.
        if entry.status == "correct":
            return f"{entry.read}"
        if entry.status == "misspelled":
            return f"{entry.expected} → {entry.read}  [{entry.diff}]"
        if entry.status == "missing":
            return f"{entry.expected}  (missing)"
        return f"{entry.read}  (extra)"

    summary = report.summary
    percent = summary.accuracy * 100
    header = (
        f"[{report.lang}] {summary.correct}/{summary.total} correct "
        f"({percent:.0f}%)  "
        f"misspelled={summary.misspelled} missing={summary.missing} "
        f"extra={summary.extra}"
    )

    rendered = [header]
    for entry in report.words:
        glyph = _MARKS[entry.status]
        rendered.append(f"  {glyph} {describe(entry)}")
    return "\n".join(rendered)