#!/usr/bin/env python3
"""Dictation Trainer — Gradio Space (specs/gradio.md).

A thin, mobile-first wrapper over three Modal endpoints. No models run here:
every inference is an HTTP call out to Modal.

  Generate: word list + level --LLM--> German dictation --TTS--> audio
  Check:    photo of handwriting --OCR(blind)--> text --grade--> diff + score

Run locally from this directory (the Space root):
  MODAL_LLM_URL=... MODAL_TTS_URL=... MODAL_OCR_URL=... uv run python app.py
"""

import base64
import json
import os
import tempfile
import time

import gradio as gr
from loguru import logger

# Flat imports: this file is the Space entrypoint, run with space/ as the root.
from diff_html import render_report_html
from ocr.grading import grade
from ocr.transcribe import transcribe_image
from openai_client import make_client
from prompts import (
    DICTATION_SYSTEM_PROMPT,
    build_user_prompt,
    clean_dictation,
    parse_word_list,
)
from wizard import nav

# Model identifier sent with every chat-completion request (see call_llm).
LLM_MODEL = "LiquidAI/LFM2.5-8B-A1B-GGUF"
# Higgs ignores the model field today, but the OpenAI SDK requires one; keep it
# descriptive in case the server starts validating it.
TTS_MODEL = "bosonai/higgs-audio-v3-tts-4b"
TTS_VOICE = "alba"
# Sampling fixed per spec §6 — low temperature and a tight top_p for stable,
# repeatable dictations. top_k and repeat_penalty aren't OpenAI-standard params,
# so the whole mapping is handed to the SDK through extra_body (see call_llm).
LLM_SAMPLING = dict(
    temperature=0.1,
    top_p=0.1,
    top_k=50,
    repeat_penalty=1.05,
)
# LFM2.5 is a reasoning model: it emits a chain-of-thought into `reasoning_content`
# BEFORE the answer goes into `content`. The token budget must cover BOTH, or the
# reasoning consumes it all and `content` comes back empty (finish_reason=length).
# The verbose system prompt lengthens the reasoning, so keep this well above the
# ~900 tokens of CoT we observed. See space/debug_llm.py.
LLM_MAX_TOKENS = 2048
# Language code passed to the grader (see check).
LANG = "de"

# Appended to the status messages shown while a backend call is running.
COLD_START_HINT = "First call after idle can take ~30-60s while backend warms up."

# Calm, modern theme: indigo/violet accents on slate neutrals, a soft gradient
# page, white cards with gentle shadows, roomy radius + spacing. Most of the look
# lives here; the CSS below only does the things themes can't (phone framing,
# gradient title, status pill).
#
# Step 1: the Soft base theme with our hues, font stack and size scales.
THEME = gr.themes.Soft(
    primary_hue=gr.themes.colors.indigo,
    secondary_hue=gr.themes.colors.sky,
    neutral_hue=gr.themes.colors.slate,
    font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
    radius_size=gr.themes.sizes.radius_lg,
    spacing_size=gr.themes.sizes.spacing_lg,
    text_size=gr.themes.sizes.text_md,
)
# Step 2: per-variable overrides, grouped by the surface they style.
THEME = THEME.set(
    # Page background + body text.
    body_background_fill="linear-gradient(160deg, #eef2fb 0%, #e9ecf7 45%, #ece9f7 100%)",
    body_background_fill_dark="linear-gradient(160deg, #0f1422 0%, #141b2d 100%)",
    body_text_color="#1e293b",
    body_text_color_subdued="#64748b",
    # Cards (blocks): borderless, rounded, softly shadowed.
    block_background_fill="#ffffff",
    block_background_fill_dark="#1a2234",
    block_border_width="0px",
    block_radius="18px",
    block_shadow="0 6px 24px rgba(30, 41, 59, 0.08)",
    block_shadow_dark="0 6px 24px rgba(0, 0, 0, 0.40)",
    block_padding="20px",
    layout_gap="14px",
    # Inputs.
    input_background_fill="#f8fafc",
    input_background_fill_dark="#0f1626",
    input_radius="12px",
    # Buttons: geometry for large buttons, gradient + glow for primary ones.
    button_large_radius="12px",
    button_large_padding="11px 18px",
    button_primary_background_fill="linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%)",
    button_primary_background_fill_hover="linear-gradient(135deg, #4f46e5 0%, #7c3aed 100%)",
    button_primary_background_fill_dark="linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%)",
    button_primary_text_color="#ffffff",
    button_primary_shadow="0 4px 14px rgba(99, 102, 241, 0.35)",
    button_primary_shadow_hover="0 6px 18px rgba(99, 102, 241, 0.45)",
)

# Portrait-phone framing + the bits the theme can't express. Passed to launch()
# as `css=`. Rules, in order:
#   .gradio-container          narrow (480px max) centered column
#   .app-header/-logo/-title   centered logo + gradient-text app title
#   .intro                     muted, centered first-step blurb
#   .panel-title (+ h3)        transparent, padded, larger step heading
#   .status (+ .fa-spinner)    centered status line with the spinner icon
MOBILE_CSS = """
.gradio-container {
    max-width: 480px !important;
    margin: 0 auto !important;
    padding: 12px 14px 28px !important;
}
.app-header {
    display: flex;
    align-items: center;
    justify-content: center;
    gap: 12px;
    margin: 12px 0 4px;
}
.app-logo {
    height: 128px;
    width: auto;
    flex: 0 0 auto;
}
.app-title {
    font-size: 1.6rem;
    font-weight: 700;
    background: linear-gradient(135deg, #6366f1, #8b5cf6);
    -webkit-background-clip: text;
    background-clip: text;
    -webkit-text-fill-color: transparent;
}
.intro {
    text-align: center;
    color: #64748b;
    font-size: 0.95rem;
    line-height: 1.45;
    margin: 0 6px 6px;
}
/* Step title: transparent (no clipped grey strip), larger, and padded so the
   card's rounded corner never crops the leading "1 ·". */
.panel-title {
    background: transparent !important;
    box-shadow: none !important;
    border: none !important;
    padding: 6px 18px 2px !important;
}
.panel-title h3 {
    font-size: 1.35rem !important;
    font-weight: 700 !important;
    color: #4f46e5 !important;
    margin: 0 !important;
    line-height: 1.3;
}
.status {
    text-align: center;
    font-weight: 600;
    opacity: 0.9;
}
.status .fa-spinner {
    margin-right: 6px;
}
"""

# FontAwesome stylesheet (CDN) for the animated status spinner (fa-spinner +
# fa-spin). FA_HEAD is injected into <head> at launch; SPINNER is the icon markup
# itself, rendered as raw HTML inside the status fields (see _busy) — which is
# why those Markdown components set sanitize_html=False.
_FA_STYLESHEET_URL = (
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/css/all.min.css"
)
FA_HEAD = f'<link rel="stylesheet" href="{_FA_STYLESHEET_URL}">'
SPINNER = '<i class="fa-solid fa-spinner fa-spin"></i>'


# Absolute path of the logo PNG, resolved next to this file. Used twice: embedded
# as base64 in the in-app header, and passed as the PWA / favicon icon at launch.
_APP_DIR = os.path.dirname(os.path.abspath(__file__))
LOGO_PATH = os.path.join(_APP_DIR, "assets", "ankira_blue.png")


def _logo_data_uri() -> str:
    """Return the logo at LOGO_PATH as a base64 ``data:image/png`` URI.

    Inlining the image means the header needs no Gradio file-serving allowlist.
    If the file cannot be read (OSError) an empty string is returned, and the
    header falls back to the ✍️ emoji.
    """
    try:
        with open(LOGO_PATH, "rb") as logo_file:
            raw_png = logo_file.read()
    except OSError:
        return ""
    encoded = base64.b64encode(raw_png).decode("ascii")
    return "data:image/png;base64," + encoded


# Header markup, built once at import: the inlined logo (or the emoji fallback
# when the logo file could not be read) followed by the gradient app title.
_LOGO_URI = _logo_data_uri()
if _LOGO_URI:
    _LOGO_TAG = f'<img class="app-logo" alt="Ankira" src="{_LOGO_URI}">'
else:
    _LOGO_TAG = "✍️"
HEADER_HTML = (
    '<div class="app-header">'
    + _LOGO_TAG
    + '<span class="app-title">Ankira: German Dictation Trainer</span></div>'
)


def _require_env(name: str) -> str:
    """Return the value of environment variable *name*.

    Raises:
        gr.Error: if the variable is unset or empty, so the user sees a
            configuration message in the UI.
    """
    value = os.environ.get(name, "")
    if value:
        return value
    raise gr.Error(f"{name} is not configured (set it in the Space secrets).")


def _tts_base_url() -> str:
    """Return MODAL_TTS_URL reduced to the server root.

    The variable may hold either the server root or the full speech path.
    Trailing slashes are dropped, then a trailing ``/v1/audio/speech`` or
    ``/audio/speech`` (checked in that order) is cut off so the client can
    append the speech path itself.
    """
    base = _require_env("MODAL_TTS_URL").rstrip("/")
    known_suffixes = ("/v1/audio/speech", "/audio/speech")
    # First suffix that matches wins; "" (nothing to strip) when none does.
    matched = next((s for s in known_suffixes if base.endswith(s)), "")
    return base[: len(base) - len(matched)]


def call_llm(words: list[str], level: str) -> str:
    """Word list + CEFR level -> one German dictation paragraph (cleaned).

    Sends the system prompt plus the user prompt built from *words* and *level*
    to the endpoint at MODAL_LLM_URL, then runs the reply through
    clean_dictation.

    Args:
        words: Words the dictation should practise.
        level: CEFR level label forwarded to build_user_prompt.

    Returns:
        The cleaned dictation text. It may be empty; in that case the raw
        response is logged here and the caller decides how to react.

    Raises:
        gr.Error: if MODAL_LLM_URL is not configured (via _require_env).
    """
    client = make_client(_require_env("MODAL_LLM_URL"))
    completion = client.chat.completions.create(
        model=LLM_MODEL,
        messages=[
            {"role": "system", "content": DICTATION_SYSTEM_PROMPT},
            {"role": "user", "content": build_user_prompt(words, level)},
        ],
        max_tokens=LLM_MAX_TOKENS,
        extra_body=LLM_SAMPLING,
    )
    data = completion.model_dump()
    # `choices` may be missing, None, or an empty list on a malformed response.
    # `.get("choices", [{}])[0]` would raise on the last two and skip the
    # diagnostic log below, so fall back to an empty choice instead.
    choices = data.get("choices") or [{}]
    choice = choices[0] or {}
    content = (choice.get("message") or {}).get("content")
    text = clean_dictation(content)
    if not text:
        # Evidence at the LLM boundary: see exactly what came back when empty.
        logger.error(
            "Empty dictation from LLM. finish_reason={} raw_content={!r}\nfull response: {}",
            choice.get("finish_reason"),
            content,
            json.dumps(data, ensure_ascii=False)[:2000],
        )
    return text


def call_tts(text: str) -> str:
    """Synthesize *text* and return the path of a temp audio file.

    The file suffix follows the response Content-Type (``.mp3`` when it
    mentions "mpeg", otherwise ``.wav``) so gr.Audio plays it without
    transcoding. The file is created with delete=False and is not removed here.
    """
    client = make_client(_tts_base_url())
    response = client.audio.speech.create(model=TTS_MODEL, voice=TTS_VOICE, input=text)
    payload = response.read()
    mime = response.response.headers.get("content-type", "")
    if "mpeg" in mime:
        extension = ".mp3"
    else:
        extension = ".wav"
    out_file = tempfile.NamedTemporaryFile(suffix=extension, delete=False)
    with out_file:
        out_file.write(payload)
    return out_file.name


# ---- Step handlers ---------------------------------------------------------
#
# CTA handlers do their work, then append nav(...) so the wizard advances only on
# success; raising gr.Error leaves every output (including the views) untouched,
# so the user stays on the current step to fix the problem.


def start(words_raw: str, level: str, state: dict):
    """Input → Listen: generate the dictation text + audio.

    Args:
        words_raw: Raw textbox contents; split into words by parse_word_list.
        level: CEFR level label forwarded to the LLM prompt.
        state: Previous browser state. Not read — a fresh state replaces it.

    Returns:
        A tuple matching the Generate button's `outputs` list in build_ui:
        audio file path (None if synthesis failed), dictation text, new state,
        the view updates from nav("listen"), and an update hiding the intro.

    Raises:
        gr.Error: if no words were entered, or the model returned no text.
            Errors raised inside call_llm propagate unchanged.
    """
    words = parse_word_list(words_raw)
    if not words:
        raise gr.Error("Enter at least one word to practice.")

    text = call_llm(words, level)
    if not text:
        logger.error("LLM returned an empty dictation ({} words, {})", len(words), level)
        raise gr.Error("The model returned an empty dictation. Please try again.")
    logger.info("Generated dictation ({} words, {}):\n{}", len(words), level, text)
    state = {"diktat": text, "created_at": time.time()}

    try:
        audio_path = call_tts(text)
    except Exception as e:  # never lose the text because synthesis failed (spec §4)
        # Deliberate best-effort, but keep the traceback in the server log: the
        # UI warning alone leaves nothing to debug from.
        logger.exception("TTS synthesis failed; continuing with text only")
        gr.Warning(f"Audio synthesis failed ({e}). Text saved — open 'Show text'.")
        audio_path = None

    # trailing update hides the first-step intro (see the outputs list in build_ui).
    return audio_path, text, state, *nav("listen"), gr.update(visible=False)


def check(image_path: str, state: dict):
    """Upload → Results: transcribe the photo (blind) and grade it.

    Returns the OCR transcription, the rendered HTML report, and the view
    updates from nav("results"), in the order of the Check button's `outputs`
    list in build_ui.

    Raises:
        gr.Error: if no dictation is stored in *state*, no photo was supplied,
            or MODAL_OCR_URL is not configured.
    """
    reference = state.get("diktat") if state else None
    if not reference:
        raise gr.Error("Generate a dictation first.")
    if not image_path:
        raise gr.Error("Upload a photo of your handwriting first.")

    ocr_url = _require_env("MODAL_OCR_URL")
    transcription = transcribe_image(image_path, make_client(ocr_url))
    logger.info("OCR transcription:\n{}", transcription)

    # Grade the transcription against the stored dictation text.
    report = grade(reference, transcription, LANG)
    report_html = render_report_html(report)
    return (transcription, report_html, *nav("results"))


def restart():
    """Results → Input: wipe every field and go back to the first step.

    The returned tuple follows the `outputs` list of the Start-over button in
    build_ui: six cleared components, a fresh state, the view updates from
    nav("input"), and an update that re-shows the first-step intro.
    """
    # words_in, audio_out, text_out, image_in, recognized_out, diff_out
    cleared = ("", None, "", None, "", "")
    fresh_state = dict(diktat="", created_at=0)
    return (*cleared, fresh_state, *nav("input"), gr.update(visible=True))


# Inline progress: the CTA's real outputs all live on the next (hidden) view, so
# Gradio's spinner would paint where the user can't see it. Instead each CTA runs
# as show-status -> work -> hide-status; the hide step is wired on both .then and
# .failure (see build_ui), so no spinner is left stuck on screen when the work raises.


def _busy(message: str):
    """Update that reveals a status field showing the spinner icon and *message*."""
    label = f"{SPINNER} {message}"
    return gr.update(value=label, visible=True)


def _idle():
    """Update that hides a status field."""
    hide_status = gr.update(visible=False)
    return hide_status


def _begin(message: str):
    """First link of a CTA chain: show the spinner with *message* and disable
    the button so it can't be re-fired mid-call. Returns (status, button)."""
    status_update = _busy(message)
    button_update = gr.update(interactive=False)
    return status_update, button_update


def _end():
    """Last link of a CTA chain (.then path): hide the spinner and make the
    button clickable again. Returns (status, button)."""
    status_update = _idle()
    button_update = gr.update(interactive=True)
    return status_update, button_update


def _recover(*_):
    """Failure-path twin of _end, wired through .failure in build_ui.

    Performs the same cleanup — spinner hidden, button re-enabled — so a
    validation error such as 'no photo uploaded' can't leave the button stuck
    disabled. Any positional arguments passed to the handler are ignored.
    """
    return _end()


def goto(target: str):
    """Show the *target* view and reset all transient UI state.

    Navigating away during a wait shouldn't leave a stale spinner or a disabled
    button on the view being left, so both status fields are hidden and both
    CTAs re-enabled. The intro is shown only when returning to the first step.
    The tuple order matches the `nav_outputs` list wired in build_ui.
    """
    view_updates = nav(target)
    hide_input_status = gr.update(visible=False)
    hide_upload_status = gr.update(visible=False)
    enable_start_btn = gr.update(interactive=True)
    enable_check_btn = gr.update(interactive=True)
    on_first_step = target == "input"
    intro_update = gr.update(visible=on_first_step)
    return (
        *view_updates,
        hide_input_status,
        hide_upload_status,
        enable_start_btn,
        enable_check_btn,
        intro_update,
    )


# ---- UI --------------------------------------------------------------------


def build_ui() -> gr.Blocks:
    """Assemble the four-step wizard (Input → Listen → Upload → Results).

    Creates the browser-persisted state, the header and intro, and four stacked
    view groups of which one is visible at a time, then wires the Generate /
    Check / Start over / navigation buttons to their handlers.

    Returns:
        The un-launched Blocks app; theme, CSS and <head> markup are supplied
        by the caller at launch() time.
    """
    with gr.Blocks(title="Dictation Trainer") as demo:
        # localStorage-backed: survives a tab reload while the learner writes (spec §3).
        state = gr.BrowserState({"diktat": "", "created_at": 0})

        gr.HTML(HEADER_HTML)

        # Shown on the first step only (toggled by the start/goto/restart handlers).
        intro = gr.Markdown(
            "Practice German spelling by ear. Enter a few words, get a short "
            "dictation read aloud, and write it down by hand. Finally photograph "
            "your page for instant word-by-word feedback.",
            elem_classes="intro",
        )

        # Four stacked views; nav() keeps exactly one visible. Order must match
        # wizard.VIEWS.
        with gr.Group(visible=True) as view_input:
            gr.Markdown("### Let's start!", elem_classes="panel-title")

            words_in = gr.Textbox(
                label="Words to practice",
                placeholder="Comma- or newline-separated, e.g. angeblich, ablehnen, Apfel",
                lines=4,
            )
            level_in = gr.Dropdown(["A1", "A2", "B1", "B2"], value="A2", label="Level")
            start_btn = gr.Button("Generate", variant="primary")
            # Hidden until _begin shows it; raw HTML allowed so the spinner <i> renders.
            input_status = gr.Markdown(visible=False, sanitize_html=False, elem_classes="status")

        with gr.Group(visible=False) as view_listen:
            gr.Markdown("### Listen", elem_classes="panel-title")
            audio_out = gr.Audio(
                type="filepath", interactive=False, label="Dictation audio"
            )
            gr.Markdown("🎧 Listen and write it down on paper, then click **Finished**.")
            # Collapsed by default so the learner doesn't see the answer while writing.
            with gr.Accordion("Show text (debug)", open=False):
                text_out = gr.Textbox(label="Dictation text", interactive=False, lines=4)
            with gr.Row():
                listen_back_btn = gr.Button("Back")
                finished_btn = gr.Button("Finished", variant="primary")

        with gr.Group(visible=False) as view_upload:
            gr.Markdown("### Upload", elem_classes="panel-title")
            image_in = gr.Image(
                type="filepath",
                sources=["upload", "webcam", "clipboard"],
                label="Photo of your handwriting",
            )
            with gr.Row():
                upload_back_btn = gr.Button("Back")
                check_btn = gr.Button("Check", variant="primary")
            # Hidden until _begin shows it; raw HTML allowed so the spinner <i> renders.
            upload_status = gr.Markdown(visible=False, sanitize_html=False, elem_classes="status")

        with gr.Group(visible=False) as view_results:
            gr.Markdown("### Results", elem_classes="panel-title")
            recognized_out = gr.Textbox(
                label="Recognized text (OCR)", interactive=False, lines=4
            )
            diff_out = gr.HTML(label="Feedback")
            with gr.Row():
                results_back_btn = gr.Button("Back")
                restart_btn = gr.Button("Start over", variant="primary")

        views = [view_input, view_listen, view_upload, view_results]

        # CTA handlers: disable the button + show status, do the work (advance, or
        # raise gr.Error and stay). Cleanup is wired on both paths — .then(_end)
        # and .failure(_recover) each hide the status and re-enable the button —
        # so the button is never left stuck disabled.
        start_work = start_btn.click(
            lambda: _begin(f"Generating dictation… {COLD_START_HINT}"),
            outputs=[input_status, start_btn],
            show_progress="hidden",
        ).then(
            start,
            inputs=[words_in, level_in, state],
            # Order must match the tuple returned by start().
            outputs=[audio_out, text_out, state, *views, intro],
            # Our _begin spinner is the indicator; suppress Gradio's own overlay,
            # which would otherwise cover the visible card (and our spinner).
            show_progress="hidden",
        )
        start_work.then(_end, outputs=[input_status, start_btn], show_progress="hidden")
        start_work.failure(_recover, outputs=[input_status, start_btn], show_progress="hidden")

        check_work = check_btn.click(
            lambda: _begin(f"Reading your handwriting… {COLD_START_HINT}"),
            outputs=[upload_status, check_btn],
            show_progress="hidden",
        ).then(
            check,
            inputs=[image_in, state],
            # Order must match the tuple returned by check().
            outputs=[recognized_out, diff_out, *views],
            # Keep our _begin spinner visible; suppress Gradio's overlay (it would
            # cover the upload card and hide the spinner).
            show_progress="hidden",
        )
        check_work.then(_end, outputs=[upload_status, check_btn], show_progress="hidden")
        check_work.failure(_recover, outputs=[upload_status, check_btn], show_progress="hidden")
        # Start over: order must match the tuple returned by restart().
        restart_btn.click(
            restart,
            outputs=[words_in, audio_out, text_out, image_in, recognized_out, diff_out, state, *views, intro],
        )

        # Navigation (Finished + Back buttons): switch view AND clear any leftover
        # spinner / disabled CTA from an action on the view being left. `cancels`
        # aborts an in-flight Start/Check so a call the user navigated away from
        # can't complete and yank them forward (its outputs are discarded).
        # Order of nav_outputs must match the tuple returned by goto().
        nav_outputs = [*views, input_status, upload_status, start_btn, check_btn, intro]
        in_flight = [start_work, check_work]
        finished_btn.click(lambda: goto("upload"), outputs=nav_outputs, cancels=in_flight)
        listen_back_btn.click(lambda: goto("input"), outputs=nav_outputs, cancels=in_flight)
        upload_back_btn.click(lambda: goto("listen"), outputs=nav_outputs, cancels=in_flight)
        results_back_btn.click(lambda: goto("upload"), outputs=nav_outputs, cancels=in_flight)

    return demo


if __name__ == "__main__":
    app = build_ui()
    # The favicon doubles as the home-screen icon; skip it if the logo is missing.
    icon_path = LOGO_PATH if os.path.exists(LOGO_PATH) else None
    # On HF Spaces (SPACE_ID set) the runtime serves the app — don't request a
    # share tunnel there; locally, a share link makes the app usable from a phone.
    running_on_spaces = "SPACE_ID" in os.environ
    # theme, css and head all live on launch() in Gradio 6 (moved off Blocks).
    # pwa=True makes the app installable (Gradio generates the manifest + service
    # worker).
    launch_options = dict(
        theme=THEME,
        css=MOBILE_CSS,
        head=FA_HEAD,
        pwa=True,
        favicon_path=icon_path,
        share=not running_on_spaces,
    )
    app.launch(**launch_options)