# -*- coding: utf-8 -*-
"""
EduCrate - Socratic Tutor (Gradio app)
A Spanish-language Socratic tutor for Peruvian public secondary-school students.

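UI in English (international judges); the tutoring happens in Spanish, although the
system prompt also lets the model reply in English when the student writes in English.
Model: Qwen3-0.6B fine-tuned (SFT). Runs on CPU (CUDA is used when available). Gradio 6 (messages format).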
"""
import os
import re
import gradio as gr
import torch

# Hub id (or local path) of the checkpoint to serve; overridable via the environment.
MODEL_ID = os.getenv("MODEL_ID", "build-small-hackathon/educrate-qwen3-bi")

# Matches one complete <think>...</think> span, newlines included (non-greedy).
_THINK = re.compile(r"<think>.*?</think>", flags=re.DOTALL)

# How many past exchanges are replayed to the model; overridable via the environment.
MAX_TURNS = int(os.getenv("MAX_TURNS", "8"))

# Use the GPU when torch can see one: bfloat16 on CUDA, float32 on CPU.
if torch.cuda.is_available():
    DEVICE, DTYPE = "cuda", torch.bfloat16
else:
    DEVICE, DTYPE = "cpu", torch.float32

# Must stay identical to the training system prompt (scripts/22_sft_qwen3_lora.py),
# which is bilingual. Written one sentence per entry; the sentences are joined by
# single spaces.
SYSTEM_PROMPT = " ".join((
    "You are EduCrate, a Socratic tutor for secondary-school students "
    "(math reasoning and reading comprehension).",
    "Reply in the student's language (Spanish or English).",
    "First reason briefly inside <think>...</think>, then ask ONE guiding question.",
    "ABSOLUTE RULE: never give the final answer or result; "
    "guide with progressive hints until the student discovers it.",
    "Be warm, brief and clear.",
))

# UI mode label -> Spanish instruction appended to the system prompt for that mode.
# Insertion order is the order in which the labels are listed by the UI.
MODES = {"Just chat": ""}
MODES["Understand my mistake"] = "\n\n" + (
    "El estudiante quiere entender su razonamiento: "
    "NO le des datos ni la respuesta; "
    "respóndele con una contrapregunta que lo haga revisar su propio paso."
)
MODES["I need a fact or formula"] = "\n\n" + (
    "El estudiante pide un dato o fórmula puntual: "
    "puedes darlo de forma breve, "
    "pero NUNCA lo apliques hasta la respuesta final por él; "
    "devuélvele la pregunta."
)

# Example cards. Each entry is (English button label, reading passage, Spanish
# student message); math examples carry an empty passage.
_RECYCLING_PASSAGE = (
    "El reciclaje ayuda a reducir la basura en las ciudades. "
    "Si separamos los plásticos y el papel, menos residuos llegan a los ríos."
)
EXAMPLES = [
    (
        "Linear equation",
        "",
        "Ayúdame a resolver 3x + 6 = 15, pero no me des la respuesta.",
    ),
    (
        "Adding fractions",
        "",
        "No entiendo cómo sumar 1/2 + 1/3. ¿Me ayudas a pensarlo?",
    ),
    (
        "Percentages",
        "",
        "¿Cómo calculo el 20% de 50? No me lo resuelvas, guíame paso a paso.",
    ),
    (
        "Reading comprehension",
        _RECYCLING_PASSAGE,
        "¿Cuál es la idea principal de este texto? Guíame, no me des la respuesta.",
    ),
]

# Load the tokenizer and model once, at import time. A failure is recorded in
# _load_error instead of being raised, so the UI can still start and report it.
_load_error = None
model = tokenizer = None
try:
    from transformers import AutoModelForCausalLM, AutoTokenizer
    print(f"Loading {MODEL_ID} on {DEVICE} ...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype=DTYPE).to(DEVICE)
    model.eval()
    # Generation needs a pad token; reuse EOS when the tokenizer defines none.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    print("Model loaded.")
except Exception as e:  # noqa: BLE001
    # Leave no half-initialised state behind: either both objects are usable or
    # both are None.
    model = tokenizer = None
    # Include the exception type so the message is never empty; an empty string
    # would be falsy and hide the failure from code that tests `if _load_error`.
    _load_error = f"{type(e).__name__}: {e}"
    print(f"[WARN] Could not load model: {e}")

# Thread count is configured outside the load try-block so that a malformed
# OMP_NUM_THREADS value is not misreported as a model load failure.
if model is not None and DEVICE == "cpu":
    try:
        _cpu_threads = int(os.environ.get("OMP_NUM_THREADS", "4"))
    except ValueError:
        _cpu_threads = 4
    # torch requires a positive thread count.
    torch.set_num_threads(max(1, _cpu_threads))


def _render(messages):
    """Render ``messages`` into the prompt string passed to the tokenizer.

    The tokenizer's chat template is applied untokenized, with a generation
    prompt appended and ``enable_thinking=False``. If that call raises
    ``TypeError``, it is retried without the ``enable_thinking`` argument.
    """
    base_kwargs = {"tokenize": False, "add_generation_prompt": True}
    try:
        prompt = tokenizer.apply_chat_template(
            messages, **base_kwargs, enable_thinking=False)
    except TypeError:
        prompt = tokenizer.apply_chat_template(messages, **base_kwargs)
    return prompt


def _strip_reasoning(text):
    """Remove ``<think>`` reasoning from decoded model output.

    Complete ``<think>...</think>`` spans are deleted. Unbalanced tags are
    handled as well, so reasoning is not shown just because a tag is missing:
    text before an orphan ``</think>`` and text after an unclosed ``<think>``
    are both discarded.
    """
    text = _THINK.sub("", text)
    # Orphan closing tag: everything up to the last one is treated as reasoning.
    _, closing_tag, after = text.rpartition("</think>")
    if closing_tag:
        text = after
    # Unclosed opening tag (e.g. the output stopped mid-reasoning): drop the rest.
    text = text.partition("<think>")[0]
    return text.strip()


def _generate(messages, max_new_tokens):
    """Sample one reply for ``messages`` and return it with the reasoning removed.

    Args:
        messages: Chat messages (role/content dicts); rendered with ``_render``.
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded continuation (prompt tokens excluded) with ``<think>``
        reasoning stripped and surrounding whitespace removed. The result can be
        an empty string when the output consisted of reasoning only.
    """
    inputs = tokenizer(_render(messages), return_tensors="pt").to(DEVICE)
    prompt_len = inputs["input_ids"].shape[1]

    # Explicit None check: a pad id of 0 is valid and must not fall back to EOS.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id

    with torch.no_grad():
        out = model.generate(
            **inputs, max_new_tokens=max_new_tokens, do_sample=True,
            temperature=0.45, top_p=0.9, repetition_penalty=1.15,
            pad_token_id=pad_id)

    # Decode only the newly generated tokens, not the echoed prompt.
    text = tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)
    return _strip_reasoning(text)


def _message_text(content):
    """Flatten a chat message's ``content`` to plain text.

    Accepts a plain string, ``None``, a dict with a ``"text"`` entry, or a
    list/tuple of such parts (joined with newlines; parts without text are
    skipped).
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        return str(content.get("text") or "")
    if isinstance(content, (list, tuple)):
        return "\n".join(part for part in map(_message_text, content) if part)
    return str(content)


def _context_window(history, max_turns):
    """Return the last ``max_turns`` exchanges as plain role/content dicts."""
    # A non-positive limit means "no context"; note that history[-0:] would
    # otherwise return the entire history.
    if max_turns <= 0:
        return []
    window = []
    for msg in history[-2 * max_turns:]:
        if isinstance(msg, dict):
            window.append({
                "role": msg.get("role"),
                "content": _message_text(msg.get("content")),
            })
        else:
            window.append(msg)
    return window


def respond(user_msg, history, reading_text, mode, hint_mode):
    """Handle one student message and return the updated chat.

    Args:
        user_msg: The student's message; empty or whitespace-only input is ignored.
        history: Previous chat messages (role/content dicts), or a falsy value
            for a new conversation.
        reading_text: Optional reading passage appended to the system prompt.
        mode: Key into ``MODES`` selecting an extra instruction; unknown keys
            add nothing.
        hint_mode: When truthy, asks for a single short hint and uses a smaller
            generation budget.

    Returns:
        A ``(history, "")`` tuple: the chat history including the new exchange,
        and an empty string used to clear the input box.
    """
    history = history or []
    if not user_msg or not user_msg.strip():
        return history, ""

    user_turn = {"role": "user", "content": user_msg}
    if model is None:
        return history + [
            user_turn,
            {"role": "assistant", "content": f"Model failed to load ({_load_error})."},
        ], ""

    system = SYSTEM_PROMPT + MODES.get(mode, "")
    if reading_text and reading_text.strip():
        system += f"\n\nTexto de lectura del estudiante:\n{reading_text.strip()}"
    if hint_mode:
        system += ("\n\nEl estudiante pidió una PISTA: da UNA sola pista corta que lo "
                   "acerque, sin revelar la respuesta.")

    # History content is flattened to plain strings before templating.
    # NOTE(review): assumes the chat component may return content as a list of
    # typed blocks rather than a string - confirm against the installed Gradio.
    messages = [{"role": "system", "content": system}]
    messages += _context_window(history, MAX_TURNS)
    messages.append(user_turn)

    reply = _generate(messages, max_new_tokens=240 if hint_mode else 340)
    return history + [
        user_turn,
        {"role": "assistant", "content": reply},
    ], ""


# Page stylesheet, one rule per entry. The empty first and last entries
# reproduce the leading and trailing newline of the stylesheet text.
CSS = "\n".join([
    "",
    # Cap the page width and centre it.
    ".gradio-container {max-width: 1024px !important; margin: 0 auto;}",
    # Centred title and grey subtitle.
    "#title {text-align:center; margin-bottom: 0;}",
    "#subtitle {text-align:center; color:#6b7280; margin-top:2px; font-size:0.95rem;}",
    # Medium-weight labels on the example buttons.
    ".example-btn button {font-weight:500;}",
    # Hide the page footer.
    "footer {visibility:hidden}",
    "",
])

# Build the UI. Components are created in layout order inside nested context
# managers, so the order of the statements below determines the page layout.
# NOTE(review): newer Gradio releases reportedly expect `theme` and `css` on
# launch() rather than on the Blocks constructor - confirm against the pinned version.
with gr.Blocks(title="EduCrate - Socratic Tutor", theme=gr.themes.Soft(
        primary_hue="slate", neutral_hue="slate"), css=CSS) as demo:
    # Header: title and subtitle, styled through the #title / #subtitle CSS ids.
    gr.Markdown("# EduCrate", elem_id="title")
    gr.Markdown(
        "A Socratic tutor that never gives the answer — it guides with questions. "
        "Spanish-language, for Peruvian public-school students. Math reasoning and "
        "reading comprehension. Runs on CPU.", elem_id="subtitle")
    # Show a banner when the import-time model load recorded an error.
    if _load_error:
        gr.Markdown(f"> Model `{MODEL_ID}` failed to load: {_load_error}")

    # One button per EXAMPLES entry; each button is kept together with its
    # passage and message so the click handlers can be wired further down.
    gr.Markdown("**Try an example** (the tutor replies in Spanish):")
    ex_buttons = []
    with gr.Row():
        for label, reading, message in EXAMPLES:
            b = gr.Button(label, size="sm", elem_classes="example-btn", scale=1)
            ex_buttons.append((b, reading, message))

    # Two columns: tutoring options on the left, the conversation on the right.
    with gr.Row(equal_height=False):
        with gr.Column(scale=1):
            reading_text = gr.Textbox(
                label="Reading passage (optional)",
                placeholder="Paste a short text to practice reading comprehension.",
                lines=6)
            # Choices are the MODES keys, in dictionary order.
            mode = gr.Radio(choices=list(MODES.keys()), value="Just chat",
                            label="What do you need?")
            hint_mode = gr.Checkbox(label="Give me a single short hint")
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Conversation", height=460)
            user_input = gr.Textbox(
                label="Your question", lines=2,
                placeholder="Type your question in Spanish and press Send.")
            with gr.Row():
                submit_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("New topic", variant="secondary")

    # Event wiring. `inp` lists the components whose values are passed to
    # respond(), in the order of its parameters; respond() returns the new chat
    # history and an empty string that clears the question box.
    inp = [user_input, chatbot, reading_text, mode, hint_mode]
    submit_btn.click(respond, inp, [chatbot, user_input])
    user_input.submit(respond, inp, [chatbot, user_input])
    # "New topic" empties the chat, the question box and the reading passage;
    # the mode and hint selections are not among the outputs, so they are kept.
    clear_btn.click(lambda: ([], "", ""), outputs=[chatbot, user_input, reading_text])
    # Example buttons: fill in the passage and message, clear the chat, then run
    # respond(). The default arguments (r=..., m=...) bind each button's own
    # values; a plain closure would see only the loop's final values.
    for b, reading, message in ex_buttons:
        b.click(lambda r=reading, m=message: (r, m, []),
                outputs=[reading_text, user_input, chatbot]).then(
                respond, inp, [chatbot, user_input])

    # Collapsed-by-default project description shown below the chat.
    with gr.Accordion("About this project", open=False):
        gr.Markdown(
            """
**The problem.** Peru's public secondary schools face a deep learning crisis. In
**PISA 2022 (OECD)**, only **34%** of Peruvian 15-year-olds reached basic proficiency
in mathematics (66% below) and **50%** in reading. Peru's national assessment
(**ECE / MINEDU, grade 8, 2022**) found only about **12.7%** *Satisfactory* in math,
with public (state) schools far behind private ones.

**The approach.** Most chatbots hand over the answer, which does not build reasoning.
EduCrate never gives the final answer — it asks one guiding question at a time, detects
the student's mistake, and offers progressive hints (the maieutic method).

**The model.** Qwen3-0.6B fine-tuned (SFT, with a GRPO variant) on ~4,900 Spanish
Socratic dialogues. It runs on CPU, so it works on low-resource laptops common in
public schools — no GPU and no paid API required.

**Measured behavior (held-out mGSM-es).** The fine-tune raised the answer-withholding
rate from 84% (base) to 100%, turning verbose solutions into concise guiding questions.

*It is a 0.6B model: guidance is sometimes imperfect. Built for the Build Small
Hackathon (Backyard AI). Made with generative AI; validate pedagogical use with a teacher.*
            """
        )

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()