# -*- coding: utf-8 -*-
"""
EduCrate - Socratic Tutor (Gradio app)
A Spanish-language Socratic tutor for Peruvian public secondary-school students.
UI in English (international judges); the tutoring happens in Spanish.
Model: Qwen3-0.6B fine-tuned (SFT). Runs on CPU. Gradio 6 (messages format).
"""
import os
import re
import gradio as gr
import torch
# Hub repo id (or local path) of the fine-tuned checkpoint; overridable via env.
MODEL_ID = os.environ.get("MODEL_ID", "build-small-hackathon/educrate-qwen3-bi")
# Matches one complete reasoning block, tags included. re.S lets "." span the
# newlines inside the block; the lazy quantifier stops at the first closing tag
# so two separate blocks are never merged into one match.
# (The previous pattern, r".*?", only ever matched the empty string, so
# substituting it removed nothing.)
_THINK = re.compile(r"<think>.*?</think>", re.S)
# Number of past user/assistant exchanges replayed to the model on each turn.
MAX_TURNS = int(os.environ.get("MAX_TURNS", "8"))
# Prefer a GPU when one is visible; otherwise run on CPU in full precision.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32
# Must match the training system prompt (scripts/22_sft_qwen3_lora.py) — bilingual.
# NOTE(review): this string previously read "reason briefly inside ..., then",
# i.e. the literal tag names were missing. They are restored below as
# <think>...</think>; confirm the exact wording against the training script.
SYSTEM_PROMPT = (
    "You are EduCrate, a Socratic tutor for secondary-school students (math reasoning and "
    "reading comprehension). Reply in the student's language (Spanish or English). First "
    "reason briefly inside <think>...</think>, then ask ONE guiding question. ABSOLUTE RULE: "
    "never give the final answer or result; guide with progressive hints until the student "
    "discovers it. Be warm, brief and clear."
)
# Extra instructions appended to the system prompt, keyed by the label shown in
# the "What do you need?" selector. Insertion order is the order of the choices.
_REVIEW_OWN_STEP = (
    "\n\nEl estudiante quiere entender su razonamiento: "
    "NO le des datos ni la respuesta; "
    "respóndele con una contrapregunta que lo haga revisar su propio paso."
)
_FACT_ALLOWED = (
    "\n\nEl estudiante pide un dato o fórmula puntual: "
    "puedes darlo de forma breve, "
    "pero NUNCA lo apliques hasta la respuesta final por él; "
    "devuélvele la pregunta."
)
MODES = {
    "Just chat": "",  # no extra instruction
    "Understand my mistake": _REVIEW_OWN_STEP,
    "I need a fact or formula": _FACT_ALLOWED,
}
# Example cards, one per button. Each entry is a 3-tuple:
# (button label in English, reading passage or "" for none, student message in Spanish).
_RECYCLING_PASSAGE = (
    "El reciclaje ayuda a reducir la basura en las ciudades. "
    "Si separamos los plásticos y el papel, menos residuos llegan a los ríos."
)
EXAMPLES = [
    (
        "Linear equation",
        "",
        "Ayúdame a resolver 3x + 6 = 15, pero no me des la respuesta.",
    ),
    (
        "Adding fractions",
        "",
        "No entiendo cómo sumar 1/2 + 1/3. ¿Me ayudas a pensarlo?",
    ),
    (
        "Percentages",
        "",
        "¿Cómo calculo el 20% de 50? No me lo resuelvas, guíame paso a paso.",
    ),
    (
        "Reading comprehension",
        _RECYCLING_PASSAGE,
        "¿Cuál es la idea principal de este texto? Guíame, no me des la respuesta.",
    ),
]
# Load the tokenizer and model once at import time. Any failure is recorded in
# `_load_error` instead of being raised, so the UI can still start and report
# the problem; `model` stays None in that case.
_load_error = None
model = tokenizer = None
try:
    # Imported here so that a missing/broken transformers install is reported
    # through the same non-fatal path as a failed download.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    print(f"Loading {MODEL_ID} on {DEVICE} ...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype=DTYPE).to(DEVICE)
    model.eval()
    if DEVICE == "cpu":
        torch.set_num_threads(int(os.environ.get("OMP_NUM_THREADS", "4")))
    if tokenizer.pad_token is None:
        # Fall back to EOS when the tokenizer defines no padding token.
        tokenizer.pad_token = tokenizer.eos_token
    print("Model loaded.")
except Exception as e:  # noqa: BLE001
    # Some exceptions stringify to "", which would leave `_load_error` falsy
    # and hide the warning banner; fall back to the exception class name.
    _load_error = str(e) or type(e).__name__
    print(f"[WARN] Could not load model: {e}")
def _render(messages):
    """Render chat ``messages`` into a single prompt string.

    Uses the tokenizer's chat template with a generation prompt appended and
    returns text (``tokenize=False``), not token ids.

    Args:
        messages: list of ``{"role": ..., "content": ...}`` dicts.

    Returns:
        The prompt string produced by ``tokenizer.apply_chat_template``.
    """
    try:
        # First attempt asks the template to disable thinking mode.
        return tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True, enable_thinking=False)
    except TypeError:
        # Retry without the extra keyword if the call rejects it.
        return tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True)
# One pass removes complete <think>...</think> blocks (re.S lets them span
# newlines) and, failing that, any stray opening/closing tag left on its own.
_REASONING_MARKUP = re.compile(r"<think>.*?</think>|</?think>", re.S)


def _strip_reasoning(text):
    """Return ``text`` without reasoning blocks/tags and without outer whitespace."""
    return _REASONING_MARKUP.sub("", text).strip()


def _generate(messages, max_new_tokens):
    """Sample one assistant reply for ``messages`` and return it as clean text.

    Args:
        messages: chat history as a list of ``{"role", "content"}`` dicts,
            system message included.
        max_new_tokens: upper bound on the number of generated tokens.

    Returns:
        The decoded reply with the prompt tokens dropped, special tokens
        skipped, and the model's reasoning markup removed.
    """
    inputs = tokenizer(_render(messages), return_tensors="pt").to(DEVICE)
    # Compare against None explicitly: a pad id of 0 is valid but falsy.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id
    with torch.no_grad():
        out = model.generate(
            **inputs, max_new_tokens=max_new_tokens, do_sample=True,
            temperature=0.45, top_p=0.9, repetition_penalty=1.15,
            pad_token_id=pad_id)
    # generate() returns prompt + continuation; keep only the continuation.
    prompt_len = inputs["input_ids"].shape[1]
    text = tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)
    return _strip_reasoning(text)  # hide the model's brief reasoning
def _content_text(content):
    """Flatten a chat message ``content`` value into a plain string.

    Strings pass through unchanged. A list (or single dict) of parts is reduced
    to the ``"text"`` fields / bare strings it contains, joined by newlines.
    """
    if isinstance(content, str):
        return content
    if content is None:
        return ""
    if isinstance(content, dict):
        content = [content]
    if isinstance(content, (list, tuple)):
        parts = []
        for part in content:
            if isinstance(part, str):
                parts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                parts.append(part["text"])
        return "\n".join(parts)
    return str(content)


def _recent_history(history, max_turns):
    """Return the last ``max_turns`` exchanges as plain role/content dicts."""
    # Guard explicitly: history[-0:] would be the WHOLE list, not an empty one.
    if max_turns <= 0:
        return []
    return [
        {"role": msg["role"], "content": _content_text(msg.get("content"))}
        for msg in history[-2 * max_turns:]
    ]


def respond(user_msg, history, reading_text, mode, hint_mode):
    """Handle one chat turn and return the updated conversation.

    Args:
        user_msg: text typed by the student.
        history: prior conversation as a list of message dicts, or None.
        reading_text: optional reading passage appended to the system prompt.
        mode: key into ``MODES`` selecting an extra instruction (unknown keys
            add nothing).
        hint_mode: when truthy, asks for a single short hint and lowers the
            generation budget.

    Returns:
        ``(new_history, "")`` — the second element clears the input box.
        Blank input returns the history unchanged. If the model did not load,
        the reply is an error message instead of a generation.
    """
    history = history or []
    if not user_msg or not user_msg.strip():
        return history, ""

    user_turn = {"role": "user", "content": user_msg}
    if model is None:
        return history + [
            user_turn,
            {"role": "assistant", "content": f"Model failed to load ({_load_error})."},
        ], ""

    system = SYSTEM_PROMPT + MODES.get(mode, "")
    if reading_text and reading_text.strip():
        system += f"\n\nTexto de lectura del estudiante:\n{reading_text.strip()}"
    if hint_mode:
        system += ("\n\nEl estudiante pidió una PISTA: da UNA sola pista corta que lo "
                   "acerque, sin revelar la respuesta.")

    # NOTE(review): the chat UI may hand message content back as a list of
    # typed parts rather than a plain string (confirm for the installed Gradio
    # version); the history is flattened to text before templating either way.
    messages = [{"role": "system", "content": system}]
    messages += _recent_history(history, MAX_TURNS)
    messages.append(dict(user_turn))

    reply = _generate(messages, max_new_tokens=240 if hint_mode else 340)
    return history + [
        user_turn,
        {"role": "assistant", "content": reply},
    ], ""
# Page-level stylesheet: one rule per entry, joined with newlines. The empty
# first and last entries give the string its leading and trailing newline.
CSS = "\n".join((
    "",
    ".gradio-container {max-width: 1024px !important; margin: 0 auto;}",
    "#title {text-align:center; margin-bottom: 0;}",
    "#subtitle {text-align:center; color:#6b7280; margin-top:2px; font-size:0.95rem;}",
    ".example-btn button {font-weight:500;}",
    "footer {visibility:hidden}",
    "",
))
# Text of the "About this project" accordion. Paragraphs are separated by blank
# lines so Markdown renders them as separate paragraphs instead of one block.
_ABOUT_MD = """
**The problem.** Peru's public secondary schools face a deep learning crisis. In
**PISA 2022 (OECD)**, only **34%** of Peruvian 15-year-olds reached basic proficiency
in mathematics (66% below) and **50%** in reading. Peru's national assessment
(**ECE / MINEDU, grade 8, 2022**) found only about **12.7%** *Satisfactory* in math,
with public (state) schools far behind private ones.

**The approach.** Most chatbots hand over the answer, which does not build reasoning.
EduCrate never gives the final answer — it asks one guiding question at a time, detects
the student's mistake, and offers progressive hints (the maieutic method).

**The model.** Qwen3-0.6B fine-tuned (SFT, with a GRPO variant) on ~4,900 Spanish
Socratic dialogues. It runs on CPU, so it works on low-resource laptops common in
public schools — no GPU and no paid API required.

**Measured behavior (held-out mGSM-es).** The fine-tune raised the answer-withholding
rate from 84% (base) to 100%, turning verbose solutions into concise guiding questions.

*It is a 0.6B model: guidance is sometimes imperfect. Built for the Build Small
Hackathon (Backyard AI). Made with generative AI; validate pedagogical use with a teacher.*
"""

# Build the UI: header, example buttons, a settings column beside the chat
# column, event wiring, and a collapsed "About" section.
# NOTE(review): newer Gradio releases may expect app-level `theme`/`css` on
# launch() rather than on gr.Blocks() — confirm against the installed version.
with gr.Blocks(
    title="EduCrate - Socratic Tutor",
    theme=gr.themes.Soft(primary_hue="slate", neutral_hue="slate"),
    css=CSS,
) as demo:
    gr.Markdown("# EduCrate", elem_id="title")
    gr.Markdown(
        "A Socratic tutor that never gives the answer — it guides with questions. "
        "Spanish-language, for Peruvian public-school students. Math reasoning and "
        "reading comprehension. Runs on CPU.", elem_id="subtitle")
    if _load_error:
        # Surface the load failure recorded at import time.
        gr.Markdown(f"> Model `{MODEL_ID}` failed to load: {_load_error}")

    gr.Markdown("**Try an example** (the tutor replies in Spanish):")
    ex_buttons = []
    with gr.Row():
        for label, reading, message in EXAMPLES:
            b = gr.Button(label, size="sm", elem_classes="example-btn", scale=1)
            # Keep each button with its payload; handlers are attached below,
            # once the components they write to exist.
            ex_buttons.append((b, reading, message))

    with gr.Row(equal_height=False):
        with gr.Column(scale=1):
            reading_text = gr.Textbox(
                label="Reading passage (optional)",
                placeholder="Paste a short text to practice reading comprehension.",
                lines=6)
            mode = gr.Radio(choices=list(MODES.keys()), value="Just chat",
                            label="What do you need?")
            hint_mode = gr.Checkbox(label="Give me a single short hint")
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Conversation", height=460)
            user_input = gr.Textbox(
                label="Your question", lines=2,
                placeholder="Type your question in Spanish and press Send.")
            with gr.Row():
                submit_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("New topic", variant="secondary")

    # Both the Send button and Enter in the textbox run the same handler.
    inp = [user_input, chatbot, reading_text, mode, hint_mode]
    submit_btn.click(respond, inp, [chatbot, user_input])
    user_input.submit(respond, inp, [chatbot, user_input])
    # "New topic" empties the conversation, the input box and the passage.
    clear_btn.click(lambda: ([], "", ""), outputs=[chatbot, user_input, reading_text])
    for b, reading, message in ex_buttons:
        # Default arguments bind this iteration's values (avoids late binding).
        # An example first resets the chat and fills the inputs, then submits.
        b.click(lambda r=reading, m=message: (r, m, []),
                outputs=[reading_text, user_input, chatbot]).then(
            respond, inp, [chatbot, user_input])

    with gr.Accordion("About this project", open=False):
        gr.Markdown(_ABOUT_MD)
# Start the web server only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    demo.launch()