# -*- coding: utf-8 -*-
"""
EduCrate - Socratic Tutor (Gradio app)

A Spanish-language Socratic tutor for Peruvian public secondary-school students.
UI in English (international judges); the tutoring happens in Spanish.
Model: Qwen3-0.6B fine-tuned (SFT). Runs on CPU. Gradio 6 (messages format).
"""
import os
import re

import gradio as gr
import torch

MODEL_ID = os.environ.get("MODEL_ID", "build-small-hackathon/educrate-qwen3-bi")
# Matches one complete reasoning block. The delimiters are required: without
# them the lazy ".*?" matches arbitrary text instead of only the reasoning.
_THINK = re.compile(r"<think>.*?</think>", re.S)
# Number of past user/assistant exchanges replayed to the model on each request.
MAX_TURNS = int(os.environ.get("MAX_TURNS", "8"))
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32

# Must match the training system prompt (scripts/22_sft_qwen3_lora.py) — bilingual.
# NOTE(review): the "..." after "inside" looks like a placeholder where the
# reasoning tags were lost; confirm against the training script before editing,
# since this literal has to stay identical to the prompt used for fine-tuning.
SYSTEM_PROMPT = (
    "You are EduCrate, a Socratic tutor for secondary-school students (math reasoning and "
    "reading comprehension). Reply in the student's language (Spanish or English). First "
    "reason briefly inside ..., then ask ONE guiding question. ABSOLUTE RULE: "
    "never give the final answer or result; guide with progressive hints until the student "
    "discovers it. Be warm, brief and clear."
)

# UI label -> extra instruction appended to the system prompt for that mode.
MODES = {
    "Just chat": "",
    "Understand my mistake": (
        "\n\nEl estudiante quiere entender su razonamiento: NO le des datos ni la "
        "respuesta; respóndele con una contrapregunta que lo haga revisar su propio paso."
    ),
    "I need a fact or formula": (
        "\n\nEl estudiante pide un dato o fórmula puntual: puedes darlo de forma breve, "
        "pero NUNCA lo apliques hasta la respuesta final por él; devuélvele la pregunta."
    ),
}

# Example cards: (button label in English, reading passage, Spanish student message).
EXAMPLES = [
    ("Linear equation", "",
     "Ayúdame a resolver 3x + 6 = 15, pero no me des la respuesta."),
    ("Adding fractions", "",
     "No entiendo cómo sumar 1/2 + 1/3. ¿Me ayudas a pensarlo?"),
    ("Percentages", "",
     "¿Cómo calculo el 20% de 50? No me lo resuelvas, guíame paso a paso."),
    ("Reading comprehension",
     "El reciclaje ayuda a reducir la basura en las ciudades. Si separamos los "
     "plásticos y el papel, menos residuos llegan a los ríos.",
     "¿Cuál es la idea principal de este texto? Guíame, no me des la respuesta."),
]

# Model loading is best-effort: on failure the UI still starts and shows the
# error text instead of crashing at import time.
_load_error = None
model = tokenizer = None
try:
    from transformers import AutoModelForCausalLM, AutoTokenizer

    print(f"Loading {MODEL_ID} on {DEVICE} ...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype=DTYPE).to(DEVICE)
    model.eval()
    if DEVICE == "cpu":
        torch.set_num_threads(int(os.environ.get("OMP_NUM_THREADS", "4")))
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    print("Model loaded.")
except Exception as e:  # noqa: BLE001
    _load_error = str(e)
    print(f"[WARN] Could not load model: {e}")


def _render(messages):
    """Render ``messages`` into a prompt string using the tokenizer's chat template.

    ``enable_thinking=False`` is passed first; if the call rejects that keyword
    with ``TypeError`` the template is applied again without it.
    """
    try:
        return tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True,
            enable_thinking=False)
    except TypeError:
        return tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True)


def _strip_reasoning(text):
    """Return ``text`` with the model's reasoning markup removed and trimmed.

    Complete ``<think>...</think>`` blocks are dropped entirely; any unpaired
    tag that remains is deleted while the text around it is kept.
    """
    text = _THINK.sub("", text)  # hide the model's brief reasoning
    text = text.replace("<think>", "").replace("</think>", "")
    return text.strip()


def _generate(messages, max_new_tokens):
    """Sample one assistant reply for ``messages`` and return it as clean text.

    Args:
        messages: chat messages (dicts with ``role`` and ``content``).
        max_new_tokens: upper bound on the number of generated tokens.

    Returns:
        The decoded continuation with reasoning markup stripped.
    """
    inputs = tokenizer(_render(messages), return_tensors="pt").to(DEVICE)
    # Explicit None check: a pad id of 0 is valid and must not fall through to
    # the EOS id the way ``pad_token_id or eos_token_id`` would.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id
    with torch.no_grad():
        out = model.generate(
            **inputs, max_new_tokens=max_new_tokens, do_sample=True,
            temperature=0.45, top_p=0.9, repetition_penalty=1.15,
            pad_token_id=pad_id)
    # Decode only the newly generated tokens, not the echoed prompt.
    prompt_len = inputs["input_ids"].shape[1]
    text = tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)
    return _strip_reasoning(text)


def _as_text(content):
    """Flatten a chat message's ``content`` into a plain string.

    Strings pass through unchanged. Lists/tuples are flattened part by part and
    joined with newlines; dict parts contribute their ``"text"`` value (parts
    without text contribute nothing).
    """
    if isinstance(content, str):
        return content
    if content is None:
        return ""
    if isinstance(content, dict):
        piece = content.get("text")
        return piece if isinstance(piece, str) else ""
    if isinstance(content, (list, tuple)):
        pieces = (_as_text(part) for part in content)
        return "\n".join(piece for piece in pieces if piece)
    return str(content)


def _plain_history(history):
    """Reduce chatbot history entries to ``{"role", "content"}`` with string content.

    NOTE(review): presumably the Chatbot component can hand back entries with
    extra UI keys or structured (list) content — verify against the installed
    Gradio version. Plain-string entries are returned unchanged in meaning.
    """
    return [
        {"role": turn["role"], "content": _as_text(turn.get("content"))}
        for turn in history
        if isinstance(turn, dict) and "role" in turn
    ]


def respond(user_msg, history, reading_text, mode, hint_mode):
    """Handle one student message and return the updated chat state.

    Args:
        user_msg: text typed by the student.
        history: current chatbot messages (list of role/content dicts) or None.
        reading_text: optional reading passage appended to the system prompt.
        mode: key of ``MODES`` selecting an extra instruction.
        hint_mode: when truthy, ask for a single short hint and use a smaller
            generation budget.

    Returns:
        ``(new_history, "")`` — the second item clears the input textbox.
    """
    history = history or []
    if not user_msg or not user_msg.strip():
        return history, ""
    if model is None:
        return history + [
            {"role": "user", "content": user_msg},
            {"role": "assistant", "content": f"Model failed to load ({_load_error})."},
        ], ""

    system = SYSTEM_PROMPT + MODES.get(mode, "")
    if reading_text and reading_text.strip():
        system += f"\n\nTexto de lectura del estudiante:\n{reading_text.strip()}"
    if hint_mode:
        system += ("\n\nEl estudiante pidió una PISTA: da UNA sola pista corta que lo "
                   "acerque, sin revelar la respuesta.")

    # Two messages (user + assistant) per turn. Guard the zero case explicitly:
    # ``history[-0:]`` would return the WHOLE history rather than nothing.
    window = 2 * MAX_TURNS
    recent = history[-window:] if window > 0 else []

    messages = [{"role": "system", "content": system}]
    messages += _plain_history(recent)
    messages.append({"role": "user", "content": user_msg})

    reply = _generate(messages, max_new_tokens=240 if hint_mode else 340)
    return history + [
        {"role": "user", "content": user_msg},
        {"role": "assistant", "content": reply},
    ], ""


CSS = """
.gradio-container {max-width: 1024px !important; margin: 0 auto;}
#title {text-align:center; margin-bottom: 0;}
#subtitle {text-align:center; color:#6b7280; margin-top:2px; font-size:0.95rem;}
.example-btn button {font-weight:500;}
footer {visibility:hidden}
"""

with gr.Blocks(title="EduCrate - Socratic Tutor", theme=gr.themes.Soft(
        primary_hue="slate", neutral_hue="slate"), css=CSS) as demo:
    gr.Markdown("# EduCrate", elem_id="title")
    gr.Markdown(
        "A Socratic tutor that never gives the answer — it guides with questions. "
        "Spanish-language, for Peruvian public-school students. Math reasoning and "
        "reading comprehension. Runs on CPU.",
        elem_id="subtitle")
    if _load_error:
        gr.Markdown(f"> Model `{MODEL_ID}` failed to load: {_load_error}")

    gr.Markdown("**Try an example** (the tutor replies in Spanish):")
    ex_buttons = []
    with gr.Row():
        for label, reading, message in EXAMPLES:
            b = gr.Button(label, size="sm", elem_classes="example-btn", scale=1)
            ex_buttons.append((b, reading, message))

    with gr.Row(equal_height=False):
        with gr.Column(scale=1):
            reading_text = gr.Textbox(
                label="Reading passage (optional)",
                placeholder="Paste a short text to practice reading comprehension.",
                lines=6)
            mode = gr.Radio(choices=list(MODES.keys()), value="Just chat",
                            label="What do you need?")
            hint_mode = gr.Checkbox(label="Give me a single short hint")
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Conversation", height=460)
            user_input = gr.Textbox(
                label="Your question", lines=2,
                placeholder="Type your question in Spanish and press Send.")
            with gr.Row():
                submit_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("New topic", variant="secondary")

    inp = [user_input, chatbot, reading_text, mode, hint_mode]
    submit_btn.click(respond, inp, [chatbot, user_input])
    user_input.submit(respond, inp, [chatbot, user_input])
    clear_btn.click(lambda: ([], "", ""), outputs=[chatbot, user_input, reading_text])
    for b, reading, message in ex_buttons:
        # Default arguments bind this card's values now; a plain closure would
        # see only the last loop iteration. The first step fills the inputs and
        # resets the chat, the chained step sends the message.
        b.click(lambda r=reading, m=message: (r, m, []),
                outputs=[reading_text, user_input, chatbot]).then(
            respond, inp, [chatbot, user_input])

    with gr.Accordion("About this project", open=False):
        gr.Markdown(
            """
**The problem.** Peru's public secondary schools face a deep learning crisis. In
**PISA 2022 (OECD)**, only **34%** of Peruvian 15-year-olds reached basic proficiency in
mathematics (66% below) and **50%** in reading. Peru's national assessment
(**ECE / MINEDU, grade 8, 2022**) found only about **12.7%** *Satisfactory* in math,
with public (state) schools far behind private ones.

**The approach.** Most chatbots hand over the answer, which does not build reasoning.
EduCrate never gives the final answer — it asks one guiding question at a time, detects
the student's mistake, and offers progressive hints (the maieutic method).

**The model.** Qwen3-0.6B fine-tuned (SFT, with a GRPO variant) on ~4,900 Spanish
Socratic dialogues. It runs on CPU, so it works on low-resource laptops common in public
schools — no GPU and no paid API required.

**Measured behavior (held-out mGSM-es).** The fine-tune raised the answer-withholding
rate from 84% (base) to 100%, turning verbose solutions into concise guiding questions.

*It is a 0.6B model: guidance is sometimes imperfect. Built for the Build Small Hackathon
(Backyard AI). Made with generative AI; validate pedagogical use with a teacher.*
"""
        )

if __name__ == "__main__":
    demo.launch()