Spaces:

build-small-hackathon
/

educrate

Sleeping

App Files Files Community

educrate / app.py

fabrizziomcl

Revert anti-bleed prompt mitigation (ineffective)

88498da verified 16 days ago

Raw

History Blame Contribute Delete

8.82 kB

	# -*- coding: utf-8 -*-
	"""
	EduCrate - Socratic Tutor (Gradio app)
	A Spanish-language Socratic tutor for Peruvian public secondary-school students.

	UI in English (international judges); the tutoring happens in Spanish.
	Model: Qwen3-0.6B fine-tuned (SFT). Runs on CPU. Gradio 6 (messages format).
	"""
	import os
	import re
	import gradio as gr
	import torch

	# Runtime configuration. MODEL_ID and MAX_TURNS may be overridden through
	# environment variables of the same name.
	MODEL_ID = os.getenv("MODEL_ID", "build-small-hackathon/educrate-qwen3-bi")
	# MAX_TURNS bounds how much chat history is replayed to the model (two
	# messages per turn, see respond()).
	MAX_TURNS = int(os.getenv("MAX_TURNS", "8"))
	# Matches a complete <think>...</think> span, newlines included.
	_THINK = re.compile(r"<think>.*?</think>", flags=re.DOTALL)
	# bfloat16 when CUDA is available, float32 on CPU.
	if torch.cuda.is_available():
	    DEVICE, DTYPE = "cuda", torch.bfloat16
	else:
	    DEVICE, DTYPE = "cpu", torch.float32

	# Must match the training system prompt (scripts/22_sft_qwen3_lora.py) — bilingual.
	# Written one sentence per item; the sentences are joined with single spaces.
	SYSTEM_PROMPT = " ".join((
	    "You are EduCrate, a Socratic tutor for secondary-school students "
	    "(math reasoning and reading comprehension).",
	    "Reply in the student's language (Spanish or English).",
	    "First reason briefly inside <think>...</think>, then ask ONE guiding question.",
	    "ABSOLUTE RULE: never give the final answer or result; guide with progressive "
	    "hints until the student discovers it.",
	    "Be warm, brief and clear.",
	))

	# Maps each "What do you need?" choice to extra Spanish instructions that
	# respond() appends to the system prompt. Insertion order is the order in
	# which the choices are offered in the UI.
	MODES = {"Just chat": ""}
	MODES["Understand my mistake"] = (
	    "\n\nEl estudiante quiere entender su razonamiento: NO le des datos ni la "
	    "respuesta; respóndele con una contrapregunta que lo haga revisar su propio paso."
	)
	MODES["I need a fact or formula"] = (
	    "\n\nEl estudiante pide un dato o fórmula puntual: puedes darlo de forma breve, "
	    "pero NUNCA lo apliques hasta la respuesta final por él; devuélvele la pregunta."
	)

	# Example cards, one per button:
	# (English button label, reading passage, Spanish student message).
	_NO_PASSAGE = ""
	_RECYCLING_PASSAGE = (
	    "El reciclaje ayuda a reducir la basura en las ciudades. Si separamos los "
	    "plásticos y el papel, menos residuos llegan a los ríos."
	)
	EXAMPLES = [
	    (
	        "Linear equation",
	        _NO_PASSAGE,
	        "Ayúdame a resolver 3x + 6 = 15, pero no me des la respuesta.",
	    ),
	    (
	        "Adding fractions",
	        _NO_PASSAGE,
	        "No entiendo cómo sumar 1/2 + 1/3. ¿Me ayudas a pensarlo?",
	    ),
	    (
	        "Percentages",
	        _NO_PASSAGE,
	        "¿Cómo calculo el 20% de 50? No me lo resuelvas, guíame paso a paso.",
	    ),
	    (
	        "Reading comprehension",
	        _RECYCLING_PASSAGE,
	        "¿Cuál es la idea principal de este texto? Guíame, no me des la respuesta.",
	    ),
	]

	# Load the tokenizer and model once at import time. Loading is best-effort:
	# on any failure the app still starts, `model` and `tokenizer` are both None,
	# and `_load_error` holds a non-empty "ExceptionType: message" description.
	_load_error = None
	model = tokenizer = None
	try:
	    # Imported here so a missing/broken transformers install is reported like
	    # any other load failure instead of crashing the app.
	    from transformers import AutoModelForCausalLM, AutoTokenizer
	    print(f"Loading {MODEL_ID} on {DEVICE} ...")
	    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
	    if tokenizer.pad_token is None:
	        tokenizer.pad_token = tokenizer.eos_token
	    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype=DTYPE).to(DEVICE)
	    model.eval()
	    print("Model loaded.")
	except Exception as e:  # noqa: BLE001
	    # Never leave a half-initialised pair behind: code elsewhere treats
	    # "model is None" and "_load_error is set" as the same condition.
	    model = tokenizer = None
	    # str(e) alone can be empty (e.g. MemoryError()), which would make the
	    # error look falsy; the type name keeps the message non-empty.
	    _load_error = f"{type(e).__name__}: {e}"
	    print(f"[WARN] Could not load model: {e}")

	if model is not None and DEVICE == "cpu":
	    # Thread tuning is optional; a bad OMP_NUM_THREADS value must not be
	    # reported as a model-load failure.
	    try:
	        torch.set_num_threads(int(os.environ.get("OMP_NUM_THREADS", "4")))
	    except (ValueError, RuntimeError) as e:
	        print(f"[WARN] Could not set CPU thread count: {e}")


	def _render(messages):
	    """Render chat messages into one prompt string that ends with the generation prompt.

	    The chat template is first asked to disable thinking; if that keyword is
	    rejected with a TypeError, the template is applied again without it.
	    """
	    template_args = {"tokenize": False, "add_generation_prompt": True}
	    try:
	        prompt = tokenizer.apply_chat_template(
	            messages, enable_thinking=False, **template_args)
	    except TypeError:
	        prompt = tokenizer.apply_chat_template(messages, **template_args)
	    return prompt


	def _generate(messages, max_new_tokens):
	    """Sample one assistant reply for the given chat messages.

	    Args:
	        messages: chat messages (role/content dicts) rendered through _render().
	        max_new_tokens: upper bound on the number of generated tokens.

	    Returns:
	        The decoded continuation (prompt tokens excluded) with complete
	        <think>...</think> spans and any leftover think tags removed, stripped
	        of surrounding whitespace.
	    """
	    inputs = tokenizer(_render(messages), return_tensors="pt").to(DEVICE)
	    prompt_len = inputs["input_ids"].shape[1]
	    # Fall back to EOS only when no pad id is configured. A plain `or` would
	    # also discard a legitimate pad id of 0.
	    pad_id = tokenizer.pad_token_id
	    if pad_id is None:
	        pad_id = tokenizer.eos_token_id
	    with torch.no_grad():
	        out = model.generate(
	            **inputs,
	            max_new_tokens=max_new_tokens,
	            do_sample=True,
	            temperature=0.45,
	            top_p=0.9,
	            repetition_penalty=1.15,
	            pad_token_id=pad_id,
	        )
	    # Keep only the newly generated tokens of the first (only) sequence.
	    text = tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)
	    text = _THINK.sub("", text)  # hide the model's brief reasoning
	    # Drop unpaired tags that the pattern above cannot match.
	    text = text.replace("<think>", "").replace("</think>", "")
	    return text.strip()


	def _content_text(content):
	    """Flatten one chat message's content into a plain string."""
	    if content is None:
	        return ""
	    if isinstance(content, str):
	        return content
	    if isinstance(content, dict):
	        # NOTE(review): assumes structured parts look like
	        # {"type": "text", "text": "..."}; non-text parts contribute nothing.
	        # Confirm against the installed Gradio version.
	        text = content.get("text")
	        return text if isinstance(text, str) else ""
	    if isinstance(content, (list, tuple)):
	        pieces = (_content_text(part) for part in content)
	        return "\n".join(piece for piece in pieces if piece)
	    return str(content)


	def _recent_history(history, max_turns):
	    """Return the last `max_turns` exchanges as plain role/content dicts."""
	    # history[-0:] would be the WHOLE list, and a negative count would slice
	    # from the front, so a non-positive limit means "send no history".
	    if max_turns <= 0:
	        return []
	    return [
	        {"role": msg["role"], "content": _content_text(msg.get("content"))}
	        for msg in history[-2 * max_turns:]
	        if isinstance(msg, dict) and "role" in msg
	    ]


	def respond(user_msg, history, reading_text, mode, hint_mode):
	    """Handle one chat submission and return the updated conversation.

	    Args:
	        user_msg: text typed by the student; None/blank input is ignored.
	        history: previous chat messages (list of role/content dicts) or None.
	        reading_text: optional reading passage appended to the system prompt.
	        mode: key into MODES selecting extra instructions; unknown keys add nothing.
	        hint_mode: when truthy, ask for a single short hint and use a smaller
	            generation budget (240 instead of 340 new tokens).

	    Returns:
	        A ``(history, "")`` tuple: the conversation with the new user and
	        assistant messages appended (unchanged for blank input), and an empty
	        string that clears the input textbox.
	    """
	    history = history or []
	    if not user_msg or not user_msg.strip():
	        return history, ""
	    user_turn = {"role": "user", "content": user_msg}
	    if model is None:
	        notice = {"role": "assistant",
	                  "content": f"Model failed to load ({_load_error})."}
	        return history + [user_turn, notice], ""

	    system = SYSTEM_PROMPT + MODES.get(mode, "")
	    if reading_text and reading_text.strip():
	        system += f"\n\nTexto de lectura del estudiante:\n{reading_text.strip()}"
	    if hint_mode:
	        system += ("\n\nEl estudiante pidió una PISTA: da UNA sola pista corta que lo "
	                   "acerque, sin revelar la respuesta.")

	    # Only role + plain-text content is forwarded to the chat template; the
	    # history returned to the UI keeps its original entries untouched.
	    messages = [{"role": "system", "content": system}]
	    messages += _recent_history(history, MAX_TURNS)
	    messages.append(dict(user_turn))
	    reply = _generate(messages, max_new_tokens=240 if hint_mode else 340)
	    return history + [user_turn, {"role": "assistant", "content": reply}], ""


	# Custom stylesheet passed to gr.Blocks(css=...): caps and centers the page
	# width, styles the #title / #subtitle elements and the example buttons, and
	# hides the page footer.
	CSS = """
	.gradio-container {max-width: 1024px !important; margin: 0 auto;}
	#title {text-align:center; margin-bottom: 0;}
	#subtitle {text-align:center; color:#6b7280; margin-top:2px; font-size:0.95rem;}
	.example-btn button {font-weight:500;}
	footer {visibility:hidden}
	"""

	# Build the Gradio UI. Component creation order defines the page layout, so
	# statements are kept in their original order.
	# NOTE(review): nesting was reconstructed from context; in particular the
	# Send / New topic row is assumed to sit inside the right-hand column — confirm.
	with gr.Blocks(title="EduCrate - Socratic Tutor", theme=gr.themes.Soft(
	        primary_hue="slate", neutral_hue="slate"), css=CSS) as demo:
	    gr.Markdown("# EduCrate", elem_id="title")
	    gr.Markdown(
	        "A Socratic tutor that never gives the answer — it guides with questions. "
	        "Spanish-language, for Peruvian public-school students. Math reasoning and "
	        "reading comprehension. Runs on CPU.", elem_id="subtitle")
	    # Compare against None rather than truthiness: an exception with an empty
	    # message must still surface the load-failure banner.
	    if _load_error is not None:
	        gr.Markdown(f"> Model `{MODEL_ID}` failed to load: {_load_error}")

	    gr.Markdown("Try an example (the tutor replies in Spanish):")
	    # One button per example card; wired up below once the inputs exist.
	    ex_buttons = []
	    with gr.Row():
	        for label, reading, message in EXAMPLES:
	            example_btn = gr.Button(
	                label, size="sm", elem_classes="example-btn", scale=1)
	            ex_buttons.append((example_btn, reading, message))

	    with gr.Row(equal_height=False):
	        with gr.Column(scale=1):
	            reading_text = gr.Textbox(
	                label="Reading passage (optional)",
	                placeholder="Paste a short text to practice reading comprehension.",
	                lines=6)
	            mode = gr.Radio(choices=list(MODES.keys()), value="Just chat",
	                            label="What do you need?")
	            hint_mode = gr.Checkbox(label="Give me a single short hint")
	        with gr.Column(scale=2):
	            chatbot = gr.Chatbot(label="Conversation", height=460)
	            user_input = gr.Textbox(
	                label="Your question", lines=2,
	                placeholder="Type your question in Spanish and press Send.")
	            with gr.Row():
	                submit_btn = gr.Button("Send", variant="primary")
	                clear_btn = gr.Button("New topic", variant="secondary")

	    # respond() receives these components' values in this order and writes
	    # back the conversation plus an emptied input box.
	    inp = [user_input, chatbot, reading_text, mode, hint_mode]
	    submit_btn.click(respond, inp, [chatbot, user_input])
	    user_input.submit(respond, inp, [chatbot, user_input])
	    # "New topic" clears the conversation, the input box and the passage.
	    clear_btn.click(lambda: ([], "", ""), outputs=[chatbot, user_input, reading_text])
	    for example_btn, reading, message in ex_buttons:
	        # Default arguments bind this iteration's values. The click first
	        # fills passage + question and resets the chat, then runs respond().
	        example_btn.click(
	            lambda r=reading, m=message: (r, m, []),
	            outputs=[reading_text, user_input, chatbot],
	        ).then(respond, inp, [chatbot, user_input])

	    with gr.Accordion("About this project", open=False):
	        gr.Markdown(
	            """
	The problem. Peru's public secondary schools face a deep learning crisis. In
	PISA 2022 (OECD), only 34% of Peruvian 15-year-olds reached basic proficiency
	in mathematics (66% below) and 50% in reading. Peru's national assessment
	(ECE / MINEDU, grade 8, 2022) found only about 12.7% Satisfactory in math,
	with public (state) schools far behind private ones.

	The approach. Most chatbots hand over the answer, which does not build reasoning.
	EduCrate never gives the final answer — it asks one guiding question at a time, detects
	the student's mistake, and offers progressive hints (the maieutic method).

	The model. Qwen3-0.6B fine-tuned (SFT, with a GRPO variant) on ~4,900 Spanish
	Socratic dialogues. It runs on CPU, so it works on low-resource laptops common in
	public schools — no GPU and no paid API required.

	Measured behavior (held-out mGSM-es). The fine-tune raised the answer-withholding
	rate from 84% (base) to 100%, turning verbose solutions into concise guiding questions.

	*It is a 0.6B model: guidance is sometimes imperfect. Built for the Build Small
	Hackathon (Backyard AI). Made with generative AI; validate pedagogical use with a teacher.*
	"""
	        )

	# Start the Gradio server only when this file is executed as a script;
	# importing the module just builds `demo`.
	if __name__ == "__main__":
	    demo.launch()