# set-ufps-demo / app.py
# Author: Orlando Beltran
# feat: add demo app (commit c050d7c)
"""
SET-UFPS Demo — Hugging Face Space
Analyzes student teaching evaluation comments using 4 fine-tuned Spanish NLP models.
"""
import gradio as gr
import torch
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer, ElectraTokenizerFast
# CONFIGURATION
HF_USER = "DevOB"    # Hugging Face account that hosts the fine-tuned checkpoints
MAX_LENGTH = 128     # tokenizer padding/truncation length (tokens)
THRESHOLD = 0.5      # sigmoid cutoff for multi-label category detection

# Model registry: each entry points at TWO fine-tuned checkpoints on the Hub —
# one for risk-level classification, one for pedagogical-category classification.
# "tokenizer_type" selects the loading path in cargar_modelo (ELECTRA models
# need ElectraTokenizerFast; the rest resolve via AutoTokenizer).
MODELOS = {
    "RoBERTuito": {
        "repo_riesgo": f"{HF_USER}/set-ufps-robertuito-riesgo",
        "repo_categorias": f"{HF_USER}/set-ufps-robertuito-categorias",
        "tokenizer_type": "auto",
        "description": "RoBERTa pre-trained on informal Spanish (Twitter/social media)",
    },
    "BETO": {
        "repo_riesgo": f"{HF_USER}/set-ufps-beto-riesgo",
        "repo_categorias": f"{HF_USER}/set-ufps-beto-categorias",
        "tokenizer_type": "auto",
        "description": "BERT pre-trained on formal Spanish (Wikipedia + news)",
    },
    "DistilBETO": {
        "repo_riesgo": f"{HF_USER}/set-ufps-distilbeto-riesgo",
        "repo_categorias": f"{HF_USER}/set-ufps-distilbeto-categorias",
        "tokenizer_type": "auto",
        "description": "Distilled version of BETO — 40% fewer parameters, faster inference",
    },
    "Electricidad": {
        "repo_riesgo": f"{HF_USER}/set-ufps-electricidad-riesgo",
        "repo_categorias": f"{HF_USER}/set-ufps-electricidad-categorias",
        "tokenizer_type": "electra",
        "description": "ELECTRA-based model pre-trained on Spanish corpus",
    },
}

# Single-label risk classes (index order matches the risk model's output head).
ETIQUETAS_RIESGO = ["BAJO", "MEDIO", "ALTO"]

# Multi-label pedagogical categories (index order matches the category model's head).
ETIQUETAS_CATEGORIAS = [
    "DESARROLLO DEL CONOCIMIENTO",
    "DESEMPEÑO DOCENTE",
    "PROCESOS DE EVALUACIÓN",
    "INTEGRACIÓN INTERPERSONAL",
    "SIN CATEGORIA",
]

# Emoji used when rendering the predicted risk level.
RIESGO_EMOJI = {"BAJO": "🟢", "MEDIO": "🟡", "ALTO": "🔴"}

# ==========================================
# MODEL CACHE
# Models are loaded once and reused across requests.
# ==========================================
cache: dict[str, tuple] = {}
def cargar_modelo(nombre: str):
    """Load (or fetch from cache) the tokenizer/model pairs for *nombre*.

    Returns a 5-tuple ``(risk_tokenizer, risk_model, category_tokenizer,
    category_model, device)``. Results are memoized in the module-level
    ``cache`` so each checkpoint is downloaded and instantiated only once.
    """
    cached = cache.get(nombre)
    if cached is not None:
        return cached

    cfg = MODELOS[nombre]
    device = torch.device("cpu")

    # "Electricidad" needs the ELECTRA-specific fast tokenizer; every other
    # model resolves correctly through AutoTokenizer.
    tokenizer_cls = (
        ElectraTokenizerFast if cfg["tokenizer_type"] == "electra" else AutoTokenizer
    )
    tok_riesgo = tokenizer_cls.from_pretrained(cfg["repo_riesgo"])
    tok_categorias = tokenizer_cls.from_pretrained(cfg["repo_categorias"])

    # Inference-only: move to CPU and switch to eval mode.
    mod_riesgo = (
        AutoModelForSequenceClassification.from_pretrained(cfg["repo_riesgo"])
        .to(device)
        .eval()
    )
    mod_categorias = (
        AutoModelForSequenceClassification.from_pretrained(cfg["repo_categorias"])
        .to(device)
        .eval()
    )

    cache[nombre] = (tok_riesgo, mod_riesgo, tok_categorias, mod_categorias, device)
    return cache[nombre]
# ==========================================
# PREDICTION
# ==========================================
def _logits(tokenizer, model, device, comentario: str):
    """Tokenize *comentario* and return the model's raw logits (no grad)."""
    inputs = tokenizer(
        comentario, return_tensors="pt",
        padding="max_length", truncation=True, max_length=MAX_LENGTH,
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        return model(**inputs).logits


def _formato_riesgo(probs_r) -> str:
    """Render the risk markdown table; the argmax class is marked with arrows."""
    nivel = ETIQUETAS_RIESGO[int(np.argmax(probs_r))]
    emoji = RIESGO_EMOJI[nivel]
    texto = f"## {emoji} Risk Level: **{nivel}**\n\n"
    texto += "| Class | Probability |\n|---|---|\n"
    for clase, prob in zip(ETIQUETAS_RIESGO, probs_r):
        marker = "**→**" if clase == nivel else ""
        texto += f"| {marker} {clase} {marker} | {prob:.2%} |\n"
    return texto


def _formato_categorias(probs_c) -> str:
    """Render the multi-label category markdown table with per-label flags.

    A label counts as "detected" when its sigmoid probability reaches THRESHOLD.
    """
    texto = "## 📚 Pedagogical Categories\n\n"
    texto += "| Category | Probability | Detected |\n|---|---|---|\n"
    for etiqueta, prob in zip(ETIQUETAS_CATEGORIAS, probs_c):
        prob = float(prob)
        detected = "✅" if prob >= THRESHOLD else "—"
        bold = "**" if prob >= THRESHOLD else ""
        texto += f"| {bold}{etiqueta}{bold} | {prob:.2%} | {detected} |\n"
    return texto


def predecir(comentario: str, modelo_nombre: str):
    """Classify a student comment with the selected model pair.

    Parameters:
        comentario: raw comment text (Spanish).
        modelo_nombre: key into MODELOS selecting the fine-tuned pair.

    Returns:
        Two markdown strings: (risk-level table, category table).
        Blank input returns a warning message and an empty string.

    Note: the previous version also built a sorted ``detectadas`` list of
    above-threshold categories (with an argmax fallback) that was never used
    in either output — that dead computation has been removed.
    """
    if not comentario.strip():
        return "⚠️ Please enter a comment.", ""

    tok_r, mod_r, tok_c, mod_c, device = cargar_modelo(modelo_nombre)

    # Risk is single-label: softmax over the 3 classes.
    probs_r = (
        torch.softmax(_logits(tok_r, mod_r, device, comentario), dim=-1)
        .squeeze().cpu().numpy()
    )
    # Categories are multi-label: independent sigmoid per label.
    probs_c = (
        torch.sigmoid(_logits(tok_c, mod_c, device, comentario))
        .squeeze().cpu().numpy()
    )

    return _formato_riesgo(probs_r), _formato_categorias(probs_c)
# ==========================================
# GRADIO INTERFACE
# ==========================================
with gr.Blocks(title="SET-UFPS Demo") as demo:
    # Page header.
    gr.Markdown("""
# 🎓 SET-UFPS — Teacher Evaluation AI Demo
**Universidad Francisco de Paula Santander**
Analyze student teaching evaluation comments using fine-tuned Spanish NLP models.
Select a model, enter a comment and click **Analyze**.
""")
    with gr.Row():
        # Left column: model picker + description of the selected model.
        with gr.Column(scale=1):
            modelo_selector = gr.Radio(
                choices=list(MODELOS.keys()),
                value="RoBERTuito",
                label="Select Model",
            )
            modelo_info = gr.Markdown(
                value=f"_{MODELOS['RoBERTuito']['description']}_"
            )
        # Right column: comment input and trigger button.
        with gr.Column(scale=2):
            comentario_input = gr.Textbox(
                label="Student Comment (Spanish)",
                placeholder="Escribe aquí el comentario del estudiante...",
                lines=4,
            )
            analizar_btn = gr.Button("🔍 Analyze", variant="primary")
    # Output panes: risk table (left) and category table (right).
    with gr.Row():
        output_riesgo = gr.Markdown(label="Risk Level")
        output_cats = gr.Markdown(label="Pedagogical Categories")
    # Footer with dataset / thesis details.
    gr.Markdown("""
---
**Models trained on:** 9,457 manually labeled student comments in Spanish.
**Tasks:** Risk level classification (BAJO / MEDIO / ALTO) + Pedagogical category multi-label classification.
**Bachelor's Thesis** — Systems Engineering, UFPS 2025.
""")

    # Update the description text when the model selection changes.
    def actualizar_info(modelo):
        """Return the italicized description for the selected model."""
        return f"_{MODELOS[modelo]['description']}_"
    modelo_selector.change(fn=actualizar_info, inputs=modelo_selector, outputs=modelo_info)

    # Run prediction on button click.
    analizar_btn.click(
        fn=predecir,
        inputs=[comentario_input, modelo_selector],
        outputs=[output_riesgo, output_cats],
    )

    # Predefined example inputs (comment + model pairs).
    gr.Examples(
        examples=[
            ["El profesor explica muy bien y domina los temas, aunque los exámenes son muy difíciles.", "RoBERTuito"],
            ["Siempre llega tarde y no respeta a los estudiantes.", "BETO"],
            ["Los criterios de evaluación no son claros y las notas tardan mucho.", "DistilBETO"],
            ["Buen profesor, explica con ejemplos reales y fomenta la participación.", "Electricidad"],
        ],
        inputs=[comentario_input, modelo_selector],
    )

demo.launch()