Spaces:
Running
Running
"""
SET-UFPS Demo — Hugging Face Space
Analyzes student teaching evaluation comments using 4 fine-tuned Spanish NLP models.
"""
| import gradio as gr | |
| import torch | |
| import numpy as np | |
| from transformers import AutoModelForSequenceClassification, AutoTokenizer, ElectraTokenizerFast | |
# ==========================================
# CONFIGURATION
# ==========================================
# Hugging Face account that hosts every fine-tuned checkpoint.
HF_USER = "DevOB"
# Token length passed to the tokenizers (max_length for padding/truncation).
MAX_LENGTH = 128
# Minimum sigmoid probability for a category to count as detected.
THRESHOLD = 0.5

# One row per selectable model:
# (display name, repo slug, tokenizer type, description shown in the UI).
_MODEL_SPECS = (
    (
        "RoBERTuito",
        "robertuito",
        "auto",
        "RoBERTa pre-trained on informal Spanish (Twitter/social media)",
    ),
    (
        "BETO",
        "beto",
        "auto",
        "BERT pre-trained on formal Spanish (Wikipedia + news)",
    ),
    (
        "DistilBETO",
        "distilbeto",
        "auto",
        "Distilled version of BETO — 40% fewer parameters, faster inference",
    ),
    (
        "Electricidad",
        "electricidad",
        "electra",
        "ELECTRA-based model pre-trained on Spanish corpus",
    ),
)

# Display name -> repositories of the risk and category heads, the tokenizer
# family to load, and a human-readable description.
MODELOS = {
    nombre: {
        "repo_riesgo": f"{HF_USER}/set-ufps-{slug}-riesgo",
        "repo_categorias": f"{HF_USER}/set-ufps-{slug}-categorias",
        "tokenizer_type": tipo_tokenizer,
        "description": descripcion,
    }
    for nombre, slug, tipo_tokenizer, descripcion in _MODEL_SPECS
}

# Risk labels, indexed by the risk head's output position.
# NOTE(review): order is assumed to match the checkpoints' label ids — confirm
# against each model's config.
ETIQUETAS_RIESGO = ["BAJO", "MEDIO", "ALTO"]

# Category labels, indexed by the category head's output position
# (same ordering assumption as above).
ETIQUETAS_CATEGORIAS = [
    "DESARROLLO DEL CONOCIMIENTO",
    "DESEMPEÑO DOCENTE",
    "PROCESOS DE EVALUACIÓN",
    "INTEGRACIÓN INTERPERSONAL",
    "SIN CATEGORIA",
]

# Emoji shown next to each risk level in the rendered result.
RIESGO_EMOJI = {"BAJO": "🟢", "MEDIO": "🟡", "ALTO": "🔴"}
# ==========================================
# MODEL CACHE
# Each model is loaded once and then reused.
# ==========================================
cache = {}


def cargar_modelo(nombre: str):
    """Return the tokenizers and models for *nombre*, loading them on first use.

    Args:
        nombre: Key into ``MODELOS`` selecting which model pair to load.

    Returns:
        A tuple ``(tok_r, mod_r, tok_c, mod_c, device)``: tokenizer and model
        for the risk head, tokenizer and model for the category head, and the
        torch device both models live on (always CPU here).

    Raises:
        KeyError: If *nombre* is neither cached nor a key of ``MODELOS``.
    """
    ya_cargado = cache.get(nombre)
    if ya_cargado is not None:
        return ya_cargado

    config = MODELOS[nombre]
    device = torch.device("cpu")
    repo_riesgo = config["repo_riesgo"]
    repo_categorias = config["repo_categorias"]

    # Entries typed "electra" load through the explicit ELECTRA fast tokenizer;
    # everything else goes through AutoTokenizer.
    if config["tokenizer_type"] == "electra":
        clase_tokenizer = ElectraTokenizerFast
    else:
        clase_tokenizer = AutoTokenizer

    tok_r = clase_tokenizer.from_pretrained(repo_riesgo)
    tok_c = clase_tokenizer.from_pretrained(repo_categorias)

    # Move to the device and switch to eval mode before caching.
    mod_r = AutoModelForSequenceClassification.from_pretrained(repo_riesgo)
    mod_r = mod_r.to(device).eval()
    mod_c = AutoModelForSequenceClassification.from_pretrained(repo_categorias)
    mod_c = mod_c.to(device).eval()

    cache[nombre] = (tok_r, mod_r, tok_c, mod_c, device)
    return cache[nombre]
# ==========================================
# PREDICTION
# ==========================================
def _inferir_logits(texto, tokenizer, modelo, device):
    """Tokenize *texto* and return the model's logits for that single input.

    The text is padded/truncated to ``MAX_LENGTH`` tokens and run without
    gradient tracking. The batch row is selected by index (not ``squeeze``),
    so the result keeps one axis even for a single-output head.
    """
    entradas = tokenizer(
        texto,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=MAX_LENGTH,
    )
    entradas = {clave: tensor.to(device) for clave, tensor in entradas.items()}
    with torch.no_grad():
        logits = modelo(**entradas).logits
    return logits[0]


def _formatear_riesgo(probs_r):
    """Render the risk probabilities as a Markdown heading plus table."""
    nivel = ETIQUETAS_RIESGO[int(np.argmax(probs_r))]
    partes = [
        f"## {RIESGO_EMOJI[nivel]} Risk Level: **{nivel}**\n\n",
        "| Class | Probability |\n|---|---|\n",
    ]
    for clase, prob in zip(ETIQUETAS_RIESGO, probs_r):
        # Arrows flank the predicted (arg-max) class only.
        marker = "**→**" if clase == nivel else ""
        partes.append(f"| {marker} {clase} {marker} | {float(prob):.2%} |\n")
    return "".join(partes)


def _formatear_categorias(probs_c):
    """Render the category probabilities as a Markdown heading plus table.

    A category is marked as detected when its probability reaches
    ``THRESHOLD``. If none does, the most probable category is marked instead
    and a note below the table says so.
    """
    probs = [float(p) for p in probs_c]
    detectados = {i for i, p in enumerate(probs) if p >= THRESHOLD}
    usa_respaldo = not detectados
    if usa_respaldo:
        # Nothing reached the threshold: fall back to the top-scoring category
        # so the table always flags at least one.
        detectados = {int(np.argmax(probs))}

    partes = [
        "## 📚 Pedagogical Categories\n\n",
        "| Category | Probability | Detected |\n|---|---|---|\n",
    ]
    for i, (etiqueta, prob) in enumerate(zip(ETIQUETAS_CATEGORIAS, probs)):
        if i in detectados:
            bold, detected = "**", "✅"
        else:
            bold, detected = "", "—"
        partes.append(f"| {bold}{etiqueta}{bold} | {prob:.2%} | {detected} |\n")
    if usa_respaldo:
        partes.append(
            f"\n_No category reached the {THRESHOLD:.0%} threshold; "
            "showing the most likely one._\n"
        )
    return "".join(partes)


def predecir(comentario: str, modelo_nombre: str):
    """Classify a student comment and return two Markdown reports.

    The risk head's logits go through a softmax and the arg-max class is the
    reported level. The category head's logits go through a sigmoid and each
    category is judged independently against ``THRESHOLD``.

    Args:
        comentario: Comment text. ``None``, empty or whitespace-only input
            returns a warning instead of running the models.
        modelo_nombre: Key of ``MODELOS`` naming the model pair to use.

    Returns:
        ``(resultado_riesgo, resultado_cats)``: Markdown for the risk level
        and for the pedagogical categories. For blank input the first element
        is a warning and the second is an empty string.
    """
    # The textbox value may be None as well as blank.
    if comentario is None or not comentario.strip():
        return "⚠️ Please enter a comment.", ""

    tok_r, mod_r, tok_c, mod_c, device = cargar_modelo(modelo_nombre)

    # --- Risk: single-label, softmax over classes ---
    logits_r = _inferir_logits(comentario, tok_r, mod_r, device)
    probs_r = torch.softmax(logits_r, dim=-1).cpu().numpy()

    # --- Categories: multi-label, independent sigmoid per class ---
    logits_c = _inferir_logits(comentario, tok_c, mod_c, device)
    probs_c = torch.sigmoid(logits_c).cpu().numpy()

    return _formatear_riesgo(probs_r), _formatear_categorias(probs_c)
# ==========================================
# GRADIO INTERFACE
# ==========================================
# NOTE(review): the source had lost its indentation and blank lines; the
# layout nesting and the Markdown text below are reconstructed as-is. Confirm
# the row/column structure and the paragraph breaks in the rendered page.
_MODELO_INICIAL = "RoBERTuito"

_ENCABEZADO_MD = """
# 🎓 SET-UFPS — Teacher Evaluation AI Demo
**Universidad Francisco de Paula Santander**
Analyze student teaching evaluation comments using fine-tuned Spanish NLP models.
Select a model, enter a comment and click **Analyze**.
"""

_PIE_MD = """
---
**Models trained on:** 9,457 manually labeled student comments in Spanish.
**Tasks:** Risk level classification (BAJO / MEDIO / ALTO) + Pedagogical category multi-label classification.
**Bachelor's Thesis** — Systems Engineering, UFPS 2025.
"""

# Predefined examples: (comment, model) pairs that fill both inputs.
_EJEMPLOS = [
    ["El profesor explica muy bien y domina los temas, aunque los exámenes son muy difíciles.", "RoBERTuito"],
    ["Siempre llega tarde y no respeta a los estudiantes.", "BETO"],
    ["Los criterios de evaluación no son claros y las notas tardan mucho.", "DistilBETO"],
    ["Buen profesor, explica con ejemplos reales y fomenta la participación.", "Electricidad"],
]


def actualizar_info(modelo):
    """Return the italicised description of the selected model."""
    return f"_{MODELOS[modelo]['description']}_"


with gr.Blocks(title="SET-UFPS Demo") as demo:
    gr.Markdown(_ENCABEZADO_MD)

    with gr.Row():
        # Left column: model picker and its description.
        with gr.Column(scale=1):
            modelo_selector = gr.Radio(
                choices=list(MODELOS.keys()),
                value=_MODELO_INICIAL,
                label="Select Model",
            )
            modelo_info = gr.Markdown(
                value=f"_{MODELOS[_MODELO_INICIAL]['description']}_"
            )
        # Right column: comment input and the trigger button.
        with gr.Column(scale=2):
            comentario_input = gr.Textbox(
                label="Student Comment (Spanish)",
                placeholder="Escribe aquí el comentario del estudiante...",
                lines=4,
            )
            analizar_btn = gr.Button("🔍 Analyze", variant="primary")

    with gr.Row():
        output_riesgo = gr.Markdown(label="Risk Level")
        output_cats = gr.Markdown(label="Pedagogical Categories")

    gr.Markdown(_PIE_MD)

    # Refresh the description whenever a different model is selected.
    modelo_selector.change(
        fn=actualizar_info,
        inputs=modelo_selector,
        outputs=modelo_info,
    )

    # Run the prediction on click.
    analizar_btn.click(
        fn=predecir,
        inputs=[comentario_input, modelo_selector],
        outputs=[output_riesgo, output_cats],
    )

    gr.Examples(
        examples=_EJEMPLOS,
        inputs=[comentario_input, modelo_selector],
    )

demo.launch()