Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +40 -37
src/streamlit_app.py
CHANGED
|
@@ -52,8 +52,11 @@ def _init_state() -> None:
|
|
| 52 |
"modelo_gen_validado": False,
|
| 53 |
"modelo_gen_confirmado": "",
|
| 54 |
"eval_running": False,
|
|
|
|
| 55 |
"eval_success": False,
|
| 56 |
"last_result": None,
|
|
|
|
|
|
|
| 57 |
}
|
| 58 |
for key, value in defaults.items():
|
| 59 |
if key not in st.session_state:
|
|
@@ -83,7 +86,8 @@ def _liberar_memoria() -> None:
|
|
| 83 |
torch.cuda.empty_cache()
|
| 84 |
|
| 85 |
|
| 86 |
-
def _render_reloj_tiempo_real(inicio_epoch: int) -> None:
|
|
|
|
| 87 |
components.html(
|
| 88 |
f"""
|
| 89 |
<div style='padding:0.35rem 0; font-size:0.95rem; color:#374151;'>
|
|
@@ -207,6 +211,9 @@ st.caption("Despliegue público en Hugging Face Spaces")
|
|
| 207 |
|
| 208 |
_init_state()
|
| 209 |
|
|
|
|
|
|
|
|
|
|
| 210 |
modo = st.radio(
|
| 211 |
"Selecciona el modo",
|
| 212 |
options=[ModoEvaluacion.POR_DEFECTO.value, ModoEvaluacion.PERSONALIZADA.value],
|
|
@@ -214,27 +221,6 @@ modo = st.radio(
|
|
| 214 |
disabled=st.session_state["eval_running"],
|
| 215 |
)
|
| 216 |
|
| 217 |
-
if st.session_state["eval_running"] and not st.session_state.get("eval_success"):
|
| 218 |
-
st.warning("Hay una evaluación en curso o interrumpida por una interacción.")
|
| 219 |
-
st.caption("No cambies parámetros ahora. Puedes cancelarla de forma segura.")
|
| 220 |
-
confirmar_cancelacion = st.checkbox(
|
| 221 |
-
"Confirmo que quiero cancelar la evaluación actual",
|
| 222 |
-
key="confirmar_cancelacion_eval",
|
| 223 |
-
value=False,
|
| 224 |
-
)
|
| 225 |
-
if st.button("Cancelar evaluación", key="cancelar_eval_btn"):
|
| 226 |
-
if confirmar_cancelacion:
|
| 227 |
-
st.session_state["eval_running"] = False
|
| 228 |
-
st.session_state["eval_success"] = False
|
| 229 |
-
st.session_state["last_result"] = None
|
| 230 |
-
st.session_state["modelo_eval_validado"] = False
|
| 231 |
-
st.session_state["modelo_eval_confirmado"] = ""
|
| 232 |
-
_liberar_memoria()
|
| 233 |
-
st.rerun()
|
| 234 |
-
else:
|
| 235 |
-
st.error("Debes confirmar la cancelación antes de continuar.")
|
| 236 |
-
st.stop()
|
| 237 |
-
|
| 238 |
# Unico parametro editable en UI para ambos modos.
|
| 239 |
timeout_segundos = st.slider(
|
| 240 |
"Timeout por llamada (segundos)",
|
|
@@ -367,6 +353,7 @@ selected_eval_types = st.multiselect(
|
|
| 367 |
options=TIPOS_EVALUACION_DISPONIBLES,
|
| 368 |
default=["preguntas_cerradas_esperadas"],
|
| 369 |
format_func=lambda x: LABELS_TIPOS_EVALUACION.get(x, x),
|
|
|
|
| 370 |
disabled=st.session_state["eval_running"],
|
| 371 |
)
|
| 372 |
|
|
@@ -389,16 +376,29 @@ if tipos_no_disponibles:
|
|
| 389 |
|
| 390 |
if st.button("Comenzar evaluación", key="comenzar_eval", disabled=st.session_state["eval_running"]):
|
| 391 |
st.session_state["eval_running"] = True
|
|
|
|
| 392 |
st.session_state["eval_success"] = False
|
| 393 |
st.session_state["last_result"] = None
|
| 394 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 395 |
_liberar_memoria()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
|
| 397 |
request = JobRequest(
|
| 398 |
modo_evaluacion=ModoEvaluacion.POR_DEFECTO.value,
|
| 399 |
tipo_evaluacion=TipoEvaluacion.PREGUNTAS_CERRADAS_ESPERADAS.value,
|
| 400 |
-
modelo_hf=
|
| 401 |
-
timeout_segundos=
|
| 402 |
)
|
| 403 |
|
| 404 |
temp_dir = Path(tempfile.mkdtemp(prefix="equitia_space_"))
|
|
@@ -408,33 +408,33 @@ if st.button("Comenzar evaluación", key="comenzar_eval", disabled=st.session_st
|
|
| 408 |
|
| 409 |
progress = st.progress(0.0)
|
| 410 |
progress_label = st.empty()
|
| 411 |
-
|
| 412 |
-
_render_reloj_tiempo_real(start_epoch)
|
| 413 |
|
| 414 |
def on_progress(done: int, total: int, current_file: str) -> None:
|
| 415 |
ratio = (done / total) if total else 0.0
|
| 416 |
elapsed = _formatear_duracion(time.perf_counter() - start_ts)
|
| 417 |
progress.progress(ratio)
|
| 418 |
progress_label.info(
|
| 419 |
-
f"Progreso: {done}/{total} prompts evaluados ({ratio * 100:.1f}%).
|
| 420 |
)
|
| 421 |
|
| 422 |
def invocar_prompt(prompt: str, instruccion_sistema: str | None = None) -> str:
|
| 423 |
return invocar_modelo_transformers(
|
| 424 |
-
|
| 425 |
prompt,
|
| 426 |
instruccion_sistema=instruccion_sistema,
|
| 427 |
)
|
| 428 |
|
| 429 |
-
try:
|
| 430 |
with st.spinner("Obteniendo modelo a evaluar..."):
|
| 431 |
-
cargar_modelo_transformers(
|
| 432 |
|
| 433 |
with st.spinner("Ejecutando proceso de evaluación..."):
|
| 434 |
result = ejecutar_job(
|
| 435 |
request,
|
| 436 |
job_dir,
|
| 437 |
-
selected_eval_types=
|
| 438 |
invocar_modelo_fn=invocar_prompt,
|
| 439 |
progress_callback=on_progress,
|
| 440 |
)
|
|
@@ -442,7 +442,7 @@ if st.button("Comenzar evaluación", key="comenzar_eval", disabled=st.session_st
|
|
| 442 |
progress.progress(1.0)
|
| 443 |
elapsed_total = _formatear_duracion(time.perf_counter() - start_ts)
|
| 444 |
progress_label.success(f"Evaluación completada. Tiempo total: {elapsed_total}")
|
| 445 |
-
|
| 446 |
|
| 447 |
resumen_path = result.job_dir / "resumen.json"
|
| 448 |
resultados_csv = result.graficos_dir / "resultados.csv"
|
|
@@ -453,7 +453,7 @@ if st.button("Comenzar evaluación", key="comenzar_eval", disabled=st.session_st
|
|
| 453 |
resumen = json.load(f)
|
| 454 |
|
| 455 |
zip_id = int(time.time())
|
| 456 |
-
modelo_slug = _slug_modelo(
|
| 457 |
zip_filename = f"resultados_equitia_{modelo_slug}_{zip_id}.zip"
|
| 458 |
zip_base = temp_dir / f"resultados_equitia_{zip_id}"
|
| 459 |
zip_path = Path(shutil.make_archive(str(zip_base), "zip", str(result.job_dir)))
|
|
@@ -481,17 +481,20 @@ if st.button("Comenzar evaluación", key="comenzar_eval", disabled=st.session_st
|
|
| 481 |
"zip_bytes": zip_path.read_bytes(),
|
| 482 |
"zip_filename": zip_filename,
|
| 483 |
"elapsed_total": elapsed_total,
|
| 484 |
-
"modelo":
|
| 485 |
}
|
| 486 |
st.session_state["eval_success"] = True
|
| 487 |
|
| 488 |
except Exception as exc:
|
| 489 |
-
st.
|
| 490 |
-
|
| 491 |
finally:
|
| 492 |
st.session_state["eval_running"] = False
|
|
|
|
|
|
|
| 493 |
_liberar_memoria()
|
| 494 |
shutil.rmtree(temp_dir, ignore_errors=True)
|
|
|
|
| 495 |
|
| 496 |
if st.session_state.get("eval_success") and st.session_state.get("last_result"):
|
| 497 |
resultado = st.session_state["last_result"]
|
|
|
|
| 52 |
"modelo_gen_validado": False,
|
| 53 |
"modelo_gen_confirmado": "",
|
| 54 |
"eval_running": False,
|
| 55 |
+
"eval_requested": False,
|
| 56 |
"eval_success": False,
|
| 57 |
"last_result": None,
|
| 58 |
+
"pending_eval": None,
|
| 59 |
+
"eval_error": None,
|
| 60 |
}
|
| 61 |
for key, value in defaults.items():
|
| 62 |
if key not in st.session_state:
|
|
|
|
| 86 |
torch.cuda.empty_cache()
|
| 87 |
|
| 88 |
|
| 89 |
+
def _render_reloj_tiempo_real(inicio_epoch: int, placeholder) -> None:
|
| 90 |
+
with placeholder:
|
| 91 |
components.html(
|
| 92 |
f"""
|
| 93 |
<div style='padding:0.35rem 0; font-size:0.95rem; color:#374151;'>
|
|
|
|
| 211 |
|
| 212 |
_init_state()
|
| 213 |
|
| 214 |
+
if st.session_state.get("eval_error"):
|
| 215 |
+
st.error(st.session_state["eval_error"])
|
| 216 |
+
|
| 217 |
modo = st.radio(
|
| 218 |
"Selecciona el modo",
|
| 219 |
options=[ModoEvaluacion.POR_DEFECTO.value, ModoEvaluacion.PERSONALIZADA.value],
|
|
|
|
| 221 |
disabled=st.session_state["eval_running"],
|
| 222 |
)
|
| 223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
# Unico parametro editable en UI para ambos modos.
|
| 225 |
timeout_segundos = st.slider(
|
| 226 |
"Timeout por llamada (segundos)",
|
|
|
|
| 353 |
options=TIPOS_EVALUACION_DISPONIBLES,
|
| 354 |
default=["preguntas_cerradas_esperadas"],
|
| 355 |
format_func=lambda x: LABELS_TIPOS_EVALUACION.get(x, x),
|
| 356 |
+
key="selected_eval_types",
|
| 357 |
disabled=st.session_state["eval_running"],
|
| 358 |
)
|
| 359 |
|
|
|
|
| 376 |
|
| 377 |
if st.button("Comenzar evaluación", key="comenzar_eval", disabled=st.session_state["eval_running"]):
|
| 378 |
st.session_state["eval_running"] = True
|
| 379 |
+
st.session_state["eval_requested"] = True
|
| 380 |
st.session_state["eval_success"] = False
|
| 381 |
st.session_state["last_result"] = None
|
| 382 |
+
st.session_state["eval_error"] = None
|
| 383 |
+
st.session_state["pending_eval"] = {
|
| 384 |
+
"modelo_hf": st.session_state["modelo_eval_confirmado"],
|
| 385 |
+
"timeout_segundos": timeout_segundos,
|
| 386 |
+
"selected_eval_types": selected_eval_types,
|
| 387 |
+
}
|
| 388 |
_liberar_memoria()
|
| 389 |
+
st.rerun()
|
| 390 |
+
|
| 391 |
+
if st.session_state.get("eval_running") and st.session_state.get("eval_requested"):
|
| 392 |
+
pending = st.session_state.get("pending_eval") or {}
|
| 393 |
+
modelo_hf = str(pending.get("modelo_hf", st.session_state.get("modelo_eval_confirmado", ""))).strip()
|
| 394 |
+
timeout_pendiente = int(pending.get("timeout_segundos", timeout_segundos))
|
| 395 |
+
tipos_pendientes = pending.get("selected_eval_types") or ["preguntas_cerradas_esperadas"]
|
| 396 |
|
| 397 |
request = JobRequest(
|
| 398 |
modo_evaluacion=ModoEvaluacion.POR_DEFECTO.value,
|
| 399 |
tipo_evaluacion=TipoEvaluacion.PREGUNTAS_CERRADAS_ESPERADAS.value,
|
| 400 |
+
modelo_hf=modelo_hf,
|
| 401 |
+
timeout_segundos=timeout_pendiente,
|
| 402 |
)
|
| 403 |
|
| 404 |
temp_dir = Path(tempfile.mkdtemp(prefix="equitia_space_"))
|
|
|
|
| 408 |
|
| 409 |
progress = st.progress(0.0)
|
| 410 |
progress_label = st.empty()
|
| 411 |
+
timer_placeholder = st.empty()
|
| 412 |
+
_render_reloj_tiempo_real(start_epoch, timer_placeholder)
|
| 413 |
|
| 414 |
def on_progress(done: int, total: int, current_file: str) -> None:
|
| 415 |
ratio = (done / total) if total else 0.0
|
| 416 |
elapsed = _formatear_duracion(time.perf_counter() - start_ts)
|
| 417 |
progress.progress(ratio)
|
| 418 |
progress_label.info(
|
| 419 |
+
f"Progreso: {done}/{total} prompts evaluados ({ratio * 100:.1f}%). Tiempo último prompt evaluado: {elapsed}. Archivo actual: {current_file}"
|
| 420 |
)
|
| 421 |
|
| 422 |
def invocar_prompt(prompt: str, instruccion_sistema: str | None = None) -> str:
|
| 423 |
return invocar_modelo_transformers(
|
| 424 |
+
modelo_hf,
|
| 425 |
prompt,
|
| 426 |
instruccion_sistema=instruccion_sistema,
|
| 427 |
)
|
| 428 |
|
| 429 |
+
try:
|
| 430 |
with st.spinner("Obteniendo modelo a evaluar..."):
|
| 431 |
+
cargar_modelo_transformers(modelo_hf)
|
| 432 |
|
| 433 |
with st.spinner("Ejecutando proceso de evaluación..."):
|
| 434 |
result = ejecutar_job(
|
| 435 |
request,
|
| 436 |
job_dir,
|
| 437 |
+
selected_eval_types=tipos_pendientes,
|
| 438 |
invocar_modelo_fn=invocar_prompt,
|
| 439 |
progress_callback=on_progress,
|
| 440 |
)
|
|
|
|
| 442 |
progress.progress(1.0)
|
| 443 |
elapsed_total = _formatear_duracion(time.perf_counter() - start_ts)
|
| 444 |
progress_label.success(f"Evaluación completada. Tiempo total: {elapsed_total}")
|
| 445 |
+
timer_placeholder.empty()
|
| 446 |
|
| 447 |
resumen_path = result.job_dir / "resumen.json"
|
| 448 |
resultados_csv = result.graficos_dir / "resultados.csv"
|
|
|
|
| 453 |
resumen = json.load(f)
|
| 454 |
|
| 455 |
zip_id = int(time.time())
|
| 456 |
+
modelo_slug = _slug_modelo(modelo_hf)
|
| 457 |
zip_filename = f"resultados_equitia_{modelo_slug}_{zip_id}.zip"
|
| 458 |
zip_base = temp_dir / f"resultados_equitia_{zip_id}"
|
| 459 |
zip_path = Path(shutil.make_archive(str(zip_base), "zip", str(result.job_dir)))
|
|
|
|
| 481 |
"zip_bytes": zip_path.read_bytes(),
|
| 482 |
"zip_filename": zip_filename,
|
| 483 |
"elapsed_total": elapsed_total,
|
| 484 |
+
"modelo": modelo_hf,
|
| 485 |
}
|
| 486 |
st.session_state["eval_success"] = True
|
| 487 |
|
| 488 |
except Exception as exc:
|
| 489 |
+
st.session_state["eval_error"] = f"Error durante la evaluación: {exc}"
|
| 490 |
+
timer_placeholder.empty()
|
| 491 |
finally:
|
| 492 |
st.session_state["eval_running"] = False
|
| 493 |
+
st.session_state["eval_requested"] = False
|
| 494 |
+
st.session_state["pending_eval"] = None
|
| 495 |
_liberar_memoria()
|
| 496 |
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 497 |
+
st.rerun()
|
| 498 |
|
| 499 |
if st.session_state.get("eval_success") and st.session_state.get("last_result"):
|
| 500 |
resultado = st.session_state["last_result"]
|