Upload app.py
Browse files
app.py
CHANGED
|
@@ -385,7 +385,6 @@ if page == "Processar vídeo nou":
|
|
| 385 |
detect_button_disabled = st.session_state.video_uploaded is None
|
| 386 |
if st.button("Detectar Personatges", disabled=detect_button_disabled):
|
| 387 |
log(f"\n--- DETECCIÓN DE PERSONAJES INICIADA ---")
|
| 388 |
-
log(f"Estado del vídeo: {st.session_state.video_uploaded}")
|
| 389 |
|
| 390 |
with st.spinner("Detectant personatges..."):
|
| 391 |
# Llamar al endpoint del engine para crear el casting inicial
|
|
@@ -432,8 +431,77 @@ if page == "Processar vídeo nou":
|
|
| 432 |
st.warning("**Possible causes:**\n- El Space 'engine' no està accessible públicament\n- El token d'API no és correcte\n- CORS bloquejat")
|
| 433 |
elif "Connection" in error_msg or "timeout" in error_msg:
|
| 434 |
st.warning(f"**No s'ha pogut connectar** amb el servei engine a: `{BACKEND_BASE_URL}`")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
else:
|
| 436 |
-
|
|
|
|
| 437 |
st.success("✅ Casting inicial creat. S'han generat subcarpetes a 'temp/<uploaded-video>/*'.")
|
| 438 |
except Exception as e:
|
| 439 |
error_msg = f"❌ Error inesperat: {e}"
|
|
@@ -453,33 +521,194 @@ if page == "Processar vídeo nou":
|
|
| 453 |
# ]
|
| 454 |
# st.session_state.characters_saved = False
|
| 455 |
|
| 456 |
-
# --- 3. Formularios de personajes ---
|
| 457 |
if st.session_state.characters_detected:
|
| 458 |
-
st.
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
with col1:
|
| 463 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 464 |
|
|
|
|
| 465 |
with col2:
|
| 466 |
-
st.
|
| 467 |
-
|
| 468 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 469 |
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
# Aquí iría la lógica para
|
| 477 |
-
st.session_state.characters_saved = True
|
| 478 |
-
st.success("Personatges desats correctament.")
|
| 479 |
-
|
| 480 |
-
with col2:
|
| 481 |
-
if st.session_state.characters_saved:
|
| 482 |
-
st.button("Generar Audiodescripció")
|
| 483 |
|
| 484 |
elif page == "Analitzar audio-descripcions":
|
| 485 |
require_login()
|
|
|
|
| 385 |
detect_button_disabled = st.session_state.video_uploaded is None
|
| 386 |
if st.button("Detectar Personatges", disabled=detect_button_disabled):
|
| 387 |
log(f"\n--- DETECCIÓN DE PERSONAJES INICIADA ---")
|
|
|
|
| 388 |
|
| 389 |
with st.spinner("Detectant personatges..."):
|
| 390 |
# Llamar al endpoint del engine para crear el casting inicial
|
|
|
|
| 431 |
st.warning("**Possible causes:**\n- El Space 'engine' no està accessible públicament\n- El token d'API no és correcte\n- CORS bloquejat")
|
| 432 |
elif "Connection" in error_msg or "timeout" in error_msg:
|
| 433 |
st.warning(f"**No s'ha pogut connectar** amb el servei engine a: `{BACKEND_BASE_URL}`")
|
| 434 |
+
elif isinstance(resp, dict) and resp.get("job_id"):
|
| 435 |
+
# The engine answered with a job_id, so detection runs asynchronously:
# poll the job until it is done, has failed, or the polling budget is spent.
import time  # kept local to this branch, as in the original code

job_id = resp["job_id"]
log(f"Job creado con ID: {job_id}")
log("Iniciando polling del estado...")

poll_interval_s = 5  # seconds to wait between two status requests
max_attempts = 60    # 60 polls x 5 s = 5 minutes at most
attempt = 0

# st.empty() is a single-element slot: whatever is written into it replaces
# what was there before. It hosts the spinner while polling and the final
# message afterwards.
message_placeholder = st.empty()

# (level, text) of the message to display once polling has ended. It is
# rendered only after the spinner block has exited, so the final message
# does not have to share the single slot with the live spinner.
outcome = None

with message_placeholder:
    with st.spinner("⏳ Detectant personatges... Això pot trigar uns minuts."):
        while attempt < max_attempts:
            job_status = api.get_job(job_id)
            status = job_status.get("status", "unknown")
            log(f"Polling attempt {attempt + 1}: status = {status}")

            if status == "done":
                log("✓ Job completado exitosamente")
                # Debug aid: dump the complete response structure.
                log(f"DEBUG - job_status completo: {job_status}")

                results = job_status.get("results")
                # A "results" value that is present but not a mapping (for
                # example null) is treated like a missing one instead of
                # raising on `.get`.
                if isinstance(results, dict):
                    log(f"DEBUG - results: {results}")
                    characters = results.get("characters", [])
                    log(f"DEBUG - characters extraídos: {characters}")

                    if characters:
                        st.session_state.characters_detected = characters
                        # Drop per-character editing state left over from a
                        # previous detection so the forms start from the
                        # newly detected characters.
                        if 'character_data' in st.session_state:
                            del st.session_state['character_data']

                        num_chars = len(characters)
                        log(f"Personajes detectados: {num_chars}")

                        # User-facing message (Catalan), singular vs plural.
                        if num_chars == 1:
                            outcome = ("success", f"✅ S'ha detectat {num_chars} personatge possible.")
                        else:
                            outcome = ("success", f"✅ S'han detectat {num_chars} personatges possibles.")
                    else:
                        log("WARNING - No se encontraron personajes en results")
                        outcome = ("warning", "⚠️ No s'han detectat personatges al vídeo.")
                else:
                    log("WARNING - No hay 'results' en job_status")
                    outcome = ("warning", "⚠️ No s'han rebut resultats del servidor.")
                break

            if status == "failed":
                error_msg = job_status.get("error", "Unknown error")
                log(f"✗ Job falló: {error_msg}")
                outcome = ("error", f"❌ Error en el processament: {error_msg}")
                break

            # Still running ("queued" / "processing") or an unrecognised
            # status: in both cases wait and ask again; only the latter is
            # worth a log line.
            if status not in ("queued", "processing"):
                log(f"Estado desconocido: {status}")
            time.sleep(poll_interval_s)
            attempt += 1
        else:
            # The loop ran out of attempts without reaching "done"/"failed".
            log("✗ Timeout: el job no se completó en el tiempo esperado")
            outcome = ("warning", "⚠️ El processament està trigant més del previst. El job continua executant-se al servidor.")

if outcome is not None:
    level, text = outcome
    render = {
        "success": message_placeholder.success,
        "warning": message_placeholder.warning,
        "error": message_placeholder.error,
    }[level]
    render(text)
|
| 502 |
else:
|
| 503 |
+
# Respuesta sin job_id ni error - asumimos éxito inmediato (modo antiguo)
|
| 504 |
+
log(f"✓ Respuesta recibida sin job_id (modo síncrono)")
|
| 505 |
st.success("✅ Casting inicial creat. S'han generat subcarpetes a 'temp/<uploaded-video>/*'.")
|
| 506 |
except Exception as e:
|
| 507 |
error_msg = f"❌ Error inesperat: {e}"
|
|
|
|
| 521 |
# ]
|
| 522 |
# st.session_state.characters_saved = False
|
| 523 |
|
| 524 |
+
# --- 3. Character forms (stacked, one per detected character) ---
if st.session_state.characters_detected:
    st.markdown("---")
    st.subheader(f"📋 Personatges detectats: {len(st.session_state.characters_detected)}")
    st.info("Edita cada personatge i confirma el fine-tuning manual al final. Els personatges amb el mateix nom es fusionaran.")

    # Per-character editing state, keyed by character id. Every detected
    # character gets an entry on each run (existing entries are kept), so a
    # character that shows up after the first initialisation cannot trigger
    # a KeyError in the lookups below.
    if 'character_data' not in st.session_state:
        st.session_state.character_data = {}
    for char in st.session_state.characters_detected:
        st.session_state.character_data.setdefault(char['id'], {
            'name': char.get('name', ''),
            'description': '',
            'selected_faces': list(range(char.get('num_faces', 0))),  # every face selected by default
            'selected_voices': [],  # voice samples are not handled yet
            'current_face_idx': 0,
            'current_voice_idx': 0,
        })

    # One form per character.
    for idx, char in enumerate(st.session_state.characters_detected):
        char_id = char['id']
        # Same dict object that lives in session state: edits made through
        # `char_data` are edits to the stored state.
        char_data = st.session_state.character_data[char_id]

        with st.container():
            st.markdown(f"### Personatge {idx + 1}: {char_data['name'] or char_id}")

            col1, col2 = st.columns([1, 1])

            # --- Column 1: sample viewers ---
            with col1:
                st.markdown("**🖼️ Mostres de cara:**")

                num_faces = char.get('num_faces', 0)
                selected_faces = char_data['selected_faces']
                if num_faces > 0 and selected_faces:
                    last_idx = len(selected_faces) - 1
                    # Clamp the stored position into the valid range; this
                    # also repairs an out-of-range value left in the session.
                    current_face_idx = min(max(char_data['current_face_idx'], 0), last_idx)
                    char_data['current_face_idx'] = current_face_idx

                    # Face navigation: previous / position / next / delete.
                    col_nav1, col_nav2, col_nav3, col_nav4 = st.columns([1, 2, 1, 1])

                    with col_nav1:
                        if st.button("◀", key=f"face_prev_{char_id}", disabled=(current_face_idx == 0)):
                            char_data['current_face_idx'] = max(0, current_face_idx - 1)
                            st.rerun()

                    with col_nav2:
                        st.caption(f"Cara {current_face_idx + 1} de {len(selected_faces)}")

                    with col_nav3:
                        if st.button("▶", key=f"face_next_{char_id}", disabled=(current_face_idx >= last_idx)):
                            char_data['current_face_idx'] = min(last_idx, current_face_idx + 1)
                            st.rerun()

                    with col_nav4:
                        if st.button("❌", key=f"face_delete_{char_id}", disabled=(len(selected_faces) <= 1)):
                            # Delete the face being displayed, by position:
                            # after a merge the list can hold the same face
                            # number twice, and removing by value would drop
                            # the first match instead of the shown one.
                            selected_faces.pop(current_face_idx)
                            # `selected_faces` is the stored list itself, so
                            # its length is already the new one here.
                            char_data['current_face_idx'] = min(current_face_idx, len(selected_faces) - 1)
                            st.rerun()

                    # Show the image of the current face.
                    if 'folder' in char:
                        try:
                            face_filename = f"face_{selected_faces[current_face_idx]:03d}.jpg"
                            face_url = f"{BACKEND_BASE_URL}/files/{st.session_state.video_name}/{char_id}/{face_filename}"
                            st.image(face_url, width=250)
                        except Exception as e:
                            # Best effort: a missing image must not break the form.
                            st.info(f"Imatge no disponible: {e}")
                else:
                    st.info("No hi ha mostres de cara")

                st.markdown("---")

                # Voice viewer (not implemented yet).
                st.markdown("**🎤 Mostres de veu:**")
                st.info("🚧 Funcionalitat de veu en desenvolupament")
                # TODO: implement a voice viewer similar to the face viewer.

            # --- Column 2: character data ---
            with col2:
                st.markdown("**📝 Informació del personatge:**")

                char_name = st.text_input(
                    "Nom del personatge:",
                    value=char_data['name'],
                    key=f"name_input_{char_id}",
                    placeholder="Ex: Maria, Joan, etc.",
                    help="Personatges amb el mateix nom es fusionaran",
                )
                # Keep the stored name in step with the widget.
                char_data['name'] = char_name

                char_description = st.text_area(
                    "Descripció (text lliure):",
                    value=char_data['description'],
                    key=f"desc_input_{char_id}",
                    placeholder="Ex: Dona d'uns 30 anys, cabell ros, ulleres...",
                    height=150,
                )
                # Keep the stored description in step with the widget.
                char_data['description'] = char_description

                # Read-only details.
                st.caption(f"**ID original:** {char_id}")
                st.caption(f"**Caras seleccionades:** {len(char_data['selected_faces'])} de {num_faces}")

        # Separator between two characters.
        st.markdown("---")

    # --- 4. Fine-tuning confirmation: merge characters that share a name ---
    st.markdown("### 🎯 Confirmació del fine-tuning manual")

    if st.button("✅ Confirmar fine-tuning i fusionar personatges", type="primary", use_container_width=True):
        # Final characters keyed by (stripped) name, in first-seen order.
        merged_characters = {}

        for char in st.session_state.characters_detected:
            char_id = char['id']
            char_data = st.session_state.character_data[char_id]
            # Unnamed characters get a name that embeds their id, so they
            # are never merged with one another.
            char_name = char_data['name'].strip() or f"Personatge sense nom {char_id}"

            if char_name not in merged_characters:
                merged_characters[char_name] = {
                    'id': f"merged_{len(merged_characters) + 1}",
                    'name': char_name,
                    'description': '',
                    'selected_faces': [],
                    'selected_voices': [],
                    'original_ids': [],
                }
            merged = merged_characters[char_name]

            merged['selected_faces'].extend(char_data['selected_faces'])
            merged['selected_voices'].extend(char_data['selected_voices'])
            merged['original_ids'].append(char_id)

            # Join the descriptions with " | ", skipping text that is
            # already contained in the merged description.
            description = char_data['description']
            if description and description not in merged['description']:
                if merged['description']:
                    merged['description'] += " | " + description
                else:
                    merged['description'] = description

        # NOTE(review): merged entries carry neither 'num_faces' nor
        # 'folder', which the face viewer above reads; after merging, the
        # characters therefore land in the "No hi ha mostres de cara"
        # branch. Confirm whether that is intended.
        st.session_state.characters_detected = list(merged_characters.values())

        # Rebuild the editing state for the merged characters.
        st.session_state.character_data = {
            char['id']: {
                'name': char['name'],
                'description': char['description'],
                'selected_faces': char['selected_faces'],
                'selected_voices': char['selected_voices'],
                'current_face_idx': 0,
                'current_voice_idx': 0,
            }
            for char in st.session_state.characters_detected
        }

        # Mark the characters as saved.
        st.session_state.characters_saved = True
        # NOTE(review): st.rerun() follows immediately, so this message and
        # the balloons may not stay visible - confirm.
        st.success(f"✅ Fine-tuning confirmat! {len(merged_characters)} personatges finals.")
        st.balloons()
        st.rerun()
|
| 704 |
|
| 705 |
+
# --- 5. Audio description trigger (shown only once the characters are saved) ---
characters_are_saved = st.session_state.characters_saved
if characters_are_saved:
    st.markdown("---")
    st.markdown("### 🎬 Generar audiodescripció")
    generate_clicked = st.button("🎬 Generar Audiodescripció", type="primary", use_container_width=True)
    if generate_clicked:
        # Placeholder: the generation logic itself is not implemented yet.
        st.info("🚧 Funcionalitat en desenvolupament...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 712 |
|
| 713 |
elif page == "Analitzar audio-descripcions":
|
| 714 |
require_login()
|