Upload 2 files
Browse files- api_client.py +22 -20
- app.py +98 -6
api_client.py
CHANGED
|
@@ -418,6 +418,15 @@ class APIClient:
|
|
| 418 |
# Cliente para SVision Space
|
| 419 |
# ===========================
|
| 420 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 421 |
def describe_image_with_svision(image_path: str, is_face: bool = True) -> Tuple[str, str]:
|
| 422 |
"""
|
| 423 |
Llama al space svision para describir una imagen (cara o escena).
|
|
@@ -469,33 +478,26 @@ def describe_image_with_svision(image_path: str, is_face: bool = True) -> Tuple[
|
|
| 469 |
if not full_description:
|
| 470 |
return ("", "")
|
| 471 |
|
| 472 |
-
# Generar nombre
|
| 473 |
if is_face:
|
| 474 |
# Extraer características clave para el nombre
|
| 475 |
desc_lower = full_description.lower()
|
| 476 |
|
| 477 |
-
# Determinar
|
| 478 |
-
|
| 479 |
-
is_female = any(word in desc_lower for word in ["dona", "noia", "nena", "femení"])
|
| 480 |
-
|
| 481 |
-
if is_young:
|
| 482 |
-
base_name = "Noia" if is_female else "Noi"
|
| 483 |
-
else:
|
| 484 |
-
base_name = "Dona" if is_female else "Home"
|
| 485 |
|
| 486 |
-
#
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
features.append("ulleres")
|
| 490 |
-
if "barba" in desc_lower:
|
| 491 |
-
features.append("barba")
|
| 492 |
-
if "bigoti" in desc_lower:
|
| 493 |
-
features.append("bigoti")
|
| 494 |
|
| 495 |
-
|
| 496 |
-
|
|
|
|
| 497 |
else:
|
| 498 |
-
|
|
|
|
|
|
|
|
|
|
| 499 |
else:
|
| 500 |
# Para escenas, extraer primeras palabras clave
|
| 501 |
words = full_description.split()[:4]
|
|
|
|
| 418 |
# Cliente para SVision Space
|
| 419 |
# ===========================
|
| 420 |
|
| 421 |
+
# Nombres catalanes comunes para asignar a personajes (deben coincidir con app.py)
|
| 422 |
+
def get_catalan_names():
    """Return the stock Catalan first names used to label detected characters.

    Returns:
        A ``(male_names, female_names)`` pair. Each element is a newly built
        list, so callers may mutate the result without affecting later calls.
    """
    male_names = (
        "Jordi", "Marc", "Pau", "Pere", "Joan",
        "Josep", "David", "Àlex", "Guillem", "Albert",
        "Arnau", "Martí", "Bernat", "Oriol", "Roger",
        "Pol", "Lluís", "Sergi", "Carles", "Xavier",
    )
    female_names = (
        "Maria", "Anna", "Laura", "Marta", "Cristina",
        "Núria", "Montserrat", "Júlia", "Sara", "Carla",
        "Alba", "Elisabet", "Rosa", "Gemma", "Sílvia",
        "Teresa", "Irene", "Laia", "Marina", "Bet",
    )
    return list(male_names), list(female_names)
|
| 429 |
+
|
| 430 |
def describe_image_with_svision(image_path: str, is_face: bool = True) -> Tuple[str, str]:
|
| 431 |
"""
|
| 432 |
Llama al space svision para describir una imagen (cara o escena).
|
|
|
|
| 478 |
if not full_description:
|
| 479 |
return ("", "")
|
| 480 |
|
| 481 |
+
# Generar nombre aleatorio en catalán para caras
|
| 482 |
if is_face:
|
| 483 |
# Extraer características clave para el nombre
|
| 484 |
desc_lower = full_description.lower()
|
| 485 |
|
| 486 |
+
# Determinar género
|
| 487 |
+
is_female = any(word in desc_lower for word in ["dona", "noia", "nena", "femení", "femenina"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
|
| 489 |
+
# Seleccionar nombre aleatorio pero consistente (hash del path)
|
| 490 |
+
import hashlib
|
| 491 |
+
hash_val = int(hashlib.md5(image_path.encode()).hexdigest(), 16)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
|
| 493 |
+
noms_home, noms_dona = get_catalan_names()
|
| 494 |
+
if is_female:
|
| 495 |
+
name_list = noms_dona
|
| 496 |
else:
|
| 497 |
+
name_list = noms_home
|
| 498 |
+
|
| 499 |
+
# Usar hash para selección consistente
|
| 500 |
+
short_name = name_list[hash_val % len(name_list)]
|
| 501 |
else:
|
| 502 |
# Para escenas, extraer primeras palabras clave
|
| 503 |
words = full_description.split()[:4]
|
app.py
CHANGED
|
@@ -85,11 +85,62 @@ set_db_path(DB_PATH)
|
|
| 85 |
init_schema()
|
| 86 |
|
| 87 |
# --- Helper de logging ---
|
| 88 |
-
def log(msg):
|
| 89 |
-
"""Helper
|
| 90 |
-
|
|
|
|
| 91 |
sys.stderr.flush()
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
def create_default_users_if_needed():
|
| 94 |
"""Asegura que existan los usuarios por defecto y sus contraseñas esperadas (texto plano)."""
|
| 95 |
log("Sincronizando usuarios por defecto...")
|
|
@@ -689,6 +740,20 @@ if page == "Processar vídeo nou":
|
|
| 689 |
# --- 5) Mostrar resultats: Clústers de veu (amb carrusels) ---
|
| 690 |
if st.session_state.get("audio_segments") is not None:
|
| 691 |
st.markdown("---")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 692 |
# Agrupar per etiqueta
|
| 693 |
segs = st.session_state.audio_segments or []
|
| 694 |
vlabels = st.session_state.voice_labels or []
|
|
@@ -759,7 +824,8 @@ if page == "Processar vídeo nou":
|
|
| 759 |
with c2:
|
| 760 |
name_key = f"{key_prefix}_name"
|
| 761 |
desc_key = f"{key_prefix}_desc"
|
| 762 |
-
|
|
|
|
| 763 |
st.text_input("Nom del clúster", value=st.session_state.get(name_key, default_name), key=name_key)
|
| 764 |
st.text_area("Descripció", value=st.session_state.get(desc_key, ""), key=desc_key, height=80)
|
| 765 |
|
|
@@ -882,6 +948,17 @@ if page == "Processar vídeo nou":
|
|
| 882 |
"folder": ch.get("folder"),
|
| 883 |
"kept_files": kept,
|
| 884 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 885 |
# Agrupar segments d'àudio per etiqueta de veu
|
| 886 |
segs = st.session_state.audio_segments or []
|
| 887 |
vlabels = st.session_state.voice_labels or []
|
|
@@ -893,7 +970,9 @@ if page == "Processar vídeo nou":
|
|
| 893 |
clip_local = seg.get("clip_path")
|
| 894 |
fname = _os.path.basename(clip_local) if clip_local else None
|
| 895 |
if fname:
|
| 896 |
-
|
|
|
|
|
|
|
| 897 |
# Incloure noms/descr. personalitzats del formulari si existeixen
|
| 898 |
if isinstance(lbl, int) and lbl >= 0:
|
| 899 |
vpref = f"voice_{int(lbl):02d}"
|
|
@@ -1001,6 +1080,17 @@ if page == "Processar vídeo nou":
|
|
| 1001 |
"description": desc,
|
| 1002 |
})
|
| 1003 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1004 |
# Agrupar segments de veu per etiqueta i nom normalizado
|
| 1005 |
segs = st.session_state.audio_segments or []
|
| 1006 |
vlabels = st.session_state.voice_labels or []
|
|
@@ -1012,7 +1102,9 @@ if page == "Processar vídeo nou":
|
|
| 1012 |
if not (isinstance(lbl, int) and lbl >= 0):
|
| 1013 |
continue
|
| 1014 |
vpref = f"voice_{int(lbl):02d}"
|
| 1015 |
-
|
|
|
|
|
|
|
| 1016 |
vname_normalized = normalize_name(vname_custom)
|
| 1017 |
vdesc = st.session_state.get(f"{vpref}_desc", "").strip()
|
| 1018 |
clip_local = seg.get("clip_path")
|
|
|
|
| 85 |
init_schema()
|
| 86 |
|
| 87 |
# --- Helper de logging ---
|
| 88 |
+
def log(msg: str):
    """Write ``msg`` to stderr, prefixed with the current timestamp.

    The line has the form ``[YYYY-MM-DD HH:MM:SS] msg`` and the stream is
    flushed immediately so the message is not held back in a buffer.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    err = sys.stderr
    err.write(f"[{stamp}] {msg}\n")
    err.flush()
|
| 93 |
|
| 94 |
+
def get_all_catalan_names():
    """Return every Catalan first name available for default labels.

    Returns:
        A ``(noms_home, noms_dona)`` pair of freshly built lists holding the
        male and female names respectively.
    """
    noms_home = ["Jordi", "Marc", "Pau", "Pere", "Joan", "Josep", "David", "Àlex", "Guillem", "Albert",
                 "Arnau", "Martí", "Bernat", "Oriol", "Roger", "Pol", "Lluís", "Sergi", "Carles", "Xavier"]
    noms_dona = ["Maria", "Anna", "Laura", "Marta", "Cristina", "Núria", "Montserrat", "Júlia", "Sara", "Carla",
                 "Alba", "Elisabet", "Rosa", "Gemma", "Sílvia", "Teresa", "Irene", "Laia", "Marina", "Bet"]
    return noms_home, noms_dona


def get_catalan_name_for_speaker(speaker_label: int, used_names_home: list = None, used_names_dona: list = None) -> str:
    """Pick a default Catalan name for a speaker (voice cluster).

    Gender is not detected: even labels receive a male name and odd labels a
    female one. Names already assigned to faces are preferred so that voices
    and faces can share a name; when none were supplied for that gender, the
    full name list is used instead.

    The name is chosen by position (``speaker_label // 2``) within the pool.
    This replaces a fallback based on the built-in ``hash()`` of a string,
    which is salted per interpreter process: the same speaker could receive
    a different default name after a restart, and two speakers could collide
    on one name. A positional pick is stable across processes and gives
    distinct names until the pool wraps around.

    Args:
        speaker_label: Speaker index (0, 1, 2, ...).
        used_names_home: Male names already used for faces, or ``None``.
        used_names_dona: Female names already used for faces, or ``None``.

    Returns:
        The selected Catalan name.
    """
    noms_home, noms_dona = get_all_catalan_names()

    if speaker_label % 2 == 0:
        reused, fallback = used_names_home, noms_home
    else:
        reused, fallback = used_names_dona, noms_dona

    # ``None`` and an empty list both mean "nothing to reuse".
    pool = reused or fallback

    # Labels 0, 2, 4 -> 0, 1, 2 and 1, 3, 5 -> 0, 1, 2; wrap when exhausted.
    return pool[(speaker_label // 2) % len(pool)]
|
| 143 |
+
|
| 144 |
def create_default_users_if_needed():
|
| 145 |
"""Asegura que existan los usuarios por defecto y sus contraseñas esperadas (texto plano)."""
|
| 146 |
log("Sincronizando usuarios por defecto...")
|
|
|
|
| 740 |
# --- 5) Mostrar resultats: Clústers de veu (amb carrusels) ---
|
| 741 |
if st.session_state.get("audio_segments") is not None:
|
| 742 |
st.markdown("---")
|
| 743 |
+
|
| 744 |
+
# Extreure noms de caras per reutilitzar-los en veus
|
| 745 |
+
used_names_home = []
|
| 746 |
+
used_names_dona = []
|
| 747 |
+
noms_home_all, noms_dona_all = get_all_catalan_names()
|
| 748 |
+
|
| 749 |
+
for ch in (st.session_state.characters_detected or []):
|
| 750 |
+
# Obtenir nom del personatge (ja assignat per svision)
|
| 751 |
+
ch_name = ch.get("name", "")
|
| 752 |
+
if ch_name in noms_home_all:
|
| 753 |
+
used_names_home.append(ch_name)
|
| 754 |
+
elif ch_name in noms_dona_all:
|
| 755 |
+
used_names_dona.append(ch_name)
|
| 756 |
+
|
| 757 |
# Agrupar per etiqueta
|
| 758 |
segs = st.session_state.audio_segments or []
|
| 759 |
vlabels = st.session_state.voice_labels or []
|
|
|
|
| 824 |
with c2:
|
| 825 |
name_key = f"{key_prefix}_name"
|
| 826 |
desc_key = f"{key_prefix}_desc"
|
| 827 |
+
# Reutilitzar noms de caras
|
| 828 |
+
default_name = get_catalan_name_for_speaker(lbl, used_names_home, used_names_dona)
|
| 829 |
st.text_input("Nom del clúster", value=st.session_state.get(name_key, default_name), key=name_key)
|
| 830 |
st.text_area("Descripció", value=st.session_state.get(desc_key, ""), key=desc_key, height=80)
|
| 831 |
|
|
|
|
| 948 |
"folder": ch.get("folder"),
|
| 949 |
"kept_files": kept,
|
| 950 |
})
|
| 951 |
+
# Extreure noms de caras per reutilitzar-los
|
| 952 |
+
used_names_home_fin = []
|
| 953 |
+
used_names_dona_fin = []
|
| 954 |
+
noms_home_all, noms_dona_all = get_all_catalan_names()
|
| 955 |
+
for cp in chars_payload:
|
| 956 |
+
face_name = cp.get("name", "")
|
| 957 |
+
if face_name in noms_home_all:
|
| 958 |
+
used_names_home_fin.append(face_name)
|
| 959 |
+
elif face_name in noms_dona_all:
|
| 960 |
+
used_names_dona_fin.append(face_name)
|
| 961 |
+
|
| 962 |
# Agrupar segments d'àudio per etiqueta de veu
|
| 963 |
segs = st.session_state.audio_segments or []
|
| 964 |
vlabels = st.session_state.voice_labels or []
|
|
|
|
| 970 |
clip_local = seg.get("clip_path")
|
| 971 |
fname = _os.path.basename(clip_local) if clip_local else None
|
| 972 |
if fname:
|
| 973 |
+
# Generar nombre catalán por defecto, reutilitzant noms de caras
|
| 974 |
+
default_voice_name = get_catalan_name_for_speaker(int(lbl), used_names_home_fin, used_names_dona_fin) if isinstance(lbl, int) and lbl >= 0 else "UNKNOWN"
|
| 975 |
+
voice_clusters.setdefault(lbl, {"label": lbl, "name": default_voice_name, "description": "", "clips": []})
|
| 976 |
# Incloure noms/descr. personalitzats del formulari si existeixen
|
| 977 |
if isinstance(lbl, int) and lbl >= 0:
|
| 978 |
vpref = f"voice_{int(lbl):02d}"
|
|
|
|
| 1080 |
"description": desc,
|
| 1081 |
})
|
| 1082 |
|
| 1083 |
+
# Extreure noms de caras per reutilitzar-los en veus
|
| 1084 |
+
used_names_home_pers = []
|
| 1085 |
+
used_names_dona_pers = []
|
| 1086 |
+
noms_home_all, noms_dona_all = get_all_catalan_names()
|
| 1087 |
+
for cp in chars_payload:
|
| 1088 |
+
face_name = cp.get("name", "")
|
| 1089 |
+
if face_name in noms_home_all:
|
| 1090 |
+
used_names_home_pers.append(face_name)
|
| 1091 |
+
elif face_name in noms_dona_all:
|
| 1092 |
+
used_names_dona_pers.append(face_name)
|
| 1093 |
+
|
| 1094 |
# Agrupar segments de veu per etiqueta i nom normalizado
|
| 1095 |
segs = st.session_state.audio_segments or []
|
| 1096 |
vlabels = st.session_state.voice_labels or []
|
|
|
|
| 1102 |
if not (isinstance(lbl, int) and lbl >= 0):
|
| 1103 |
continue
|
| 1104 |
vpref = f"voice_{int(lbl):02d}"
|
| 1105 |
+
# Generar nombre catalán por defecto para voces, reutilitzant noms de caras
|
| 1106 |
+
default_voice_name = get_catalan_name_for_speaker(int(lbl), used_names_home_pers, used_names_dona_pers) if isinstance(lbl, int) and lbl >= 0 else f"SPEAKER_{int(lbl):02d}"
|
| 1107 |
+
vname_custom = st.session_state.get(f"{vpref}_name") or default_voice_name
|
| 1108 |
vname_normalized = normalize_name(vname_custom)
|
| 1109 |
vdesc = st.session_state.get(f"{vpref}_desc", "").strip()
|
| 1110 |
clip_local = seg.get("clip_path")
|