VeuReu committed on
Commit
e898458
·
1 Parent(s): f805057

Upload 2 files

Browse files
Files changed (2) hide show
  1. api_client.py +22 -20
  2. app.py +98 -6
api_client.py CHANGED
@@ -418,6 +418,15 @@ class APIClient:
418
  # Cliente para SVision Space
419
  # ===========================
420
 
 
 
 
 
 
 
 
 
 
421
  def describe_image_with_svision(image_path: str, is_face: bool = True) -> Tuple[str, str]:
422
  """
423
  Llama al space svision para describir una imagen (cara o escena).
@@ -469,33 +478,26 @@ def describe_image_with_svision(image_path: str, is_face: bool = True) -> Tuple[
469
  if not full_description:
470
  return ("", "")
471
 
472
- # Generar nombre abreviado para caras
473
  if is_face:
474
  # Extraer características clave para el nombre
475
  desc_lower = full_description.lower()
476
 
477
- # Determinar edad y género
478
- is_young = any(word in desc_lower for word in ["jove", "nen", "nena", "adolescent", "noi", "noia"])
479
- is_female = any(word in desc_lower for word in ["dona", "noia", "nena", "femení"])
480
-
481
- if is_young:
482
- base_name = "Noia" if is_female else "Noi"
483
- else:
484
- base_name = "Dona" if is_female else "Home"
485
 
486
- # Añadir características distintivas
487
- features = []
488
- if "ullere" in desc_lower or "gafa" in desc_lower:
489
- features.append("ulleres")
490
- if "barba" in desc_lower:
491
- features.append("barba")
492
- if "bigoti" in desc_lower:
493
- features.append("bigoti")
494
 
495
- if features:
496
- short_name = f"{base_name} amb {', '.join(features)}"
 
497
  else:
498
- short_name = base_name
 
 
 
499
  else:
500
  # Para escenas, extraer primeras palabras clave
501
  words = full_description.split()[:4]
 
418
  # Cliente para SVision Space
419
  # ===========================
420
 
421
+ # Nombres catalanes comunes para asignar a personajes (deben coincidir con app.py)
422
def get_catalan_names():
    """Return the pools of Catalan given names used to label detected faces.

    The same two lists are duplicated in app.py (``get_all_catalan_names``);
    both copies have to stay identical for names to match across modules.

    Returns:
        A ``(male_names, female_names)`` tuple of two freshly built lists,
        20 names each.
    """
    male_names = [
        "Jordi", "Marc", "Pau", "Pere", "Joan",
        "Josep", "David", "Àlex", "Guillem", "Albert",
        "Arnau", "Martí", "Bernat", "Oriol", "Roger",
        "Pol", "Lluís", "Sergi", "Carles", "Xavier",
    ]
    female_names = [
        "Maria", "Anna", "Laura", "Marta", "Cristina",
        "Núria", "Montserrat", "Júlia", "Sara", "Carla",
        "Alba", "Elisabet", "Rosa", "Gemma", "Sílvia",
        "Teresa", "Irene", "Laia", "Marina", "Bet",
    ]
    return male_names, female_names
429
+
430
  def describe_image_with_svision(image_path: str, is_face: bool = True) -> Tuple[str, str]:
431
  """
432
  Llama al space svision para describir una imagen (cara o escena).
 
478
  if not full_description:
479
  return ("", "")
480
 
481
+ # Generar nombre aleatorio en catalán para caras
482
  if is_face:
483
  # Extraer características clave para el nombre
484
  desc_lower = full_description.lower()
485
 
486
+ # Determinar género
487
+ is_female = any(word in desc_lower for word in ["dona", "noia", "nena", "femení", "femenina"])
 
 
 
 
 
 
488
 
489
+ # Seleccionar nombre aleatorio pero consistente (hash del path)
490
+ import hashlib
491
+ hash_val = int(hashlib.md5(image_path.encode()).hexdigest(), 16)
 
 
 
 
 
492
 
493
+ noms_home, noms_dona = get_catalan_names()
494
+ if is_female:
495
+ name_list = noms_dona
496
  else:
497
+ name_list = noms_home
498
+
499
+ # Usar hash para selección consistente
500
+ short_name = name_list[hash_val % len(name_list)]
501
  else:
502
  # Para escenas, extraer primeras palabras clave
503
  words = full_description.split()[:4]
app.py CHANGED
@@ -85,11 +85,62 @@ set_db_path(DB_PATH)
85
  init_schema()
86
 
87
  # --- Helper de logging ---
88
- def log(msg):
89
- """Helper para escribir logs que aparezcan en el container de HF Spaces"""
90
- sys.stderr.write(f"{msg}\n")
 
91
  sys.stderr.flush()
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  def create_default_users_if_needed():
94
  """Asegura que existan los usuarios por defecto y sus contraseñas esperadas (texto plano)."""
95
  log("Sincronizando usuarios por defecto...")
@@ -689,6 +740,20 @@ if page == "Processar vídeo nou":
689
  # --- 5) Mostrar resultats: Clústers de veu (amb carrusels) ---
690
  if st.session_state.get("audio_segments") is not None:
691
  st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
692
  # Agrupar per etiqueta
693
  segs = st.session_state.audio_segments or []
694
  vlabels = st.session_state.voice_labels or []
@@ -759,7 +824,8 @@ if page == "Processar vídeo nou":
759
  with c2:
760
  name_key = f"{key_prefix}_name"
761
  desc_key = f"{key_prefix}_desc"
762
- default_name = f"SPEAKER_{lbl:02d}"
 
763
  st.text_input("Nom del clúster", value=st.session_state.get(name_key, default_name), key=name_key)
764
  st.text_area("Descripció", value=st.session_state.get(desc_key, ""), key=desc_key, height=80)
765
 
@@ -882,6 +948,17 @@ if page == "Processar vídeo nou":
882
  "folder": ch.get("folder"),
883
  "kept_files": kept,
884
  })
 
 
 
 
 
 
 
 
 
 
 
885
  # Agrupar segments d'àudio per etiqueta de veu
886
  segs = st.session_state.audio_segments or []
887
  vlabels = st.session_state.voice_labels or []
@@ -893,7 +970,9 @@ if page == "Processar vídeo nou":
893
  clip_local = seg.get("clip_path")
894
  fname = _os.path.basename(clip_local) if clip_local else None
895
  if fname:
896
- voice_clusters.setdefault(lbl, {"label": lbl, "name": f"SPEAKER_{int(lbl):02d}" if isinstance(lbl, int) and lbl >= 0 else "UNKNOWN", "description": "", "clips": []})
 
 
897
  # Incloure noms/descr. personalitzats del formulari si existeixen
898
  if isinstance(lbl, int) and lbl >= 0:
899
  vpref = f"voice_{int(lbl):02d}"
@@ -1001,6 +1080,17 @@ if page == "Processar vídeo nou":
1001
  "description": desc,
1002
  })
1003
 
 
 
 
 
 
 
 
 
 
 
 
1004
  # Agrupar segments de veu per etiqueta i nom normalizado
1005
  segs = st.session_state.audio_segments or []
1006
  vlabels = st.session_state.voice_labels or []
@@ -1012,7 +1102,9 @@ if page == "Processar vídeo nou":
1012
  if not (isinstance(lbl, int) and lbl >= 0):
1013
  continue
1014
  vpref = f"voice_{int(lbl):02d}"
1015
- vname_custom = st.session_state.get(f"{vpref}_name") or f"SPEAKER_{int(lbl):02d}"
 
 
1016
  vname_normalized = normalize_name(vname_custom)
1017
  vdesc = st.session_state.get(f"{vpref}_desc", "").strip()
1018
  clip_local = seg.get("clip_path")
 
85
  init_schema()
86
 
87
  # --- Helper de logging ---
88
def log(msg: str):
    """Write ``msg`` to stderr prefixed with the current timestamp, then flush.

    The line has the form ``[YYYY-MM-DD HH:MM:SS] msg`` and ends with a
    newline. The timestamp comes from ``datetime.now()`` with no timezone
    argument.

    Args:
        msg: Text to log.
    """
    now = datetime.now()
    timestamp = now.strftime("%Y-%m-%d %H:%M:%S")
    err_stream = sys.stderr
    err_stream.write(f"[{timestamp}] {msg}\n")
    # Flush right away so the line is not held back in a buffer.
    err_stream.flush()
93
 
94
def get_all_catalan_names():
    """Return every Catalan given name available for naming characters.

    Returns:
        A ``(male_names, female_names)`` tuple; each element is a new list
        of 20 names, so callers may mutate the result freely.
    """
    male_pool = [
        "Jordi", "Marc", "Pau", "Pere", "Joan", "Josep", "David",
        "Àlex", "Guillem", "Albert", "Arnau", "Martí", "Bernat", "Oriol",
        "Roger", "Pol", "Lluís", "Sergi", "Carles", "Xavier",
    ]
    female_pool = [
        "Maria", "Anna", "Laura", "Marta", "Cristina", "Núria", "Montserrat",
        "Júlia", "Sara", "Carla", "Alba", "Elisabet", "Rosa", "Gemma",
        "Sílvia", "Teresa", "Irene", "Laia", "Marina", "Bet",
    ]
    return male_pool, female_pool
101
+
102
def get_catalan_name_for_speaker(speaker_label: int, used_names_home: list = None, used_names_dona: list = None) -> str:
    """Pick a default Catalan name for a voice cluster.

    Gender is decided purely by the parity of ``speaker_label`` (even -> male
    list, odd -> female list); no audio analysis is involved. Names already
    assigned to detected faces are reused when available, so voice clusters
    and face clusters tend to share names.

    Args:
        speaker_label: Index of the speaker cluster (0, 1, 2, ...).
        used_names_home: Male names already assigned to faces, in order.
            ``None`` or an empty list triggers the fallback.
        used_names_dona: Female names already assigned to faces, in order.
            ``None`` or an empty list triggers the fallback.

    Returns:
        A Catalan given name.
    """
    # Function-scope import keeps this block self-contained.
    import hashlib

    is_male = speaker_label % 2 == 0
    reusable = used_names_home if is_male else used_names_dona

    if reusable:
        # Even labels 0, 2, 4 and odd labels 1, 3, 5 both map to 0, 1, 2;
        # wrap around when there are more speakers than reusable names.
        return reusable[(speaker_label // 2) % len(reusable)]

    # Fallback: no face names of this gender to reuse. The built-in hash() is
    # salted per process for str, so it would yield a different name after
    # every restart; an md5 digest gives the same name for the same label.
    noms_home, noms_dona = get_all_catalan_names()
    pool = noms_home if is_male else noms_dona
    digest = hashlib.md5(f"speaker_{speaker_label}".encode("utf-8")).hexdigest()
    return pool[int(digest, 16) % len(pool)]
143
+
144
  def create_default_users_if_needed():
145
  """Asegura que existan los usuarios por defecto y sus contraseñas esperadas (texto plano)."""
146
  log("Sincronizando usuarios por defecto...")
 
740
  # --- 5) Mostrar resultats: Clústers de veu (amb carrusels) ---
741
  if st.session_state.get("audio_segments") is not None:
742
  st.markdown("---")
743
+
744
+ # Extreure noms de caras per reutilitzar-los en veus
745
+ used_names_home = []
746
+ used_names_dona = []
747
+ noms_home_all, noms_dona_all = get_all_catalan_names()
748
+
749
+ for ch in (st.session_state.characters_detected or []):
750
+ # Obtenir nom del personatge (ja assignat per svision)
751
+ ch_name = ch.get("name", "")
752
+ if ch_name in noms_home_all:
753
+ used_names_home.append(ch_name)
754
+ elif ch_name in noms_dona_all:
755
+ used_names_dona.append(ch_name)
756
+
757
  # Agrupar per etiqueta
758
  segs = st.session_state.audio_segments or []
759
  vlabels = st.session_state.voice_labels or []
 
824
  with c2:
825
  name_key = f"{key_prefix}_name"
826
  desc_key = f"{key_prefix}_desc"
827
+ # Reutilitzar noms de caras
828
+ default_name = get_catalan_name_for_speaker(lbl, used_names_home, used_names_dona)
829
  st.text_input("Nom del clúster", value=st.session_state.get(name_key, default_name), key=name_key)
830
  st.text_area("Descripció", value=st.session_state.get(desc_key, ""), key=desc_key, height=80)
831
 
 
948
  "folder": ch.get("folder"),
949
  "kept_files": kept,
950
  })
951
+ # Extreure noms de caras per reutilitzar-los
952
+ used_names_home_fin = []
953
+ used_names_dona_fin = []
954
+ noms_home_all, noms_dona_all = get_all_catalan_names()
955
+ for cp in chars_payload:
956
+ face_name = cp.get("name", "")
957
+ if face_name in noms_home_all:
958
+ used_names_home_fin.append(face_name)
959
+ elif face_name in noms_dona_all:
960
+ used_names_dona_fin.append(face_name)
961
+
962
  # Agrupar segments d'àudio per etiqueta de veu
963
  segs = st.session_state.audio_segments or []
964
  vlabels = st.session_state.voice_labels or []
 
970
  clip_local = seg.get("clip_path")
971
  fname = _os.path.basename(clip_local) if clip_local else None
972
  if fname:
973
+ # Generar nombre catalán por defecto, reutilitzant noms de caras
974
+ default_voice_name = get_catalan_name_for_speaker(int(lbl), used_names_home_fin, used_names_dona_fin) if isinstance(lbl, int) and lbl >= 0 else "UNKNOWN"
975
+ voice_clusters.setdefault(lbl, {"label": lbl, "name": default_voice_name, "description": "", "clips": []})
976
  # Incloure noms/descr. personalitzats del formulari si existeixen
977
  if isinstance(lbl, int) and lbl >= 0:
978
  vpref = f"voice_{int(lbl):02d}"
 
1080
  "description": desc,
1081
  })
1082
 
1083
+ # Extreure noms de caras per reutilitzar-los en veus
1084
+ used_names_home_pers = []
1085
+ used_names_dona_pers = []
1086
+ noms_home_all, noms_dona_all = get_all_catalan_names()
1087
+ for cp in chars_payload:
1088
+ face_name = cp.get("name", "")
1089
+ if face_name in noms_home_all:
1090
+ used_names_home_pers.append(face_name)
1091
+ elif face_name in noms_dona_all:
1092
+ used_names_dona_pers.append(face_name)
1093
+
1094
  # Agrupar segments de veu per etiqueta i nom normalizado
1095
  segs = st.session_state.audio_segments or []
1096
  vlabels = st.session_state.voice_labels or []
 
1102
  if not (isinstance(lbl, int) and lbl >= 0):
1103
  continue
1104
  vpref = f"voice_{int(lbl):02d}"
1105
+ # Generar nombre catalán por defecto para voces, reutilitzant noms de caras
1106
+ default_voice_name = get_catalan_name_for_speaker(int(lbl), used_names_home_pers, used_names_dona_pers) if isinstance(lbl, int) and lbl >= 0 else f"SPEAKER_{int(lbl):02d}"
1107
+ vname_custom = st.session_state.get(f"{vpref}_name") or default_voice_name
1108
  vname_normalized = normalize_name(vname_custom)
1109
  vdesc = st.session_state.get(f"{vpref}_desc", "").strip()
1110
  clip_local = seg.get("clip_path")