VeuReu committed on
Commit
6f1a568
·
verified ·
1 Parent(s): 6f4dc01

Upload api.py

Browse files
Files changed (1) hide show
  1. api.py +24 -10
api.py CHANGED
@@ -154,9 +154,9 @@ def normalize_face_lighting(image):
154
  normalized = cv2.cvtColor(lab_normalized, cv2.COLOR_LAB2BGR)
155
  return normalized
156
 
157
- def hierarchical_cluster_with_min_size(X, max_groups: int, min_cluster_size: int):
158
  """
159
- Clustering jerárquico aglomerativo con selección óptima del número de clusters.
160
  Selecciona automáticamente el mejor número de clusters (hasta max_groups) usando silhouette score.
161
  Filtra clusters con menos de min_cluster_size muestras (marcados como -1/ruido).
162
 
@@ -164,6 +164,10 @@ def hierarchical_cluster_with_min_size(X, max_groups: int, min_cluster_size: int
164
  X: Array de embeddings (N, D)
165
  max_groups: Número máximo de clusters a formar
166
  min_cluster_size: Tamaño mínimo de cluster válido
 
 
 
 
167
 
168
  Returns:
169
  Array de labels (N,) donde -1 indica ruido
@@ -203,10 +207,12 @@ def hierarchical_cluster_with_min_size(X, max_groups: int, min_cluster_size: int
203
  if valid_clusters >= 2:
204
  try:
205
  score = silhouette_score(X, trial_labels, metric='cosine')
206
- # Penalizar configuraciones con muchos clusters para evitar overfitting
207
- # Penalización MUY fuerte para reducir duplicados de la misma persona
208
- # Valores: 0.05 = fuerte, 0.07 = muy fuerte, 0.10 = extremo
209
- adjusted_score = score - (n_clusters * 0.07)
 
 
210
 
211
  if adjusted_score > best_score:
212
  best_score = adjusted_score
@@ -215,7 +221,8 @@ def hierarchical_cluster_with_min_size(X, max_groups: int, min_cluster_size: int
215
  pass # Si falla el cálculo, ignorar esta configuración
216
 
217
  # Usar el número óptimo de clusters encontrado
218
- print(f"Clustering óptimo: {best_n_clusters} clusters (de máximo {max_groups}), silhouette score: {best_score:.3f}")
 
219
  labels = fcluster(Z, t=best_n_clusters, criterion='maxclust')
220
 
221
  # fcluster devuelve labels 1-indexed, convertir a 0-indexed
@@ -255,8 +262,10 @@ async def create_initial_casting(
255
  video: UploadFile = File(...),
256
  max_groups: int = Form(default=3),
257
  min_cluster_size: int = Form(default=3),
 
258
  voice_max_groups: int = Form(default=3),
259
  voice_min_cluster_size: int = Form(default=3),
 
260
  max_frames: int = Form(default=100),
261
  ):
262
  """
@@ -280,8 +289,10 @@ async def create_initial_casting(
280
  "video_name": video_name,
281
  "max_groups": int(max_groups),
282
  "min_cluster_size": int(min_cluster_size),
 
283
  "voice_max_groups": int(voice_max_groups),
284
  "voice_min_cluster_size": int(voice_min_cluster_size),
 
285
  "max_frames": int(max_frames),
286
  "created_at": datetime.now().isoformat(),
287
  "results": None,
@@ -358,8 +369,10 @@ def process_video_job(job_id: str):
358
  video_name = job["video_name"]
359
  max_groups = int(job.get("max_groups", 5))
360
  min_cluster_size = int(job.get("min_cluster_size", 3))
 
361
  v_max_groups = int(job.get("voice_max_groups", 5))
362
  v_min_cluster = int(job.get("voice_min_cluster_size", 3))
 
363
 
364
  # Crear estructura de carpetas
365
  base = TEMP_ROOT / video_name
@@ -521,7 +534,7 @@ def process_video_job(job_id: str):
521
  # Clustering jerárquico de caras
522
  if embeddings:
523
  Xf = np.array(embeddings)
524
- labels = hierarchical_cluster_with_min_size(Xf, max_groups, min_cluster_size).tolist()
525
  print(f"[{job_id}] Clustering jerárquico de caras: {len(set([l for l in labels if l >= 0]))} clusters")
526
  else:
527
  labels = []
@@ -781,7 +794,7 @@ def process_video_job(job_id: str):
781
  if voice_embeddings:
782
  try:
783
  Xv = np.array(voice_embeddings)
784
- v_labels = hierarchical_cluster_with_min_size(Xv, v_max_groups, v_min_cluster).tolist()
785
  print(f"[{job_id}] Clustering jerárquico de voz: {len(set([l for l in v_labels if l >= 0]))} clusters")
786
  except Exception as _e:
787
  print(f"[{job_id}] WARN - Voice clustering failed: {_e}")
@@ -994,6 +1007,7 @@ async def detect_scenes(
994
  video: UploadFile = File(...),
995
  max_groups: int = Form(default=3),
996
  min_cluster_size: int = Form(default=3),
 
997
  frame_interval_sec: float = Form(default=0.5),
998
  ):
999
  """
@@ -1045,7 +1059,7 @@ async def detect_scenes(
1045
  return {"scene_clusters": []}
1046
 
1047
  X = np.array(frames)
1048
- labels = hierarchical_cluster_with_min_size(X, max_groups, min_cluster_size).tolist()
1049
  initial_clusters = len(set([l for l in labels if l >= 0]))
1050
  print(f"Scene clustering jeràrquic inicial: {initial_clusters} clusters")
1051
 
 
154
  normalized = cv2.cvtColor(lab_normalized, cv2.COLOR_LAB2BGR)
155
  return normalized
156
 
157
+ def hierarchical_cluster_with_min_size(X, max_groups: int, min_cluster_size: int, sensitivity: float = 0.5) -> np.ndarray:
158
  """
159
+ Clustering jerárquico con silhouette score para encontrar automáticamente el mejor número de clusters.
160
  Selecciona automáticamente el mejor número de clusters (hasta max_groups) usando silhouette score.
161
  Filtra clusters con menos de min_cluster_size muestras (marcados como -1/ruido).
162
 
 
164
  X: Array de embeddings (N, D)
165
  max_groups: Número máximo de clusters a formar
166
  min_cluster_size: Tamaño mínimo de cluster válido
167
+ sensitivity: Sensibilidad del clustering (0.0-1.0)
168
+ - 0.0 = muy agresivo (menos clusters)
169
+ - 0.5 = balanceado (recomendado)
170
+ - 1.0 = muy permisivo (más clusters)
171
 
172
  Returns:
173
  Array de labels (N,) donde -1 indica ruido
 
207
  if valid_clusters >= 2:
208
  try:
209
  score = silhouette_score(X, trial_labels, metric='cosine')
210
+ # Penalización dinámica basada en sensibilidad:
211
+ # - sensitivity=0.0 → penalty=0.14 (muy agresivo, menos clusters)
212
+ # - sensitivity=0.5 → penalty=0.075 (balanceado, recomendado)
213
+ # - sensitivity=1.0 → penalty=0.01 (permisivo, más clusters)
214
+ penalty = 0.14 - (sensitivity * 0.13)
215
+ adjusted_score = score - (n_clusters * penalty)
216
 
217
  if adjusted_score > best_score:
218
  best_score = adjusted_score
 
221
  pass # Si falla el cálculo, ignorar esta configuración
222
 
223
  # Usar el número óptimo de clusters encontrado
224
+ penalty = 0.14 - (sensitivity * 0.13)
225
+ print(f"Clustering óptimo: {best_n_clusters} clusters (de máximo {max_groups}), sensitivity={sensitivity:.2f}, penalty={penalty:.3f}, silhouette={best_score:.3f}")
226
  labels = fcluster(Z, t=best_n_clusters, criterion='maxclust')
227
 
228
  # fcluster devuelve labels 1-indexed, convertir a 0-indexed
 
262
  video: UploadFile = File(...),
263
  max_groups: int = Form(default=3),
264
  min_cluster_size: int = Form(default=3),
265
+ face_sensitivity: float = Form(default=0.5),
266
  voice_max_groups: int = Form(default=3),
267
  voice_min_cluster_size: int = Form(default=3),
268
+ voice_sensitivity: float = Form(default=0.5),
269
  max_frames: int = Form(default=100),
270
  ):
271
  """
 
289
  "video_name": video_name,
290
  "max_groups": int(max_groups),
291
  "min_cluster_size": int(min_cluster_size),
292
+ "face_sensitivity": float(face_sensitivity),
293
  "voice_max_groups": int(voice_max_groups),
294
  "voice_min_cluster_size": int(voice_min_cluster_size),
295
+ "voice_sensitivity": float(voice_sensitivity),
296
  "max_frames": int(max_frames),
297
  "created_at": datetime.now().isoformat(),
298
  "results": None,
 
369
  video_name = job["video_name"]
370
  max_groups = int(job.get("max_groups", 5))
371
  min_cluster_size = int(job.get("min_cluster_size", 3))
372
+ face_sensitivity = float(job.get("face_sensitivity", 0.5))
373
  v_max_groups = int(job.get("voice_max_groups", 5))
374
  v_min_cluster = int(job.get("voice_min_cluster_size", 3))
375
+ voice_sensitivity = float(job.get("voice_sensitivity", 0.5))
376
 
377
  # Crear estructura de carpetas
378
  base = TEMP_ROOT / video_name
 
534
  # Clustering jerárquico de caras
535
  if embeddings:
536
  Xf = np.array(embeddings)
537
+ labels = hierarchical_cluster_with_min_size(Xf, max_groups, min_cluster_size, face_sensitivity).tolist()
538
  print(f"[{job_id}] Clustering jerárquico de caras: {len(set([l for l in labels if l >= 0]))} clusters")
539
  else:
540
  labels = []
 
794
  if voice_embeddings:
795
  try:
796
  Xv = np.array(voice_embeddings)
797
+ v_labels = hierarchical_cluster_with_min_size(Xv, v_max_groups, v_min_cluster, voice_sensitivity).tolist()
798
  print(f"[{job_id}] Clustering jerárquico de voz: {len(set([l for l in v_labels if l >= 0]))} clusters")
799
  except Exception as _e:
800
  print(f"[{job_id}] WARN - Voice clustering failed: {_e}")
 
1007
  video: UploadFile = File(...),
1008
  max_groups: int = Form(default=3),
1009
  min_cluster_size: int = Form(default=3),
1010
+ scene_sensitivity: float = Form(default=0.5),
1011
  frame_interval_sec: float = Form(default=0.5),
1012
  ):
1013
  """
 
1059
  return {"scene_clusters": []}
1060
 
1061
  X = np.array(frames)
1062
+ labels = hierarchical_cluster_with_min_size(X, max_groups, min_cluster_size, scene_sensitivity).tolist()
1063
  initial_clusters = len(set([l for l in labels if l >= 0]))
1064
  print(f"Scene clustering jeràrquic inicial: {initial_clusters} clusters")
1065