Upload api.py
Browse files
api.py
CHANGED
|
@@ -154,9 +154,9 @@ def normalize_face_lighting(image):
|
|
| 154 |
normalized = cv2.cvtColor(lab_normalized, cv2.COLOR_LAB2BGR)
|
| 155 |
return normalized
|
| 156 |
|
| 157 |
-
def hierarchical_cluster_with_min_size(X, max_groups: int, min_cluster_size: int):
|
| 158 |
"""
|
| 159 |
-
Clustering jerárquico
|
| 160 |
Selecciona automáticamente el mejor número de clusters (hasta max_groups) usando silhouette score.
|
| 161 |
Filtra clusters con menos de min_cluster_size muestras (marcados como -1/ruido).
|
| 162 |
|
|
@@ -164,6 +164,10 @@ def hierarchical_cluster_with_min_size(X, max_groups: int, min_cluster_size: int
|
|
| 164 |
X: Array de embeddings (N, D)
|
| 165 |
max_groups: Número máximo de clusters a formar
|
| 166 |
min_cluster_size: Tamaño mínimo de cluster válido
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
Returns:
|
| 169 |
Array de labels (N,) donde -1 indica ruido
|
|
@@ -203,10 +207,12 @@ def hierarchical_cluster_with_min_size(X, max_groups: int, min_cluster_size: int
|
|
| 203 |
if valid_clusters >= 2:
|
| 204 |
try:
|
| 205 |
score = silhouette_score(X, trial_labels, metric='cosine')
|
| 206 |
-
#
|
| 207 |
-
#
|
| 208 |
-
#
|
| 209 |
-
|
|
|
|
|
|
|
| 210 |
|
| 211 |
if adjusted_score > best_score:
|
| 212 |
best_score = adjusted_score
|
|
@@ -215,7 +221,8 @@ def hierarchical_cluster_with_min_size(X, max_groups: int, min_cluster_size: int
|
|
| 215 |
pass # Si falla el cálculo, ignorar esta configuración
|
| 216 |
|
| 217 |
# Usar el número óptimo de clusters encontrado
|
| 218 |
-
|
|
|
|
| 219 |
labels = fcluster(Z, t=best_n_clusters, criterion='maxclust')
|
| 220 |
|
| 221 |
# fcluster devuelve labels 1-indexed, convertir a 0-indexed
|
|
@@ -255,8 +262,10 @@ async def create_initial_casting(
|
|
| 255 |
video: UploadFile = File(...),
|
| 256 |
max_groups: int = Form(default=3),
|
| 257 |
min_cluster_size: int = Form(default=3),
|
|
|
|
| 258 |
voice_max_groups: int = Form(default=3),
|
| 259 |
voice_min_cluster_size: int = Form(default=3),
|
|
|
|
| 260 |
max_frames: int = Form(default=100),
|
| 261 |
):
|
| 262 |
"""
|
|
@@ -280,8 +289,10 @@ async def create_initial_casting(
|
|
| 280 |
"video_name": video_name,
|
| 281 |
"max_groups": int(max_groups),
|
| 282 |
"min_cluster_size": int(min_cluster_size),
|
|
|
|
| 283 |
"voice_max_groups": int(voice_max_groups),
|
| 284 |
"voice_min_cluster_size": int(voice_min_cluster_size),
|
|
|
|
| 285 |
"max_frames": int(max_frames),
|
| 286 |
"created_at": datetime.now().isoformat(),
|
| 287 |
"results": None,
|
|
@@ -358,8 +369,10 @@ def process_video_job(job_id: str):
|
|
| 358 |
video_name = job["video_name"]
|
| 359 |
max_groups = int(job.get("max_groups", 5))
|
| 360 |
min_cluster_size = int(job.get("min_cluster_size", 3))
|
|
|
|
| 361 |
v_max_groups = int(job.get("voice_max_groups", 5))
|
| 362 |
v_min_cluster = int(job.get("voice_min_cluster_size", 3))
|
|
|
|
| 363 |
|
| 364 |
# Crear estructura de carpetas
|
| 365 |
base = TEMP_ROOT / video_name
|
|
@@ -521,7 +534,7 @@ def process_video_job(job_id: str):
|
|
| 521 |
# Clustering jerárquico de caras
|
| 522 |
if embeddings:
|
| 523 |
Xf = np.array(embeddings)
|
| 524 |
-
labels = hierarchical_cluster_with_min_size(Xf, max_groups, min_cluster_size).tolist()
|
| 525 |
print(f"[{job_id}] Clustering jerárquico de caras: {len(set([l for l in labels if l >= 0]))} clusters")
|
| 526 |
else:
|
| 527 |
labels = []
|
|
@@ -781,7 +794,7 @@ def process_video_job(job_id: str):
|
|
| 781 |
if voice_embeddings:
|
| 782 |
try:
|
| 783 |
Xv = np.array(voice_embeddings)
|
| 784 |
-
v_labels = hierarchical_cluster_with_min_size(Xv, v_max_groups, v_min_cluster).tolist()
|
| 785 |
print(f"[{job_id}] Clustering jerárquico de voz: {len(set([l for l in v_labels if l >= 0]))} clusters")
|
| 786 |
except Exception as _e:
|
| 787 |
print(f"[{job_id}] WARN - Voice clustering failed: {_e}")
|
|
@@ -994,6 +1007,7 @@ async def detect_scenes(
|
|
| 994 |
video: UploadFile = File(...),
|
| 995 |
max_groups: int = Form(default=3),
|
| 996 |
min_cluster_size: int = Form(default=3),
|
|
|
|
| 997 |
frame_interval_sec: float = Form(default=0.5),
|
| 998 |
):
|
| 999 |
"""
|
|
@@ -1045,7 +1059,7 @@ async def detect_scenes(
|
|
| 1045 |
return {"scene_clusters": []}
|
| 1046 |
|
| 1047 |
X = np.array(frames)
|
| 1048 |
-
labels = hierarchical_cluster_with_min_size(X, max_groups, min_cluster_size).tolist()
|
| 1049 |
initial_clusters = len(set([l for l in labels if l >= 0]))
|
| 1050 |
print(f"Scene clustering jeràrquic inicial: {initial_clusters} clusters")
|
| 1051 |
|
|
|
|
| 154 |
normalized = cv2.cvtColor(lab_normalized, cv2.COLOR_LAB2BGR)
|
| 155 |
return normalized
|
| 156 |
|
| 157 |
+
def hierarchical_cluster_with_min_size(X, max_groups: int, min_cluster_size: int, sensitivity: float = 0.5) -> np.ndarray:
|
| 158 |
"""
|
| 159 |
+
Clustering jerárquico con silhouette score para encontrar automáticamente el mejor número de clusters.
|
| 160 |
Selecciona automáticamente el mejor número de clusters (hasta max_groups) usando silhouette score.
|
| 161 |
Filtra clusters con menos de min_cluster_size muestras (marcados como -1/ruido).
|
| 162 |
|
|
|
|
| 164 |
X: Array de embeddings (N, D)
|
| 165 |
max_groups: Número máximo de clusters a formar
|
| 166 |
min_cluster_size: Tamaño mínimo de cluster válido
|
| 167 |
+
sensitivity: Sensibilidad del clustering (0.0-1.0)
|
| 168 |
+
- 0.0 = muy agresivo (menos clusters)
|
| 169 |
+
- 0.5 = balanceado (recomendado)
|
| 170 |
+
- 1.0 = muy permisivo (más clusters)
|
| 171 |
|
| 172 |
Returns:
|
| 173 |
Array de labels (N,) donde -1 indica ruido
|
|
|
|
| 207 |
if valid_clusters >= 2:
|
| 208 |
try:
|
| 209 |
score = silhouette_score(X, trial_labels, metric='cosine')
|
| 210 |
+
# Penalización dinámica basada en sensibilidad:
|
| 211 |
+
# - sensitivity=0.0 → penalty=0.14 (muy agresivo, menos clusters)
|
| 212 |
+
# - sensitivity=0.5 → penalty=0.075 (balanceado, recomendado)
|
| 213 |
+
# - sensitivity=1.0 → penalty=0.01 (permisivo, más clusters)
|
| 214 |
+
penalty = 0.14 - (sensitivity * 0.13)
|
| 215 |
+
adjusted_score = score - (n_clusters * penalty)
|
| 216 |
|
| 217 |
if adjusted_score > best_score:
|
| 218 |
best_score = adjusted_score
|
|
|
|
| 221 |
pass # Si falla el cálculo, ignorar esta configuración
|
| 222 |
|
| 223 |
# Usar el número óptimo de clusters encontrado
|
| 224 |
+
penalty = 0.14 - (sensitivity * 0.13)
|
| 225 |
+
print(f"Clustering óptimo: {best_n_clusters} clusters (de máximo {max_groups}), sensitivity={sensitivity:.2f}, penalty={penalty:.3f}, silhouette={best_score:.3f}")
|
| 226 |
labels = fcluster(Z, t=best_n_clusters, criterion='maxclust')
|
| 227 |
|
| 228 |
# fcluster devuelve labels 1-indexed, convertir a 0-indexed
|
|
|
|
| 262 |
video: UploadFile = File(...),
|
| 263 |
max_groups: int = Form(default=3),
|
| 264 |
min_cluster_size: int = Form(default=3),
|
| 265 |
+
face_sensitivity: float = Form(default=0.5),
|
| 266 |
voice_max_groups: int = Form(default=3),
|
| 267 |
voice_min_cluster_size: int = Form(default=3),
|
| 268 |
+
voice_sensitivity: float = Form(default=0.5),
|
| 269 |
max_frames: int = Form(default=100),
|
| 270 |
):
|
| 271 |
"""
|
|
|
|
| 289 |
"video_name": video_name,
|
| 290 |
"max_groups": int(max_groups),
|
| 291 |
"min_cluster_size": int(min_cluster_size),
|
| 292 |
+
"face_sensitivity": float(face_sensitivity),
|
| 293 |
"voice_max_groups": int(voice_max_groups),
|
| 294 |
"voice_min_cluster_size": int(voice_min_cluster_size),
|
| 295 |
+
"voice_sensitivity": float(voice_sensitivity),
|
| 296 |
"max_frames": int(max_frames),
|
| 297 |
"created_at": datetime.now().isoformat(),
|
| 298 |
"results": None,
|
|
|
|
| 369 |
video_name = job["video_name"]
|
| 370 |
max_groups = int(job.get("max_groups", 5))
|
| 371 |
min_cluster_size = int(job.get("min_cluster_size", 3))
|
| 372 |
+
face_sensitivity = float(job.get("face_sensitivity", 0.5))
|
| 373 |
v_max_groups = int(job.get("voice_max_groups", 5))
|
| 374 |
v_min_cluster = int(job.get("voice_min_cluster_size", 3))
|
| 375 |
+
voice_sensitivity = float(job.get("voice_sensitivity", 0.5))
|
| 376 |
|
| 377 |
# Crear estructura de carpetas
|
| 378 |
base = TEMP_ROOT / video_name
|
|
|
|
| 534 |
# Clustering jerárquico de caras
|
| 535 |
if embeddings:
|
| 536 |
Xf = np.array(embeddings)
|
| 537 |
+
labels = hierarchical_cluster_with_min_size(Xf, max_groups, min_cluster_size, face_sensitivity).tolist()
|
| 538 |
print(f"[{job_id}] Clustering jerárquico de caras: {len(set([l for l in labels if l >= 0]))} clusters")
|
| 539 |
else:
|
| 540 |
labels = []
|
|
|
|
| 794 |
if voice_embeddings:
|
| 795 |
try:
|
| 796 |
Xv = np.array(voice_embeddings)
|
| 797 |
+
v_labels = hierarchical_cluster_with_min_size(Xv, v_max_groups, v_min_cluster, voice_sensitivity).tolist()
|
| 798 |
print(f"[{job_id}] Clustering jerárquico de voz: {len(set([l for l in v_labels if l >= 0]))} clusters")
|
| 799 |
except Exception as _e:
|
| 800 |
print(f"[{job_id}] WARN - Voice clustering failed: {_e}")
|
|
|
|
| 1007 |
video: UploadFile = File(...),
|
| 1008 |
max_groups: int = Form(default=3),
|
| 1009 |
min_cluster_size: int = Form(default=3),
|
| 1010 |
+
scene_sensitivity: float = Form(default=0.5),
|
| 1011 |
frame_interval_sec: float = Form(default=0.5),
|
| 1012 |
):
|
| 1013 |
"""
|
|
|
|
| 1059 |
return {"scene_clusters": []}
|
| 1060 |
|
| 1061 |
X = np.array(frames)
|
| 1062 |
+
labels = hierarchical_cluster_with_min_size(X, max_groups, min_cluster_size, scene_sensitivity).tolist()
|
| 1063 |
initial_clusters = len(set([l for l in labels if l >= 0]))
|
| 1064 |
print(f"Scene clustering jeràrquic inicial: {initial_clusters} clusters")
|
| 1065 |
|