VeuReu committed on
Commit
da15379
verified
1 Parent(s): bd1e4fb

Upload api.py

Browse files
Files changed (1) hide show
  1. api.py +55 -1
api.py CHANGED
@@ -1012,7 +1012,8 @@ async def detect_scenes(
1012
 
1013
  X = np.array(frames)
1014
  labels = hierarchical_cluster_with_min_size(X, max_groups, min_cluster_size).tolist()
1015
- print(f"Scene clustering jer脿rquic: {len(set([l for l in labels if l >= 0]))} clusters")
 
1016
 
1017
  # Agrupar per etiqueta (>=0)
1018
  clusters = {}
@@ -1020,6 +1021,59 @@ async def detect_scenes(
1020
  if lbl is None or lbl < 0:
1021
  continue
1022
  clusters.setdefault(int(lbl), []).append(i)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1023
 
1024
  # Escriure imatges representatives per a cada clúster
1025
  base = TEMP_ROOT / video_name / "scenes"
 
1012
 
1013
  X = np.array(frames)
1014
  labels = hierarchical_cluster_with_min_size(X, max_groups, min_cluster_size).tolist()
1015
+ initial_clusters = len(set([l for l in labels if l >= 0]))
1016
+ print(f"Scene clustering jer脿rquic inicial: {initial_clusters} clusters")
1017
 
1018
  # Agrupar per etiqueta (>=0)
1019
  clusters = {}
 
1021
  if lbl is None or lbl < 0:
1022
  continue
1023
  clusters.setdefault(int(lbl), []).append(i)
1024
+
1025
+ # VALIDACIÓ: Mesurar robustesa dels clusters i fusionar si són massa similars
1026
+ # Calcular centroides (histograma promig de cada cluster)
1027
+ centroids = {}
1028
+ for lbl, idxs in clusters.items():
1029
+ cluster_histograms = X[idxs]
1030
+ centroids[lbl] = np.mean(cluster_histograms, axis=0)
1031
+
1032
+ # Comparar distàncies entre clusters
1033
+ # Si dos clusters tenen una distància euclidiana < threshold, són massa similars
1034
+ SIMILARITY_THRESHOLD = 0.15 # Ajustable: més baix = més estricte
1035
+
1036
+ # Calcular matriu de distàncies entre centroides
1037
+ cluster_labels = sorted(centroids.keys())
1038
+ distances = {}
1039
+ for i, lbl1 in enumerate(cluster_labels):
1040
+ for lbl2 in cluster_labels[i+1:]:
1041
+ dist = np.linalg.norm(centroids[lbl1] - centroids[lbl2])
1042
+ distances[(lbl1, lbl2)] = dist
1043
+
1044
+ # Trobar parelles de clusters massa similars i fusionar-les
1045
+ merged = {} # mapatge de label_old -> label_new
1046
+ for lbl in cluster_labels:
1047
+ merged[lbl] = lbl
1048
+
1049
+ # Fusionar clusters similars (greedy approach)
1050
+ for (lbl1, lbl2), dist in sorted(distances.items(), key=lambda x: x[1]):
1051
+ if dist < SIMILARITY_THRESHOLD:
1052
+ # Fusionar lbl2 amb lbl1
1053
+ current_lbl1 = merged.get(lbl1, lbl1)
1054
+ current_lbl2 = merged.get(lbl2, lbl2)
1055
+ if current_lbl1 != current_lbl2:
1056
+ # Assignar lbl2 al grup de lbl1
1057
+ for k, v in merged.items():
1058
+ if v == current_lbl2:
1059
+ merged[k] = current_lbl1
1060
+ print(f"[SCENE VALIDATION] Fusionant clusters {lbl2} i {lbl1} (dist脿ncia={dist:.3f})")
1061
+
1062
+ # Aplicar fusió als clusters
1063
+ new_clusters = {}
1064
+ for lbl, idxs in clusters.items():
1065
+ new_lbl = merged[lbl]
1066
+ if new_lbl not in new_clusters:
1067
+ new_clusters[new_lbl] = []
1068
+ new_clusters[new_lbl].extend(idxs)
1069
+
1070
+ clusters = new_clusters
1071
+ final_clusters = len(clusters)
1072
+ eliminated = initial_clusters - final_clusters
1073
+
1074
+ if eliminated > 0:
1075
+ print(f"[SCENE VALIDATION] Redu茂t de {initial_clusters} a {final_clusters} clusters "
1076
+ f"(eliminats {eliminated} clusters massa similars)")
1077
 
1078
  # Escriure imatges representatives per a cada clúster
1079
  base = TEMP_ROOT / video_name / "scenes"