Update preprocessing_router.py
Browse files- preprocessing_router.py +38 -0
preprocessing_router.py
CHANGED
|
@@ -25,6 +25,7 @@ from sklearn.cluster import KMeans
|
|
| 25 |
from sklearn.neighbors import KNeighborsClassifier
|
| 26 |
|
| 27 |
from svision_client import get_face_embeddings_simple
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
ROOT = Path("/tmp/veureu")
|
|
@@ -269,6 +270,43 @@ async def finalize_casting(
|
|
| 269 |
print("-" * 30)
|
| 270 |
print("="*50 + "\n")
|
| 271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
print(casting_json)
|
| 273 |
|
| 274 |
MEDIA_ROOT = _P("/data/media")
|
|
|
|
| 25 |
from sklearn.neighbors import KNeighborsClassifier
|
| 26 |
|
| 27 |
from svision_client import get_face_embeddings_simple
|
| 28 |
+
from asr_client import get_voice_embedding
|
| 29 |
|
| 30 |
|
| 31 |
ROOT = Path("/tmp/veureu")
|
|
|
|
| 270 |
print("-" * 30)
|
| 271 |
print("="*50 + "\n")
|
| 272 |
|
| 273 |
+
print("\n" + "="*50)
|
| 274 |
+
print("DEBUG: PROCESANDO VOCES")
|
| 275 |
+
print("="*50)
|
| 276 |
+
|
| 277 |
+
for v_idx, cluster in enumerate(voice_clusters):
|
| 278 |
+
v_name = cluster.get("name", f"Voz_{v_idx}")
|
| 279 |
+
v_folder = cluster.get("folder", "")
|
| 280 |
+
v_files = cluster.get("files", [])
|
| 281 |
+
|
| 282 |
+
print(f"🔊 Voz {v_idx+1}: {v_name}")
|
| 283 |
+
print(f" 📂 Carpeta origen: {v_folder}")
|
| 284 |
+
print(f" 🎵 Audios seleccionados ({len(v_files)}):")
|
| 285 |
+
|
| 286 |
+
for f in v_files:
|
| 287 |
+
f_name = Path(f).name # seg_0001.wav
|
| 288 |
+
f_path = Path(v_folder) / f_name
|
| 289 |
+
|
| 290 |
+
if not f_path.exists():
|
| 291 |
+
print(f"❌ NO EXISTE: {f_path}")
|
| 292 |
+
continue
|
| 293 |
+
|
| 294 |
+
# ⬇️ AQUÍ tu función de embeddings de audio
|
| 295 |
+
v_emb = get_voice_embedding(str(f_path))
|
| 296 |
+
|
| 297 |
+
print(f" - original: {f}")
|
| 298 |
+
print(f" - resolved: {f_path}")
|
| 299 |
+
print(f" - emb: {'OK' if v_emb else 'VACÍO'}")
|
| 300 |
+
|
| 301 |
+
if v_emb:
|
| 302 |
+
casting_json["voice_col"].append({
|
| 303 |
+
"nombre": v_name,
|
| 304 |
+
"embedding": v_emb,
|
| 305 |
+
})
|
| 306 |
+
|
| 307 |
+
print("-" * 30)
|
| 308 |
+
|
| 309 |
+
|
| 310 |
print(casting_json)
|
| 311 |
|
| 312 |
MEDIA_ROOT = _P("/data/media")
|