File size: 41,213 Bytes
612098a
 
 
 
 
 
 
 
 
 
 
 
 
5a05e40
612098a
 
 
 
 
 
 
 
 
a72be74
 
 
b1de1ba
eed1e35
b1de1ba
612098a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a05e40
 
 
 
 
 
 
 
 
 
612098a
 
 
 
 
 
 
 
 
 
8a377f7
612098a
8a377f7
8fea277
 
 
 
b1de1ba
 
8fea277
 
 
 
 
 
 
 
 
 
acbbc10
 
b3e9eae
 
 
b1de1ba
8fea277
b3e9eae
72fe2c9
 
 
 
 
b1de1ba
8fea277
 
 
91b4928
 
eed1e35
 
a7c648c
437b633
eed1e35
736284d
a7c648c
eed1e35
437b633
eed1e35
437b633
 
eed1e35
 
 
 
 
437b633
eed1e35
 
437b633
eed1e35
 
 
 
 
 
 
 
 
 
 
b1de1ba
 
5a05e40
afd3092
612098a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
038571f
eaeb806
612098a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a72be74
 
 
 
 
 
 
612098a
d5bf8e1
a72be74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
612098a
 
 
 
 
 
 
 
 
 
 
 
 
8b0b387
612098a
8b0b387
612098a
 
 
 
 
 
6a45b56
612098a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
from __future__ import annotations

from fastapi import APIRouter, UploadFile, File, Form, BackgroundTasks, HTTPException, Body
from fastapi.responses import FileResponse
from datetime import datetime
from enum import Enum
from typing import Dict, Any, List
import shutil
import os
import uuid
import numpy as np
import cv2
import tempfile
from pathlib import Path

from casting_loader import ensure_chroma, build_faces_index, build_voices_index
from llm_router import load_yaml, LLMRouter
from storage.media_routers import upload_video

# External space clients (no local GPU needed)
import svision_client
import asr_client

from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier

from svision_client import get_face_embeddings_simple
from asr_client import get_voice_embedding


# Working directories used by this module. Each one is created at import time
# if it does not exist yet (parents included).
ROOT = Path("/tmp/veureu")
ROOT.mkdir(parents=True, exist_ok=True)
# Per-video scratch space: the endpoints below serve and write files under
# TEMP_ROOT/<video_name>/{characters,clips,scenes}.
TEMP_ROOT = Path("/tmp/temp")
TEMP_ROOT.mkdir(parents=True, exist_ok=True)
# Uploaded videos are stored here as <video_name>.mp4.
VIDEOS_ROOT = Path("/tmp/data/videos")
VIDEOS_ROOT.mkdir(parents=True, exist_ok=True)
# Finalized identities (faces, voices and their ChromaDB index) per video.
IDENTITIES_ROOT = Path("/tmp/characters")
IDENTITIES_ROOT.mkdir(parents=True, exist_ok=True)
# Token handed to upload_video(); None when the environment variable is unset.
VEUREU_TOKEN = os.getenv("VEUREU_TOKEN")


class JobStatus(str, Enum):
    """Lifecycle states stored in the ``status`` field of a job entry.

    The enum also derives from ``str``, so each member compares equal to its
    plain string value.
    """

    QUEUED = "queued"  # assigned by create_initial_casting when the job is registered
    PROCESSING = "processing"  # assigned by process_video_job when it starts
    DONE = "done"  # presumably set when processing finishes — confirm in process_video_job
    FAILED = "failed"  # presumably set when processing raises — confirm in process_video_job


# In-process job registry keyed by job id. Entries are created by
# create_initial_casting and read by get_job_status / process_video_job.
# It is a plain module-level dict, so its content is lost on restart.
jobs: Dict[str, dict] = {}


# ---------------------------------------------------------------------------
# Helper function for clustering (only math, no GPU)
# ---------------------------------------------------------------------------

def hierarchical_cluster_with_min_size(X, max_groups: int, min_cluster_size: int, sensitivity: float = 0.5) -> np.ndarray:
    """Cluster the rows of ``X`` hierarchically, keeping only large-enough clusters.

    The dendrogram (average linkage, cosine distance) is built once and then
    cut from the finest admissible partition towards the coarsest one. The
    first cut that yields between 1 and ``max_groups`` clusters with at least
    ``min_cluster_size`` members is accepted; samples that fall in smaller
    clusters of that cut are labelled as noise.

    Args:
        X: Sequence or 2-D array of feature vectors, one row per sample.
        max_groups: Upper bound (k-target) on the number of valid clusters.
            Values <= 0 mean "no clusters": every sample is noise.
        min_cluster_size: Minimum number of members of a valid cluster.
            Values below 1 are treated as 1.
        sensitivity: Unused; kept in the signature for backward compatibility.

    Returns:
        Integer array of length ``len(X)``. Valid clusters carry the 0-based
        label assigned by ``fcluster`` (not necessarily contiguous); noise
        samples carry ``-1``.
    """
    from scipy.cluster.hierarchy import linkage, fcluster

    n_samples = len(X)
    if n_samples == 0:
        # Same integer dtype as every other return path.
        return np.array([], dtype=int)

    # A minimum size below 1 is meaningless and would make the integer
    # division further down raise ZeroDivisionError.
    min_cluster_size = max(1, int(min_cluster_size))

    # Not enough samples to form even one valid cluster: everything is noise.
    if n_samples < min_cluster_size:
        return np.full(n_samples, -1, dtype=int)

    k_target = max(0, int(max_groups))
    if k_target == 0:
        return np.full(n_samples, -1, dtype=int)

    if n_samples == 1:
        # linkage() needs at least two observations. A lone sample that passed
        # the size check above is, trivially, its own cluster.
        return np.zeros(1, dtype=int)

    # Build the hierarchy once; only the cut level changes in the loop.
    Z = linkage(X, method="average", metric="cosine")

    # Largest number of clusters that could all reach min_cluster_size.
    # It is >= 1 because n_samples >= min_cluster_size here.
    max_to_try = n_samples // min_cluster_size

    best_labels = np.full(n_samples, -1, dtype=int)

    # Go from many clusters to few and stop at the first acceptable cut.
    for n_clusters in range(max_to_try, 0, -1):
        trial_labels = fcluster(Z, t=n_clusters, criterion="maxclust") - 1
        found, sizes = np.unique(trial_labels, return_counts=True)
        valid_clusters = found[sizes >= min_cluster_size]

        if valid_clusters.size == 0:
            # Cut is too fine: every cluster is below the minimum size.
            continue

        if valid_clusters.size <= k_target:
            keep = np.isin(trial_labels, valid_clusters)
            best_labels = np.where(keep, trial_labels, -1).astype(int)
            break

    return best_labels


# Router that collects every endpoint declared below; it is grouped under the
# "Preprocessing Manager" tag.
router = APIRouter(tags=["Preprocessing Manager"])


@router.post("/create_initial_casting")
async def create_initial_casting(
    background_tasks: BackgroundTasks,
    video: UploadFile = File(...),
    max_groups: int = Form(default=3),
    min_cluster_size: int = Form(default=3),
    face_sensitivity: float = Form(default=0.5),
    voice_max_groups: int = Form(default=3),
    voice_min_cluster_size: int = Form(default=3),
    voice_sensitivity: float = Form(default=0.5),
    max_frames: int = Form(default=100),
):
    """Store an uploaded video and queue a background casting job for it.

    The upload is written to ``VIDEOS_ROOT/<stem>.mp4``, passed to
    ``upload_video`` together with ``VEUREU_TOKEN``, and a new entry is added
    to ``jobs`` holding the clustering parameters. ``process_video_job`` is
    then scheduled as a background task.

    Returns:
        ``{"job_id": <uuid4 string>}``.

    Raises:
        HTTPException: 400 when the upload carries no filename.
    """
    if not video.filename:
        # Path(None) would raise TypeError and surface as an opaque 500.
        raise HTTPException(status_code=400, detail="Missing video filename")

    video_name = Path(video.filename).stem
    dst_video = VIDEOS_ROOT / f"{video_name}.mp4"
    with dst_video.open("wb") as f:
        shutil.copyfileobj(video.file, f)

    # copyfileobj leaves the upload stream at EOF; rewind it so the next
    # consumer reads the whole video instead of zero bytes.
    video.file.seek(0)
    # NOTE(review): if upload_video is a coroutine function this call needs an
    # ``await`` to actually run — confirm against storage.media_routers.
    upload_video(video, VEUREU_TOKEN)

    job_id = str(uuid.uuid4())

    jobs[job_id] = {
        "id": job_id,
        "status": JobStatus.QUEUED,
        "video_path": str(dst_video),
        "video_name": video_name,
        "max_groups": int(max_groups),
        "min_cluster_size": int(min_cluster_size),
        "face_sensitivity": float(face_sensitivity),
        "voice_max_groups": int(voice_max_groups),
        "voice_min_cluster_size": int(voice_min_cluster_size),
        "voice_sensitivity": float(voice_sensitivity),
        "max_frames": int(max_frames),
        "created_at": datetime.now().isoformat(),
        "results": None,
        "error": None,
    }

    print(f"[{job_id}] Job creado para vídeo: {video_name}")
    background_tasks.add_task(process_video_job, job_id)
    return {"job_id": job_id}


@router.get("/jobs/{job_id}/status")
def get_job_status(job_id: str):
    """Report the state of a job, plus its results and error when present.

    Returns:
        A dict with ``status`` (plain string). ``results`` is added when it is
        not ``None`` and ``error`` is added when it is truthy.

    Raises:
        HTTPException: 404 when ``job_id`` is not registered in ``jobs``.
    """
    try:
        job = jobs[job_id]
    except KeyError:
        raise HTTPException(status_code=404, detail="Job not found") from None

    # The status may be stored as the enum member or as something else;
    # either way the response carries a plain string.
    raw_status = job["status"]
    if isinstance(raw_status, JobStatus):
        status_text = raw_status.value
    else:
        status_text = str(raw_status)

    payload = {"status": status_text}

    results = job.get("results")
    if results is not None:
        payload["results"] = results

    error = job.get("error")
    if error:
        payload["error"] = error

    return payload


@router.get("/files/{video_name}/{char_id}/{filename}")
def serve_character_file(video_name: str, char_id: str, filename: str):
    """Serve ``TEMP_ROOT/<video_name>/characters/<char_id>/<filename>``.

    Raises:
        HTTPException: 404 when the target is not a regular file or when the
            resolved path falls outside ``TEMP_ROOT``.
    """
    file_path = (TEMP_ROOT / video_name / "characters" / char_id / filename).resolve()
    # The path segments come straight from the URL: refuse anything that
    # resolves outside the temp root (e.g. via ".." segments).
    try:
        file_path.relative_to(TEMP_ROOT.resolve())
    except ValueError:
        raise HTTPException(status_code=404, detail="File not found") from None
    # is_file() also rejects directories, which exists() would let through.
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(file_path)


@router.get("/audio/{video_name}/{filename}")
def serve_audio_file(video_name: str, filename: str):
    """Serve ``TEMP_ROOT/<video_name>/clips/<filename>``.

    Raises:
        HTTPException: 404 when the target is not a regular file or when the
            resolved path falls outside ``TEMP_ROOT``.
    """
    file_path = (TEMP_ROOT / video_name / "clips" / filename).resolve()
    # The path segments come straight from the URL: refuse anything that
    # resolves outside the temp root (e.g. via ".." segments).
    try:
        file_path.relative_to(TEMP_ROOT.resolve())
    except ValueError:
        raise HTTPException(status_code=404, detail="File not found") from None
    # is_file() also rejects directories, which exists() would let through.
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(file_path)


@router.post("/load_casting")
async def load_casting(
    faces_dir: str = Form("identities/faces"),
    voices_dir: str = Form("identities/voices"),
    db_dir: str = Form("chroma_db"),
    drop_collections: bool = Form(False),
):
    """Build the face and voice indexes from the given identity folders.

    The ChromaDB client is obtained for ``db_dir``; the faces index is built
    first, then the voices index. ``drop_collections`` is forwarded to both
    builders as ``drop``.

    Returns:
        ``{"ok": True, "faces": <builder result>, "voices": <builder result>}``.
    """
    chroma = ensure_chroma(Path(db_dir))

    faces_count = build_faces_index(
        Path(faces_dir),
        chroma,
        collection_name="index_faces",
        drop=drop_collections,
    )
    voices_count = build_voices_index(
        Path(voices_dir),
        chroma,
        collection_name="index_voices",
        drop=drop_collections,
    )

    summary = {"ok": True}
    summary["faces"] = faces_count
    summary["voices"] = voices_count
    return summary


from pathlib import Path
def find_video_hash(filename: str, media_root) -> str | None:
    for hash_dir in media_root.iterdir():
        if hash_dir.is_dir():
            clips_dir = hash_dir / "clips"
            video_path = clips_dir / filename
            if video_path.exists():
                return hash_dir.name
    return None

@router.post("/finalize_casting")
async def finalize_casting(
    payload: dict = Body(...),
):
    """Persist the casting selected by the user and build its identity indexes.

    Payload keys (all read with ``dict.get``):
      - ``video_name`` and ``base_dir``: required.
      - ``characters``: list of dicts with ``name``, ``folder``, ``kept_files``.
      - ``voice_clusters``: list of dicts with ``name``, ``label``, ``clips``.
      - ``video_hash``: optional; only used as a fallback value.

    Steps:
      1. Compute one embedding per kept face image and per existing voice clip
         and collect them in ``casting_json``.
      2. Copy the kept face images and voice clips into
         ``IDENTITIES_ROOT/<video_name>/{faces,voices}/<identity name>``.
      3. Rebuild the ChromaDB face and voice indexes from those folders; a
         failure here is logged and reported as zero embeddings.

    Returns:
        A dict with the output directories, the index sizes, the identity
        names found on disk and ``casting_json``.

    Raises:
        HTTPException: 400 when ``video_name`` or ``base_dir`` is missing.
    """
    import shutil as _sh
    from pathlib import Path as _P

    video_name = payload.get("video_name")
    base_dir = payload.get("base_dir")
    characters = payload.get("characters", []) or []
    video_hash = payload.get("video_hash") or "empty"
    voice_clusters = payload.get("voice_clusters", []) or []

    # Validate first: without these two values nothing below can work, and
    # there is no point in computing embeddings for a request that will fail.
    if not video_name or not base_dir:
        raise HTTPException(status_code=400, detail="Missing video_name or base_dir")

    # --- Debug dump of the received characters ---
    print("\n" + "="*50)
    print(f"DEBUG: RECIBIENDO PERSONAJES PARA EL VÍDEO: {video_name}")
    print("="*50)

    casting_json = {"face_col": [], "voice_col": []}

    for idx, char in enumerate(characters):
        c_name = char.get("name", "Sin nombre")
        c_folder = char.get("folder", "Sin carpeta")
        c_files = char.get("kept_files") or []

        print(f"👤 Personaje {idx+1}: {c_name}")
        print(f"   📂 Carpeta origen: {c_folder}")
        print(f"   🖼️ Archivos seleccionados ({len(c_files)}):")
        for f in c_files:
            # Entries may carry a path or URL prefix; only the file name is
            # meaningful inside the character folder.
            f_name = _P(f).name
            f_path = _P(c_folder) / f_name

            emb = get_face_embeddings_simple(str(f_path))

            print(emb)
            print(f"      - {f}")
            print(f"      - {f_path}")
            if emb:
                casting_json["face_col"].append({
                    "nombre": c_name,
                    "embedding": emb[0],
                })

        print("-" * 30)
    print("="*50 + "\n")

    print(voice_clusters)

    # Folder where the audio clips of this video live (loop-invariant).
    audio_base_folder = TEMP_ROOT / video_name / "clips"

    for v_idx, cluster in enumerate(voice_clusters):
        v_name = cluster.get("name", f"Voz_{v_idx}")
        clips = cluster.get("clips") or []

        print(f"🔊 Voz {v_idx+1}: {v_name}")
        print(f"   🎵 Clips seleccionados ({len(clips)}):")

        for clip_name in clips:
            f_path = audio_base_folder / clip_name

            if not f_path.exists():
                print(f"❌ NO EXISTE: {f_path}")
                continue

            v_emb = get_voice_embedding(str(f_path))

            print(f"      - clip: {clip_name}")
            print(f"      - resolved: {f_path}")
            print(f"      - emb: {'OK' if v_emb else 'VACÍO'}")

            if v_emb:
                casting_json["voice_col"].append({
                    "nombre": v_name,
                    "embedding": v_emb,
                })

        print("-" * 30)

    print(casting_json)

    # NOTE(review): video_hash is resolved here but not used further in this
    # function. The lookup is kept non-fatal so that an unreadable media root
    # cannot fail the whole request.
    MEDIA_ROOT = _P("/data/media")
    try:
        video_hash = find_video_hash(f"{video_name}.mp4", MEDIA_ROOT) or video_hash
    except OSError as e:
        print(f"[finalize_casting] WARN - No se pudo resolver el hash del vídeo: {e}")

    faces_out = IDENTITIES_ROOT / video_name / "faces"
    voices_out = IDENTITIES_ROOT / video_name / "voices"
    faces_out.mkdir(parents=True, exist_ok=True)
    voices_out.mkdir(parents=True, exist_ok=True)

    # Copy the kept face images, one destination folder per character name.
    for ch in characters:
        ch_name = (ch.get("name") or "Unknown").strip() or "Unknown"
        ch_folder = ch.get("folder")
        kept = ch.get("kept_files") or []
        if not ch_folder or not os.path.isdir(ch_folder):
            continue
        dst_dir = faces_out / ch_name
        dst_dir.mkdir(parents=True, exist_ok=True)
        for fname in kept:
            # Same basename rule as in the embedding loop above, so both
            # steps look at the same file.
            src = _P(ch_folder) / _P(fname).name
            if src.exists() and src.is_file():
                try:
                    _sh.copy2(src, dst_dir / src.name)
                except OSError as e:
                    # Best effort: keep going, but leave a trace of the failure.
                    print(f"[finalize_casting] WARN - No se pudo copiar {src}: {e}")

    # Copy the selected voice clips, one destination folder per voice name.
    clips_dir = _P(base_dir) / "clips"
    for vc in voice_clusters:
        v_name = (vc.get("name") or f"SPEAKER_{int(vc.get('label',0)):02d}").strip()
        dst_dir = voices_out / v_name
        dst_dir.mkdir(parents=True, exist_ok=True)
        for wav in (vc.get("clips") or []):
            src = clips_dir / wav
            if src.exists() and src.is_file():
                try:
                    _sh.copy2(src, dst_dir / src.name)
                except OSError as e:
                    print(f"[finalize_casting] WARN - No se pudo copiar {src}: {e}")

    db_dir = IDENTITIES_ROOT / video_name / "chroma_db"
    try:
        client = ensure_chroma(db_dir)
        n_faces = build_faces_index(
            faces_out,
            client,
            collection_name="index_faces",
            deepface_model="Facenet512",
            drop=True,
        )
        n_voices = build_voices_index(
            voices_out,
            client,
            collection_name="index_voices",
            drop=True,
        )
    except Exception as e:
        # Index construction is optional for the response: report zero
        # embeddings instead of failing the request.
        print(f"[finalize_casting] WARN - No se pudieron construir índices ChromaDB: {e}")
        n_faces = 0
        n_voices = 0

    face_identities = sorted(p.name for p in faces_out.iterdir() if p.is_dir()) if faces_out.exists() else []
    voice_identities = sorted(p.name for p in voices_out.iterdir() if p.is_dir()) if voices_out.exists() else []

    return {
        "ok": True,
        "video_name": video_name,
        "faces_dir": str(faces_out),
        "voices_dir": str(voices_out),
        "db_dir": str(db_dir),
        "n_faces_embeddings": n_faces,
        "n_voices_embeddings": n_voices,
        "face_identities": face_identities,
        "voice_identities": voice_identities,
        "casting_json": casting_json,
    }


@router.get("/files_scene/{video_name}/{scene_id}/{filename}")
def serve_scene_file(video_name: str, scene_id: str, filename: str):
    """Serve ``TEMP_ROOT/<video_name>/scenes/<scene_id>/<filename>``.

    Raises:
        HTTPException: 404 when the target is not a regular file or when the
            resolved path falls outside ``TEMP_ROOT``.
    """
    file_path = (TEMP_ROOT / video_name / "scenes" / scene_id / filename).resolve()
    # The path segments come straight from the URL: refuse anything that
    # resolves outside the temp root (e.g. via ".." segments).
    try:
        file_path.relative_to(TEMP_ROOT.resolve())
    except ValueError:
        raise HTTPException(status_code=404, detail="File not found") from None
    # is_file() also rejects directories, which exists() would let through.
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(file_path)


@router.post("/detect_scenes")
async def detect_scenes(
    video: UploadFile = File(...),
    max_groups: int = Form(default=3),
    min_cluster_size: int = Form(default=3),
    scene_sensitivity: float = Form(default=0.5),
    frame_interval_sec: float = Form(default=0.5),  # kept for compatibility, not used
    max_frames: int = Form(default=100),
):
    """Detect scenes from evenly spaced video frames and hierarchical clustering.

    - Samples up to ``max_frames`` evenly spaced frames from the uploaded video.
    - Skips black or very dark frames (mean grey intensity below 5).
    - Describes each frame with a 3-D colour histogram (8x8x8) divided by its
      mean; frames whose histogram is all zeros are skipped.
    - Clusters the histograms with ``hierarchical_cluster_with_min_size``,
      the same routine used for faces and voices.

    Returns:
        ``{"scene_clusters": [...]}`` where each item has ``id``, ``name``,
        ``folder``, ``image_url``, ``start_time`` and ``end_time``. The two
        time fields hold frame indices, not seconds. On an unexpected error
        the list is empty and an ``error`` string is added.

    Raises:
        HTTPException: 400 when the upload carries no filename.
    """
    if not video.filename:
        # Path(None) would raise TypeError and surface as an opaque 500.
        raise HTTPException(status_code=400, detail="Missing video filename")

    video_name = Path(video.filename).stem
    dst_video = VIDEOS_ROOT / f"{video_name}.mp4"
    with dst_video.open("wb") as f:
        shutil.copyfileobj(video.file, f)

    try:
        print(f"[detect_scenes] Extrayendo frames equiespaciados de {video_name}...")

        keyframe_paths: List[Path] = []
        keyframe_infos: List[dict] = []
        features: List[np.ndarray] = []

        cap = cv2.VideoCapture(str(dst_video))
        try:
            if not cap.isOpened():
                raise RuntimeError("No se pudo abrir el vídeo para detectar escenas")

            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
            if total_frames <= 0:
                print("[detect_scenes] total_frames <= 0")
                return {"scene_clusters": []}

            n_samples = max(1, min(int(max_frames), total_frames))
            frame_indices = sorted(set(np.linspace(0, max(0, total_frames - 1), num=n_samples, dtype=int).tolist()))
            print(f"[detect_scenes] Total frames: {total_frames}, muestreando {len(frame_indices)} frames")

            # Create base directory for scenes
            base = TEMP_ROOT / video_name
            scenes_dir = base / "scenes"
            scenes_dir.mkdir(parents=True, exist_ok=True)

            # --------------------------------------------------------------
            # STEP 1: Save frames and build simple embeddings (histograms)
            # --------------------------------------------------------------
            for frame_idx in frame_indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_idx))
                ret, frame = cap.read()
                if not ret:
                    continue

                # Drop black or near-black frames: global brightness is
                # measured as the mean of the greyscale image.
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                mean_intensity = float(gray.mean())
                if mean_intensity < 5.0:
                    continue

                local_keyframe = scenes_dir / f"keyframe_{frame_idx:06d}.jpg"
                try:
                    written = cv2.imwrite(str(local_keyframe), frame)
                except Exception as werr:
                    print(f"[detect_scenes] Error guardando frame {frame_idx}: {werr}")
                    continue
                if not written:
                    # imwrite reports most failures through its return value,
                    # not through an exception.
                    print(f"[detect_scenes] Error guardando frame {frame_idx}: imwrite devolvió False")
                    continue

                try:
                    # 8x8x8 colour histogram in RGB
                    img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    hist = cv2.calcHist(
                        [img_rgb], [0, 1, 2], None,
                        [8, 8, 8], [0, 256, 0, 256, 0, 256]
                    ).astype("float32").flatten()

                    if not np.any(hist):
                        # All zeros: nothing to normalise, skip the frame
                        continue

                    mean_val = float(hist.mean())
                    if mean_val <= 0.0:
                        continue

                    hist /= mean_val
                    features.append(hist)
                except Exception as fe_err:
                    print(f"[detect_scenes] Error calculando embedding para frame {frame_idx}: {fe_err}")
                    continue

                # Appended only after the feature was stored, so the three
                # lists stay index-aligned.
                keyframe_paths.append(local_keyframe)
                # No per-frame timing info is available: use the frame index
                info = {"start": int(frame_idx), "end": int(frame_idx) + 1}
                keyframe_infos.append(info)
        finally:
            # Release the capture on every path, including exceptions.
            cap.release()

        if not features or len(features) < min_cluster_size:
            print(
                f"[detect_scenes] No hay suficientes frames válidos para clusterizar escenas: "
                f"validos={len(features)}, min_cluster_size={min_cluster_size}"
            )
            return {"scene_clusters": []}

        Xs = np.vstack(features)

        # ------------------------------------------------------------------
        # STEP 2: Hierarchical scene clustering (k-target + minimum size)
        # ------------------------------------------------------------------
        print("[detect_scenes] Clustering jerárquico de escenas...")
        scene_labels = hierarchical_cluster_with_min_size(Xs, max_groups, min_cluster_size, 0.5)
        unique_labels = sorted({int(l) for l in scene_labels if int(l) >= 0})
        print(f"[detect_scenes] Etiquetas de escena válidas: {unique_labels}")

        # Map each valid cluster label to the indices of its keyframes
        cluster_map: Dict[int, List[int]] = {}
        for idx, lbl in enumerate(scene_labels):
            lbl = int(lbl)
            if lbl >= 0:
                cluster_map.setdefault(lbl, []).append(idx)

        # ------------------------------------------------------------------
        # STEP 3: Build scene_clusters in the format expected by the demo
        # ------------------------------------------------------------------
        scene_clusters: List[Dict[str, Any]] = []
        for ci, idxs in sorted(cluster_map.items(), key=lambda x: x[0]):
            if not idxs:
                continue

            scene_id = f"scene_{ci:02d}"
            scene_out_dir = scenes_dir / scene_id
            scene_out_dir.mkdir(parents=True, exist_ok=True)

            # Copy every keyframe of the cluster into the cluster folder
            cluster_start = None
            cluster_end = None
            representative_file = None

            for k_idx in idxs:
                src = keyframe_paths[k_idx]
                dst = scene_out_dir / src.name
                try:
                    shutil.copy2(src, dst)
                except Exception as cp_err:
                    print(f"[detect_scenes] Error copiando keyframe {src} a cluster {scene_id}: {cp_err}")
                    continue

                # The first successfully copied keyframe represents the scene
                if representative_file is None:
                    representative_file = dst

                info = keyframe_infos[k_idx]
                start = info.get("start", k_idx)
                end = info.get("end", k_idx + 1)
                cluster_start = start if cluster_start is None else min(cluster_start, start)
                cluster_end = end if cluster_end is None else max(cluster_end, end)

            if representative_file is None:
                continue

            scene_clusters.append({
                "id": scene_id,
                "name": f"Escena {len(scene_clusters)+1}",
                "folder": str(scene_out_dir),
                "image_url": f"/files_scene/{video_name}/{scene_id}/{representative_file.name}",
                "start_time": float(cluster_start) if cluster_start is not None else 0.0,
                "end_time": float(cluster_end) if cluster_end is not None else 0.0,
            })

        print(f"[detect_scenes]  {len(scene_clusters)} escenes clusteritzades")
        return {"scene_clusters": scene_clusters}

    except Exception as e:
        print(f"[detect_scenes] Error: {e}")
        import traceback
        traceback.print_exc()
        return {"scene_clusters": [], "error": str(e)}


def process_video_job(job_id: str):
    """
    Process a video job in the background using EXTERNAL spaces (svision, asr).

    NO local GPU needed - all vision/audio processing is delegated to:
    - svision: face detection + embeddings (MTCNN + FaceNet)
    - asr: audio diarization + voice embeddings (pyannote + ECAPA)

    Engine only does: frame extraction, clustering (math), file organization.

    The job record is looked up in the module-level ``jobs`` mapping and is
    updated in place; nothing is returned.

    Job fields read:
        video_path, video_name (required); max_groups, min_cluster_size,
        face_sensitivity, max_frames, voice_max_groups,
        voice_min_cluster_size, voice_sensitivity (optional, with defaults).

    Job fields written:
        status  -- PROCESSING while running, DONE when the pipeline ends
                   (also when the pipeline body fails, in which case the
                   results are empty), FAILED on an error outside it.
        results -- dict with ``characters``, ``face_labels``,
                   ``audio_segments``, ``voice_labels``, ``diarization_info``,
                   ``video_name`` and ``base_dir``.
        error   -- error message, only set together with FAILED.

    ``face_labels`` has one entry per detected face (same order as the saved
    crops). ``voice_labels`` has one entry per audio segment; segments for
    which no voice embedding was obtained are labelled ``-1``.
    """
    # Bound before the try so the outer handler can safely test it even when
    # the lookup in ``jobs`` is what failed.
    job = None
    try:
        job = jobs[job_id]
        print(f"[{job_id}] Iniciando procesamiento (delegando a svision/asr)...")

        job["status"] = JobStatus.PROCESSING

        video_path = job["video_path"]
        video_name = job["video_name"]
        max_groups = int(job.get("max_groups", 5))
        min_cluster_size = int(job.get("min_cluster_size", 3))
        face_sensitivity = float(job.get("face_sensitivity", 0.5))

        base = TEMP_ROOT / video_name
        base.mkdir(parents=True, exist_ok=True)
        print(f"[{job_id}] Directorio base: {base}")

        try:
            # ============================================================
            # STEP 1: Extract frames from video (local, simple cv2)
            # ============================================================
            print(f"[{job_id}] Extrayendo frames del vídeo...")

            # frame_paths[k] is the file written for frame number
            # extracted_indices[k]; both lists only contain frames that were
            # actually decoded, so they always stay aligned.
            frame_paths: List[str] = []
            extracted_indices: List[int] = []

            cap = cv2.VideoCapture(video_path)
            try:
                if not cap.isOpened():
                    raise RuntimeError("No se pudo abrir el vídeo")

                fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
                total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
                # np.linspace requires an integer sample count.
                max_samples = int(job.get("max_frames", 100))

                if total_frames > 0:
                    # Evenly spaced frame numbers over the whole video,
                    # de-duplicated because short videos can repeat indices.
                    sampled = np.linspace(
                        0,
                        max(0, total_frames - 1),
                        num=min(max_samples, max(1, total_frames)),
                        dtype=int,
                    )
                    frame_indices = sorted(set(sampled.tolist()))
                else:
                    frame_indices = []

                print(f"[{job_id}] Total frames: {total_frames}, FPS: {fps:.2f}, Muestreando {len(frame_indices)} frames")

                # Save frames temporarily for svision processing
                frames_dir = base / "frames_temp"
                frames_dir.mkdir(parents=True, exist_ok=True)
                faces_root = base / "faces_raw"
                faces_root.mkdir(parents=True, exist_ok=True)

                for frame_idx in frame_indices:
                    cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_idx))
                    ret, frame = cap.read()
                    if not ret:
                        continue
                    frame_path = frames_dir / f"frame_{frame_idx:06d}.jpg"
                    cv2.imwrite(str(frame_path), frame)
                    frame_paths.append(str(frame_path))
                    extracted_indices.append(int(frame_idx))
            finally:
                # Release the capture handle even if extraction fails midway.
                cap.release()

            print(f"[{job_id}] ✓ {len(frame_paths)} frames extraídos")

            # ============================================================
            # STEP 2: Send frames to SVISION for face detection + embeddings
            # ============================================================
            print(f"[{job_id}] Enviando frames a svision para detección de caras...")

            # embeddings[k] and crops_meta[k] describe the same face; they
            # are appended together so cluster labels map to the right file.
            embeddings: List[List[float]] = []
            crops_meta: List[dict] = []
            saved_count = 0
            frames_with_faces = 0

            for frame_idx, frame_path in zip(extracted_indices, frame_paths):
                try:
                    # Call svision to get faces + embeddings
                    faces = svision_client.get_face_embeddings_from_image(frame_path)
                    if not faces:
                        continue

                    frames_with_faces += 1
                    for face_data in faces:
                        emb = face_data.get("embedding", [])
                        if not emb:
                            continue

                        # Normalize embedding (epsilon avoids division by zero)
                        emb = np.array(emb, dtype=float)
                        emb = emb / (np.linalg.norm(emb) + 1e-9)

                        # Save face crop if provided by svision
                        crop_path = face_data.get("face_crop_path")
                        fn = f"face_{frame_idx:06d}_{saved_count:03d}.jpg"
                        local_crop_path = faces_root / fn

                        crop_saved = False
                        if crop_path and isinstance(crop_path, str):
                            if crop_path.startswith("http"):
                                # Handle remote URLs from svision (Gradio)
                                try:
                                    import requests
                                    resp = requests.get(crop_path, timeout=30)
                                    if resp.status_code == 200:
                                        with open(local_crop_path, "wb") as f:
                                            f.write(resp.content)
                                        crop_saved = True
                                except Exception as dl_err:
                                    print(f"[{job_id}] Error descargando crop: {dl_err}")
                            elif os.path.exists(crop_path):
                                # Handle local paths
                                shutil.copy2(crop_path, local_crop_path)
                                crop_saved = True

                        if not crop_saved:
                            # If no crop from svision, use original frame
                            shutil.copy2(frame_path, local_crop_path)

                        # Record the face only once its image is on disk, so
                        # a failed copy cannot desynchronise the two lists.
                        embeddings.append(emb.tolist())
                        crops_meta.append({
                            "file": fn,
                            "frame": frame_idx,
                            "index": face_data.get("index", saved_count),
                        })
                        saved_count += 1

                except Exception as e:
                    print(f"[{job_id}] Error procesando frame {frame_idx}: {e}")
                    continue

            print(f"[{job_id}] ✓ Frames con caras: {frames_with_faces}/{len(frame_paths)}")
            print(f"[{job_id}] ✓ Caras detectadas: {len(embeddings)}")

            # ============================================================
            # STEP 3: Clustering (local, only math - no GPU)
            # ============================================================
            if embeddings:
                print(f"[{job_id}] Clustering jerárquico...")
                Xf = np.array(embeddings)
                labels = hierarchical_cluster_with_min_size(Xf, max_groups, min_cluster_size, face_sensitivity).tolist()
                n_clusters = len({l for l in labels if l >= 0})
                print(f"[{job_id}] ✓ Clustering: {n_clusters} clusters")
            else:
                labels = []

            # ============================================================
            # STEP 4: Organize faces into character folders
            # ============================================================
            characters: List[Dict[str, Any]] = []
            # cluster label -> indices into embeddings / crops_meta.
            # Negative labels are not assigned to any character.
            cluster_map: Dict[int, List[int]] = {}
            for idx, lbl in enumerate(labels):
                if isinstance(lbl, int) and lbl >= 0:
                    cluster_map.setdefault(lbl, []).append(idx)

            chars_dir = base / "characters"
            chars_dir.mkdir(parents=True, exist_ok=True)

            print(f"[{job_id}] cluster_map: {cluster_map}")
            print(f"[{job_id}] crops_meta count: {len(crops_meta)}")
            print(f"[{job_id}] faces_root: {faces_root}, exists: {faces_root.exists()}")
            if faces_root.exists():
                existing_files = list(faces_root.glob("*"))
                print(f"[{job_id}] Files in faces_root: {len(existing_files)}")
                for ef in existing_files[:5]:
                    print(f"[{job_id}]   - {ef.name}")

            for ci, idxs in sorted(cluster_map.items(), key=lambda x: x[0]):
                char_id = f"char_{ci:02d}"
                print(f"[{job_id}] Processing cluster {char_id} with {len(idxs)} indices: {idxs[:5]}...")

                if not idxs:
                    continue

                out_dir = chars_dir / char_id
                out_dir.mkdir(parents=True, exist_ok=True)

                # Select faces to show (half + 1)
                total_faces = len(idxs)
                max_faces_to_show = (total_faces // 2) + 1
                selected_idxs = idxs[:max_faces_to_show]

                files: List[str] = []
                file_urls: List[str] = []

                for j in selected_idxs:
                    if j >= len(crops_meta):
                        print(f"[{job_id}]   Index {j} out of range (crops_meta len={len(crops_meta)})")
                        continue
                    meta = crops_meta[j]
                    fname = meta.get("file")
                    if not fname:
                        print(f"[{job_id}]   No filename in meta for index {j}")
                        continue

                    src = faces_root / fname
                    dst = out_dir / fname
                    try:
                        if src.exists():
                            shutil.copy2(src, dst)
                            files.append(fname)
                            file_urls.append(f"/files/{video_name}/{char_id}/{fname}")
                        else:
                            print(f"[{job_id}]   Source file not found: {src}")
                    except Exception as cp_err:
                        print(f"[{job_id}]   Error copying {fname}: {cp_err}")

                # Create representative image (first successfully copied face)
                rep = files[0] if files else None
                if rep:
                    try:
                        shutil.copy2(out_dir / rep, out_dir / "representative.jpg")
                    except Exception as rep_err:
                        # Best effort: the character is still reported.
                        print(f"[{job_id}]   Error creating representative image for {char_id}: {rep_err}")

                cluster_number = ci + 1
                character_name = f"Cluster {cluster_number}"

                characters.append({
                    "id": char_id,
                    "name": character_name,
                    "folder": str(out_dir),
                    "num_faces": len(files),
                    "total_faces_detected": total_faces,
                    "image_url": f"/files/{video_name}/{char_id}/representative.jpg" if rep else "",
                    "face_files": file_urls,
                })
                print(f"[{job_id}] ✓ Cluster {char_id}: {len(files)} caras")

            # Cleanup temp frames (best effort)
            shutil.rmtree(frames_dir, ignore_errors=True)

            print(f"[{job_id}] ✓ Total: {len(characters)} personajes")

            # ============================================================
            # STEP 5: Audio diarization + voice embeddings using ASR space
            # ============================================================
            voice_max_groups = int(job.get("voice_max_groups", 3))
            voice_min_cluster_size = int(job.get("voice_min_cluster_size", 3))
            # NOTE(review): read and validated but not used by the KMeans
            # path below - confirm whether it should influence clustering.
            voice_sensitivity = float(job.get("voice_sensitivity", 0.5))

            audio_segments: List[Dict[str, Any]] = []
            voice_labels: List[int] = []
            voice_embeddings: List[List[float]] = []
            # embedded_positions[k] is the index in audio_segments of the
            # segment that produced voice_embeddings[k].
            embedded_positions: List[int] = []
            diarization_info: Dict[str, Any] = {}

            print(f"[{job_id}] Procesando audio con ASR space...")
            try:
                # Extract audio and diarize
                diar_result = asr_client.extract_audio_and_diarize(video_path)
                clips = diar_result.get("clips", [])
                segments = diar_result.get("segments", [])

                print(f"[{job_id}] Diarización: {len(clips)} clips, {len(segments)} segmentos")

                # Save clips locally
                clips_dir = base / "clips"
                clips_dir.mkdir(parents=True, exist_ok=True)

                for i, clip_info in enumerate(clips if isinstance(clips, list) else []):
                    # A clip is either a plain path/URL or a dict with "path".
                    if isinstance(clip_info, str):
                        clip_path = clip_info
                    elif isinstance(clip_info, dict):
                        clip_path = clip_info.get("path")
                    else:
                        clip_path = None
                    if not clip_path:
                        continue

                    # Download or copy clip
                    local_clip = clips_dir / f"segment_{i:03d}.wav"
                    clip_saved = False
                    try:
                        if isinstance(clip_path, str) and clip_path.startswith("http"):
                            import requests
                            resp = requests.get(clip_path, timeout=30)
                            if resp.status_code == 200:
                                with open(local_clip, "wb") as f:
                                    f.write(resp.content)
                                clip_saved = True
                        elif isinstance(clip_path, str) and os.path.exists(clip_path):
                            shutil.copy2(clip_path, local_clip)
                            clip_saved = True
                    except Exception as dl_err:
                        print(f"[{job_id}] Error guardando clip {i}: {dl_err}")
                        continue

                    if not clip_saved:
                        # Nothing was written locally, so there is no file to
                        # embed or to serve; skip instead of publishing a
                        # segment that points to a missing clip.
                        print(f"[{job_id}] Clip {i} no disponible, se omite: {clip_path}")
                        continue

                    # Get segment info
                    seg_info = segments[i] if i < len(segments) else {}
                    speaker = seg_info.get("speaker", f"SPEAKER_{i:02d}")

                    # Get voice embedding for this clip
                    emb = asr_client.get_voice_embedding(str(local_clip))
                    if emb:
                        voice_embeddings.append(emb)
                        embedded_positions.append(len(audio_segments))

                    audio_segments.append({
                        "index": i,
                        "clip_path": str(local_clip),
                        "clip_url": f"/audio/{video_name}/segment_{i:03d}.wav",
                        "speaker": speaker,
                        "start": seg_info.get("start", 0),
                        "end": seg_info.get("end", 0),
                    })

                print(f"[{job_id}] \u2713 {len(audio_segments)} segmentos de audio procesados")

                # Cluster voice embeddings
                if voice_embeddings:
                    print(f"[{job_id}] Clustering KMeans+KNN de voz (forzado)...")
                    print(f"[{job_id}]   - voice_embeddings: {len(voice_embeddings)}")
                    print(f"[{job_id}]   - parámetros: grupos={voice_max_groups}, max_por_cluster={voice_min_cluster_size}")

                    # ------------------------------
                    # NORMALIZE EMBEDDINGS
                    # ------------------------------
                    Xv = np.array(voice_embeddings)
                    # Epsilon keeps an all-zero embedding from producing NaNs.
                    Xv = Xv / (np.linalg.norm(Xv, axis=1, keepdims=True) + 1e-9)

                    N = len(Xv)
                    # KMeans cannot fit more clusters than samples.
                    K = min(max(1, voice_max_groups), N)
                    # voice_min_cluster_size is used here as the maximum
                    # number of members allowed per cluster.
                    MAX_PER_CLUSTER = max(1, voice_min_cluster_size)

                    # ------------------------------
                    # STEP 1: FORCED KMEANS
                    # ------------------------------
                    from sklearn.cluster import KMeans

                    km = KMeans(n_clusters=K, n_init=10, random_state=42)
                    # Kept in its own variable: `labels` holds the face
                    # clustering result and must not be overwritten.
                    v_labels = km.fit_predict(Xv)

                    print(f"[{job_id}]   - Inicial: {v_labels.tolist()}")

                    # ------------------------------
                    # STEP 2: KNN REBALANCING OF OVERLOADED CLUSTERS
                    # ------------------------------
                    from sklearn.neighbors import KNeighborsClassifier

                    for iteration in range(10):  # at most 10 adjustment rounds
                        sizes = {c: np.sum(v_labels == c) for c in range(K)}
                        bad_clusters = [c for c, s in sizes.items() if s > MAX_PER_CLUSTER]

                        print(f"[{job_id}]   - Iter {iteration}: tamaños={sizes}")

                        if not bad_clusters:
                            break  # no cluster exceeds the limit

                        # Train KNN ONLY on clusters within the size limit
                        good_indices = []
                        for c in range(K):
                            members = np.where(v_labels == c)[0]
                            if len(members) <= MAX_PER_CLUSTER:
                                good_indices.extend(members)

                        if len(good_indices) == 0:
                            print(f"[{job_id}]   - No hay clusters válidos para KNN, abortando rebalanceo.")
                            break

                        knn = KNeighborsClassifier(n_neighbors=min(3, len(good_indices)))
                        knn.fit(Xv[good_indices], v_labels[good_indices])

                        # Reassign the surplus members of each oversized cluster
                        for c in bad_clusters:
                            members = np.where(v_labels == c)[0]
                            excess = members[MAX_PER_CLUSTER:]

                            for member in excess:
                                v_labels[member] = knn.predict([Xv[member]])[0]

                    clustered = v_labels.tolist()
                    # One label per audio segment; segments without an
                    # embedding keep -1 so indices match audio_segments.
                    voice_labels = [-1] * len(audio_segments)
                    for pos, lab in zip(embedded_positions, clustered):
                        voice_labels[pos] = int(lab)
                    n_voice_clusters = len(set(clustered))

                    print(f"[{job_id}]   - Final voice_labels: {voice_labels}")
                    print(f"[{job_id}] ✓ Clustering voz final: {n_voice_clusters} clusters")

                diarization_info = {
                    "num_segments": len(audio_segments),
                    "num_voice_clusters": len({l for l in voice_labels if l >= 0}) if voice_labels else 0,
                }

            except Exception as audio_err:
                # Audio is best effort: face results are still published.
                print(f"[{job_id}] Error en procesamiento de audio: {audio_err}")
                import traceback
                traceback.print_exc()

            job["results"] = {
                "characters": characters,
                "face_labels": [int(x) for x in labels],
                "audio_segments": audio_segments,
                "voice_labels": [int(x) for x in voice_labels],
                "diarization_info": diarization_info,
                "video_name": video_name,
                "base_dir": str(base),
            }
            job["status"] = JobStatus.DONE
            print(f"[{job_id}] ✓ Procesamiento completado")
            print(job["results"])

        except Exception as proc_error:
            # A failure inside the pipeline still finishes the job as DONE,
            # with empty results.
            print(f"[{job_id}] Error en procesamiento: {proc_error}")
            import traceback
            traceback.print_exc()
            job["results"] = {
                "characters": [], "face_labels": [],
                "audio_segments": [], "voice_labels": [], "diarization_info": {},
                "video_name": video_name, "base_dir": str(base)
            }
            job["status"] = JobStatus.DONE

    except Exception as e:
        print(f"[{job_id}] Error general: {e}")
        import traceback
        traceback.print_exc()
        # `job` is None when the job lookup itself failed.
        if job is not None:
            job["status"] = JobStatus.FAILED
            job["error"] = str(e)