VeuReu committed on
Commit
173dcb8
·
verified ·
1 Parent(s): 6850476

Upload api.py

Browse files
Files changed (1) hide show
  1. api.py +115 -2
api.py CHANGED
@@ -72,6 +72,8 @@ async def create_initial_casting(
72
  video: UploadFile = File(...),
73
  epsilon: float = Form(...),
74
  min_cluster_size: int = Form(...),
 
 
75
  ):
76
  """
77
  Crea un job para procesar el vídeo de forma asíncrona.
@@ -94,6 +96,8 @@ async def create_initial_casting(
94
  "video_name": video_name,
95
  "epsilon": float(epsilon),
96
  "min_cluster_size": int(min_cluster_size),
 
 
97
  "created_at": datetime.now().isoformat(),
98
  "results": None,
99
  "error": None
@@ -168,6 +172,8 @@ def process_video_job(job_id: str):
168
  video_name = job["video_name"]
169
  epsilon = job["epsilon"]
170
  min_cluster_size = job["min_cluster_size"]
 
 
171
 
172
  # Crear estructura de carpetas
173
  base = TEMP_ROOT / video_name
@@ -283,8 +289,8 @@ def process_video_job(job_id: str):
283
  if voice_embeddings:
284
  try:
285
  Xv = np.array(voice_embeddings)
286
- v_eps = float(epsilon)
287
- v_min = max(1, int(min_cluster_size))
288
  v_labels = DBSCAN(eps=v_eps, min_samples=v_min, metric='euclidean').fit(Xv).labels_.tolist()
289
  except Exception as _e:
290
  print(f"[{job_id}] WARN - Voice clustering failed: {_e}")
@@ -480,6 +486,113 @@ async def finalize_casting(
480
  "voice_identities": voice_identities,
481
  }
482
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
483
  @app.post("/refine_narration")
484
  async def refine_narration(
485
  dialogues_srt: str = Form(...),
 
72
  video: UploadFile = File(...),
73
  epsilon: float = Form(...),
74
  min_cluster_size: int = Form(...),
75
+ voice_epsilon: float = Form(0.5),
76
+ voice_min_cluster_size: int = Form(2),
77
  ):
78
  """
79
  Crea un job para procesar el vídeo de forma asíncrona.
 
96
  "video_name": video_name,
97
  "epsilon": float(epsilon),
98
  "min_cluster_size": int(min_cluster_size),
99
+ "voice_epsilon": float(voice_epsilon),
100
+ "voice_min_cluster_size": int(voice_min_cluster_size),
101
  "created_at": datetime.now().isoformat(),
102
  "results": None,
103
  "error": None
 
172
  video_name = job["video_name"]
173
  epsilon = job["epsilon"]
174
  min_cluster_size = job["min_cluster_size"]
175
+ v_epsilon = float(job.get("voice_epsilon", epsilon))
176
+ v_min_cluster = int(job.get("voice_min_cluster_size", min_cluster_size))
177
 
178
  # Crear estructura de carpetas
179
  base = TEMP_ROOT / video_name
 
289
  if voice_embeddings:
290
  try:
291
  Xv = np.array(voice_embeddings)
292
+ v_eps = float(v_epsilon)
293
+ v_min = max(1, int(v_min_cluster))
294
  v_labels = DBSCAN(eps=v_eps, min_samples=v_min, metric='euclidean').fit(Xv).labels_.tolist()
295
  except Exception as _e:
296
  print(f"[{job_id}] WARN - Voice clustering failed: {_e}")
 
486
  "voice_identities": voice_identities,
487
  }
488
 
489
@app.get("/files_scene/{video_name}/{scene_id}/{filename}")
def serve_scene_file(video_name: str, scene_id: str, filename: str):
    """Serve one extracted scene frame (e.g. ``frame_000.jpg``) from the temp tree.

    Fixes:
      * The route template now declares ``{filename}`` so FastAPI binds the
        third path segment to the ``filename`` parameter (the original
        template was garbled and never bound it).
      * All three segments are user-controlled, so the resolved path is
        checked to stay inside the scene directory (path-traversal guard).

    Raises:
        HTTPException: 400 if the resolved path escapes the scene folder,
            404 if the requested file does not exist.
    """
    scene_dir = (TEMP_ROOT / video_name / "scenes" / scene_id).resolve()
    file_path = (scene_dir / filename).resolve()
    # Reject requests whose resolved path escapes the scene directory
    # (e.g. ".." in any segment).
    try:
        file_path.relative_to(scene_dir)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid filename")
    if not file_path.exists():
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(file_path)
496
@app.post("/detect_scenes")
async def detect_scenes(
    video: UploadFile = File(...),
    epsilon: float = Form(0.5),
    min_cluster_size: int = Form(2),
    frame_interval_sec: float = Form(0.5),
):
    """Detect scene clusters via DBSCAN over per-frame HSV color histograms.

    The uploaded video is saved to disk, one frame is sampled every
    ``frame_interval_sec`` seconds, each sample is reduced to a normalized
    96-bin HSV histogram (32 bins per channel), and the histograms are
    clustered with DBSCAN.  Up to 12 representative JPEG frames per cluster
    are written under ``TEMP_ROOT/<video_name>/scenes/scene_NN/``.

    Args:
        video: Uploaded video file.
        epsilon: DBSCAN ``eps`` over L2-normalized histograms.
        min_cluster_size: DBSCAN ``min_samples``.
        frame_interval_sec: Sampling interval in seconds.

    Returns:
        ``{"scene_clusters": [...], "base_dir": ...}`` — one entry per
        cluster, structured similarly to character clusters; an empty
        cluster list if no frames could be read.

    Raises:
        HTTPException: 400 if OpenCV cannot open the saved video.
    """
    import cv2
    import numpy as np
    from sklearn.cluster import DBSCAN

    # Persist the upload so OpenCV can seek the file from disk.
    video_name = Path(video.filename).stem
    dst_video = VIDEOS_ROOT / f"{video_name}.mp4"
    with dst_video.open("wb") as f:
        shutil.copyfileobj(video.file, f)

    cap = cv2.VideoCapture(str(dst_video))
    if not cap.isOpened():
        raise HTTPException(status_code=400, detail="Cannot open video")

    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0  # some containers report 0 fps
    step = max(1, int(frame_interval_sec * fps))

    frames = []  # normalized HSV histograms, one per sampled frame
    metas = []   # frame index + timestamp for each histogram
    idx = 0
    try:
        while True:
            # grab() is cheap; only sampled frames are actually decoded.
            ret = cap.grab()
            if not ret:
                break
            if idx % step == 0:
                ret2, frame = cap.retrieve()
                if not ret2:
                    break
                # Downscale for speed and histogram stability.
                small = cv2.resize(frame, (160, 90))
                hsv = cv2.cvtColor(small, cv2.COLOR_BGR2HSV)
                # Per-channel histograms: 32 bins each for H, S, V.
                h_hist = cv2.calcHist([hsv], [0], None, [32], [0, 180]).flatten()
                s_hist = cv2.calcHist([hsv], [1], None, [32], [0, 256]).flatten()
                v_hist = cv2.calcHist([hsv], [2], None, [32], [0, 256]).flatten()
                hist = np.concatenate([h_hist, s_hist, v_hist])
                hist = hist / (np.linalg.norm(hist) + 1e-8)  # L2-normalize
                frames.append(hist)
                metas.append({"index": idx, "time_sec": idx / float(fps)})
            idx += 1
    finally:
        # Fix: release the capture even if decoding raises (resource leak).
        cap.release()

    if not frames:
        return {"scene_clusters": []}

    X = np.array(frames)
    labels = DBSCAN(
        eps=float(epsilon), min_samples=int(min_cluster_size), metric='euclidean'
    ).fit(X).labels_.tolist()

    # Group sample indices by cluster label; DBSCAN marks noise as -1.
    clusters = {}
    for i, lbl in enumerate(labels):
        if lbl < 0:
            continue
        clusters.setdefault(int(lbl), []).append(i)

    # Write representative frames for each cluster.
    base = TEMP_ROOT / video_name / "scenes"
    base.mkdir(parents=True, exist_ok=True)
    scene_list = []
    cap = cv2.VideoCapture(str(dst_video))
    try:
        for lbl, idxs in sorted(clusters.items(), key=lambda x: x[0]):
            scene_id = f"scene_{int(lbl):02d}"
            out_dir = base / scene_id
            out_dir.mkdir(parents=True, exist_ok=True)
            frame_files = []
            # Keep at most 12 frames per cluster.
            for k, fi in enumerate(idxs[:12]):
                frame_num = metas[fi]["index"]
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
                ret2, frame = cap.read()
                if not ret2:
                    continue
                fn = f"frame_{k:03d}.jpg"
                cv2.imwrite(str(out_dir / fn), frame)
                frame_files.append(fn)
            # First successfully written frame is the representative image.
            rep = frame_files[0] if frame_files else None
            image_url = f"/files_scene/{video_name}/{scene_id}/{rep}" if rep else ""
            scene_list.append({
                "id": scene_id,
                "folder": str(out_dir),
                "num_frames": len(frame_files),
                "image_url": image_url,
                "frame_files": frame_files,
            })
    finally:
        # Fix: release the second capture on all paths as well.
        cap.release()

    return {"scene_clusters": scene_list, "base_dir": str(base)}
596
  @app.post("/refine_narration")
597
  async def refine_narration(
598
  dialogues_srt: str = Form(...),