Spaces:

VeuReu
/

engine

Sleeping

App Files Community

VeuReu committed on Oct 30, 2025

Commit

19221d2

verified ·

1 Parent(s): 5d269af

Upload api.py

Browse files

Files changed (1) hide show

api.py +8 -4

api.py CHANGED Viewed

@@ -74,6 +74,7 @@ async def create_initial_casting(
     min_cluster_size: int = Form(...),
     voice_epsilon: float = Form(0.5),
     voice_min_cluster_size: int = Form(2),
 ):
     """
     Crea un job para procesar el vídeo de forma asíncrona.
@@ -98,6 +99,7 @@ async def create_initial_casting(
         "min_cluster_size": int(min_cluster_size),
         "voice_epsilon": float(voice_epsilon),
         "voice_min_cluster_size": int(voice_min_cluster_size),
         "created_at": datetime.now().isoformat(),
         "results": None,
         "error": None
@@ -205,8 +207,8 @@ def process_video_job(job_id: str):
                 raise RuntimeError("No se pudo abrir el vídeo para extracción de caras")
             fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
             total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
-            max_samples = 100
-            # Índices de frames equiespaciados (hasta 100)
             if total_frames > 0:
                 frame_indices = sorted(set(np.linspace(0, max(0, total_frames - 1), num=min(max_samples, max(1, total_frames)), dtype=int).tolist()))
             else:
@@ -266,7 +268,8 @@ def process_video_job(job_id: str):
                                 face_cascade = None
                             boxes_haar = []
                             if face_cascade is not None and not face_cascade.empty():
-                                faces_haar = face_cascade.detectMultiScale(gray, scaleFactor=1.05, minNeighbors=3, minSize=(30, 30))
                                 for (x, y, w, h) in faces_haar:
                                     top, left, bottom, right = max(0, y), max(0, x), min(frame.shape[0], y+h), min(frame.shape[1], x+w)
                                     boxes_haar.append((top, right, bottom, left))
@@ -284,7 +287,8 @@ def process_video_job(job_id: str):
                                             # Validar que es un bbox real, no el frame completo
                                             # Si el bbox es prácticamente el frame completo, descartarlo
                                             is_full_frame = (x <= 5 and y <= 5 and w >= frame.shape[1] - 10 and h >= frame.shape[0] - 10)
-                                            if w > 30 and h > 30 and not is_full_frame:
                                                 top, left, bottom, right = max(0, y), max(0, x), min(frame.shape[0], y+h), min(frame.shape[1], x+w)
                                                 boxes_haar.append((top, right, bottom, left))
                                     tmp_detect.unlink(missing_ok=True)

     min_cluster_size: int = Form(...),
     voice_epsilon: float = Form(0.5),
     voice_min_cluster_size: int = Form(2),
+    max_frames: int = Form(100),
 ):
     """
     Crea un job para procesar el vídeo de forma asíncrona.
         "min_cluster_size": int(min_cluster_size),
         "voice_epsilon": float(voice_epsilon),
         "voice_min_cluster_size": int(voice_min_cluster_size),
+        "max_frames": int(max_frames),
         "created_at": datetime.now().isoformat(),
         "results": None,
         "error": None
                 raise RuntimeError("No se pudo abrir el vídeo para extracción de caras")
             fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
             total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
+            max_samples = job.get("max_frames", 100)
+            # Índices de frames equiespaciados
             if total_frames > 0:
                 frame_indices = sorted(set(np.linspace(0, max(0, total_frames - 1), num=min(max_samples, max(1, total_frames)), dtype=int).tolist()))
             else:
                                 face_cascade = None
                             boxes_haar = []
                             if face_cascade is not None and not face_cascade.empty():
+                                # Parámetros más estrictos para evitar falsos positivos
+                                faces_haar = face_cascade.detectMultiScale(gray, scaleFactor=1.08, minNeighbors=5, minSize=(50, 50))
                                 for (x, y, w, h) in faces_haar:
                                     top, left, bottom, right = max(0, y), max(0, x), min(frame.shape[0], y+h), min(frame.shape[1], x+w)
                                     boxes_haar.append((top, right, bottom, left))
                                             # Validar que es un bbox real, no el frame completo
                                             # Si el bbox es prácticamente el frame completo, descartarlo
                                             is_full_frame = (x <= 5 and y <= 5 and w >= frame.shape[1] - 10 and h >= frame.shape[0] - 10)
+                                            # Bbox mínimo de 50x50 para filtrar falsos positivos pequeños
+                                            if w > 50 and h > 50 and not is_full_frame:
                                                 top, left, bottom, right = max(0, y), max(0, x), min(frame.shape[0], y+h), min(frame.shape[1], x+w)
                                                 boxes_haar.append((top, right, bottom, left))
                                     tmp_detect.unlink(missing_ok=True)