Spaces:

Segizu
/

Computer_Vision

Sleeping

App Files Files Community

Segizu committed on Mar 14, 2025

Commit

de9232e

1 Parent(s): c286429

sd

Browse files

Files changed (1) hide show

app.py +48 -24

app.py CHANGED Viewed

@@ -2,58 +2,82 @@ import cv2
 import gradio as gr
 from transformers import pipeline
 from PIL import Image
-#kfsaf
-# Cargar el modelo de detección de objetos (usando CPU)
 detector = pipeline("object-detection", model="facebook/detr-resnet-50", device=-1)
 def process_video(video_path):
     """
-    Procesa un video y devuelve el máximo número detectado de personas, bicicletas y motos en un fotograma.
     """
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
-        return {"person": 0, "bicycle": 0, "motorcycle": 0}
-    max_counts = {"person": 0, "bicycle": 0, "motorcycle": 0}
     while True:
         ret, frame = cap.read()
         if not ret:
             break
-        # Convertir el frame de BGR a RGB
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        # Convertir el array de NumPy a una imagen PIL
         pil_image = Image.fromarray(frame_rgb)
-        # Realizar la detección de objetos
         results = detector(pil_image)
-        # Contar objetos detectados en el frame actual (con umbral de confianza)
-        frame_counts = {"person": 0, "bicycle": 0, "motorcycle": 0}
         for detection in results:
-            if detection["score"] < 0.7:
-                continue
             label = detection["label"].lower()
-            if label in frame_counts:
-                frame_counts[label] += 1
-        # Actualizar el conteo máximo si en este frame se detecta un mayor número
-        for key in frame_counts:
-            if frame_counts[key] > max_counts[key]:
-                max_counts[key] = frame_counts[key]
     cap.release()
-    return max_counts
-# Crear la interfaz de Gradio para el Space
 iface = gr.Interface(
     fn=process_video,
     inputs=gr.Video(label="Sube tu video"),
-    outputs="json",
-    title="Detección de Objetos en Video",
-    description="Carga un video y detecta cuántas personas, bicicletas y motos aparecen usando modelos de Hugging Face."
 )
 if __name__ == "__main__":

 import gradio as gr
 from transformers import pipeline
 from PIL import Image
+import tempfile
 # Load the object-detection model once at import time, running on CPU (device=-1).
 detector = pipeline("object-detection", model="facebook/detr-resnet-50", device=-1)
 def process_video(video_path):
     """
     Annotate a video with boxes around people, bicycles and motorcycles.

     Each frame is passed to the module-level ``detector`` pipeline. Detections
     whose label is "person", "bicycle" or "motorcycle" and whose score is at
     least 0.7 are drawn on the frame as a green rectangle plus a
     "label: score" caption. The annotated frames are written to a temporary
     ``.mp4`` file encoded with the ``mp4v`` codec.

     Args:
         video_path: Path of the input video, as accepted by ``cv2.VideoCapture``.

     Returns:
         Path of the annotated temporary video file, or ``None`` when the input
         cannot be opened or the output writer cannot be created.
     """
     import os

     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
         return None
     out = None
     try:
         # Read the properties needed to configure the output writer.
         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
         fps = cap.get(cv2.CAP_PROP_FPS)
         if not fps > 0:
             # Some containers report 0 (or NaN) FPS; fall back to a sane default
             # so the writer does not produce an unplayable file.
             fps = 30.0
         # Reserve a temporary path; the handle is closed right away so that
         # VideoWriter can open the file itself.
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
             output_path = tmp_file.name
         fourcc = cv2.VideoWriter_fourcc(*"mp4v")
         out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
         if not out.isOpened():
             # Do not hand back an empty file when the codec/writer is unavailable.
             out.release()
             os.remove(output_path)
             return None
         valid_labels = {"person", "bicycle", "motorcycle"}
         threshold = 0.7  # Minimum confidence for a detection to be drawn
         box_keys = ("xmin", "ymin", "xmax", "ymax")
         green = (0, 255, 0)
         while True:
             ret, frame = cap.read()
             if not ret:
                 break
             # OpenCV delivers BGR frames; the pipeline expects an RGB PIL image.
             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
             pil_image = Image.fromarray(frame_rgb)
             results = detector(pil_image)
             for detection in results:
                 score = detection["score"]
                 label = detection["label"].lower()
                 if score < threshold or label not in valid_labels:
                     continue
                 # The pipeline returns the box as a dict of corner coordinates
                 # (xmin, ymin, xmax, ymax), not as an [x, y, w, h] sequence.
                 box = detection["box"]
                 xmin, ymin, xmax, ymax = (int(box[key]) for key in box_keys)
                 cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color=green, thickness=2)
                 text = f"{label}: {score:.2f}"
                 # Keep the caption inside the frame for boxes touching the top edge.
                 text_y = max(ymin - 10, 15)
                 cv2.putText(frame, text, (xmin, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)
             # Append the annotated frame to the output video.
             out.write(frame)
     finally:
         # Always free the native handles, even if detection raises mid-video.
         cap.release()
         if out is not None:
             out.release()
     return output_path
 # Gradio UI for the Space: one uploaded video in, one annotated video out.
 iface = gr.Interface(
     # Text shown at the top of the page.
     title="Detección y Visualización de Objetos en Video",
     description="Carga un video y se detectan personas, bicicletas y motos. Los objetos se enmarcan y etiquetan, mostrando la detección en tiempo real.",
     # Wiring: the uploaded file is passed to process_video and its return value is shown as a video.
     fn=process_video,
     inputs=gr.Video(label="Sube tu video"),
     outputs=gr.Video(label="Video procesado"),
 )
 if __name__ == "__main__":