Spaces:

Segizu
/

Computer_Vision

Sleeping

App Files Community

Segizu committed on Mar 14, 2025

Commit

27e96af

1 Parent(s): daa16ff

sdfs

Browse files

Files changed (1) hide show

app.py +11 -30

app.py CHANGED Viewed

@@ -3,29 +3,9 @@ import gradio as gr
 from transformers import pipeline
 from PIL import Image
 import tempfile
-import torch
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
-# Cargar el modelo de detección de objetos
-try:
-    detector = pipeline(
-        "object-detection",
-        model="facebook/detr-resnet-50",
-        device=0 if device == "cuda" else -1,  # 0 para GPU, -1 para CPU
-        framework="pt"  # Especificar PyTorch como framework
-    )
-    print("Model loaded successfully on", device)
-except Exception as e:
-    print(f"Error loading model: {e}")
-    print("Falling back to CPU")
-    detector = pipeline(
-        "object-detection",
-        model="facebook/detr-resnet-50",
-        device=-1,
-        framework="pt"
-    )
 def process_video(video_path):
     """
@@ -47,11 +27,11 @@ def process_video(video_path):
     output_path = tmp_file.name
     tmp_file.close()  # Se cierra para que VideoWriter pueda escribir en él
-    # Configurar VideoWriter (utilizamos el códec mp4v)
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-    # Definir las clases a las que queremos aplicar detección
     valid_labels = {"person", "bicycle", "motorcycle"}
     threshold = 0.7  # Umbral de confianza
@@ -60,7 +40,7 @@ def process_video(video_path):
         if not ret:
             break
-        # Convertir el frame de BGR a RGB y luego a imagen PIL
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         pil_image = Image.fromarray(frame_rgb)
@@ -74,11 +54,12 @@ def process_video(video_path):
             if score < threshold or label not in valid_labels:
                 continue
-            # Obtener la caja del objeto en formato [xmin, ymin, width, height]
             box = detection["box"]
-            xmin, ymin, w, h = box
-            xmax = xmin + w
-            ymax = ymin + h
             # Dibujar el rectángulo y la etiqueta en el frame
             cv2.rectangle(frame, (int(xmin), int(ymin)), (int(xmax), int(ymax)), color=(0, 255, 0), thickness=2)
@@ -97,7 +78,7 @@ iface = gr.Interface(
     inputs=gr.Video(label="Sube tu video"),
     outputs=gr.Video(label="Video procesado"),
     title="Detección y Visualización de Objetos en Video",
-    description="Carga un video y se detectan personas, bicicletas y motos. Los objetos se enmarcan y etiquetan, mostrando la detección en tiempo real."
 )
 if __name__ == "__main__":

 from transformers import pipeline
 from PIL import Image
 import tempfile
+# Cargar el modelo de detección de objetos usando CPU
+detector = pipeline("object-detection", model="facebook/detr-resnet-50", device=-1)
 def process_video(video_path):
     """
     output_path = tmp_file.name
     tmp_file.close()  # Se cierra para que VideoWriter pueda escribir en él
+    # Configurar VideoWriter (usamos el códec mp4v)
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+    # Definir las clases de interés
     valid_labels = {"person", "bicycle", "motorcycle"}
     threshold = 0.7  # Umbral de confianza
         if not ret:
             break
+        # Convertir el frame de BGR a RGB y a imagen PIL
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         pil_image = Image.fromarray(frame_rgb)
             if score < threshold or label not in valid_labels:
                 continue
+            # Extraer la caja del objeto (dado que es un diccionario)
             box = detection["box"]
+            xmin = box["xmin"]
+            ymin = box["ymin"]
+            xmax = box["xmax"]
+            ymax = box["ymax"]
             # Dibujar el rectángulo y la etiqueta en el frame
             cv2.rectangle(frame, (int(xmin), int(ymin)), (int(xmax), int(ymax)), color=(0, 255, 0), thickness=2)
     inputs=gr.Video(label="Sube tu video"),
     outputs=gr.Video(label="Video procesado"),
     title="Detección y Visualización de Objetos en Video",
+    description="Carga un video y se detectan personas, bicicletas y motos. Los objetos se enmarcan y etiquetan en tiempo real."
 )
 if __name__ == "__main__":