Kesheratmex committed on
Commit
cbd697d
·
1 Parent(s): 96316b4

**Make infer_media configurable, return counts and support scaling**

Browse files

- Added optional parameters (`conf`, `iou`, `out_res`, `preset`) to `infer_media` for flexible inference settings.
- Implemented validation for required `media_path`.
- Introduced resolution mapping and optional resizing of output frames/images.
- Added per‑class counting for both video and image processing, returning a dictionary with paths and class counts.
- Improved video FPS handling with fallback and NaN checks.
- Updated UI: added hidden JSON components to expose the new dictionary results.
- Adjusted drawing code and file handling to work with the new return structure.

Files changed (1) hide show
  1. app.py +60 -17
app.py CHANGED
@@ -14,29 +14,53 @@ model = YOLO("best2.pt") # carga el modelo UNA sola vez
14
  # ────────────────────────────
15
  # Funciones de Inferencia
16
  # ────────────────────────────
17
- def infer_media(media_path):
18
  """
19
- Procesa un fichero de vΓ­deo o imagen:
20
- - Si es vΓ­deo, lo anota frame a frame y devuelve un MP4.
21
- - Si es imagen, dibuja las cajas sobre la imagen y devuelve un array BGR.
 
22
  """
 
 
 
23
  ext = os.path.splitext(media_path)[1].lower()
24
  tmpdir = tempfile.mkdtemp()
25
 
 
 
 
 
26
  # ─ VΓ­deo ───────────────────────────────────────────────────────
27
  if ext in [".mp4", ".mov", ".avi", ".mkv"]:
28
  in_vid = os.path.join(tmpdir, "in.mp4")
29
  out_vid = os.path.join(tmpdir, "out.mp4")
30
  shutil.copy(media_path, in_vid)
31
 
32
- # Preparamos writer de vΓ­deo
 
 
 
 
 
 
 
 
 
33
  writer = None
34
- fps = 30 # ajΓΊstalo si tu vΓ­deo tiene otro FPS
35
 
36
- # Streaming de frames con anotaciones
37
- results = model.predict(source=in_vid, conf=0.25, iou=0.45, stream=True)
38
  for r in results:
 
 
 
 
 
39
  annotated = r.plot() # frame anotado
 
 
40
 
41
  if writer is None:
42
  h, w = annotated.shape[:2]
@@ -47,30 +71,40 @@ def infer_media(media_path):
47
 
48
  if writer:
49
  writer.release()
 
 
50
 
51
- return out_vid
52
 
53
  # ─ Imagen ──────────────────────────────────────────────────────
54
  elif ext in [".jpg", ".jpeg", ".png", ".bmp"]:
55
  img = cv2.imread(media_path)
56
- results = model.predict(source=media_path, conf=0.25, iou=0.45, save=False)
57
 
58
- # Dibujamos cajas manualmente
 
59
  for box in results[0].boxes:
60
  x1, y1, x2, y2 = map(int, box.xyxy[0])
61
  cls_id = int(box.cls[0])
62
  label = model.names[cls_id]
 
63
  # rectΓ‘ngulo
64
- cv2.rectangle(img, (x1, y1), (x2, y2), (0,255,0), 2)
65
  # texto
66
  cv2.putText(img, label, (x1, y1 - 10),
67
- cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0,255,0), 2)
 
 
 
68
 
69
- return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
 
 
70
 
71
  else:
72
  raise ValueError(f"Formato no soportado: {ext}")
73
 
 
74
  def show_classes():
75
  """Devuelve las clases que el modelo conoce."""
76
  names = model.names
@@ -98,10 +132,19 @@ with gr.Blocks(title="Kesherat · Inspección de palas eólicas") as demo:
98
  output_video = gr.Video(label="VΓ­deo anotado")
99
  output_image = gr.Image(label="Imagen anotada")
100
 
 
 
 
 
101
  btn_detect = gr.Button("Detectar defectos")
102
- # Conectamos ambos inputs al mismo infer_media, con salidas condicionadas
103
- btn_detect.click(fn=infer_media, inputs=video_input, outputs=output_video)
104
- btn_detect.click(fn=infer_media, inputs=image_input, outputs=output_image)
 
 
 
 
 
105
 
106
  btn_classes = gr.Button("Mostrar clases del modelo")
107
  txt_classes = gr.Textbox(label="Clases cargadas", interactive=False)
 
14
  # ────────────────────────────
15
  # Funciones de Inferencia
16
  # ────────────────────────────
17
+ def infer_media(media_path, conf=0.25, iou=0.45, out_res="720p", preset="default"):
18
  """
19
+ Procesa un fichero de vΓ­deo o imagen con parΓ‘metros configurables.
20
+ Retornos:
21
+ - VΓ­deo: {"video": out_vid_path, "classes": {label: count, ...}}
22
+ - Imagen: {"path": out_img_path, "classes": {label: count, ...}}
23
  """
24
+ if not media_path:
25
+ raise ValueError("media_path es requerido")
26
+
27
  ext = os.path.splitext(media_path)[1].lower()
28
  tmpdir = tempfile.mkdtemp()
29
 
30
+ # ResoluciΓ³n objetivo
31
+ res_map = {"360p": (640, 360), "480p": (854, 480), "720p": (1280, 720)}
32
+ target_size = res_map.get(out_res)
33
+
34
  # ─ VΓ­deo ───────────────────────────────────────────────────────
35
  if ext in [".mp4", ".mov", ".avi", ".mkv"]:
36
  in_vid = os.path.join(tmpdir, "in.mp4")
37
  out_vid = os.path.join(tmpdir, "out.mp4")
38
  shutil.copy(media_path, in_vid)
39
 
40
+ # FPS del vΓ­deo (opcional: tomar real si existe)
41
+ cap = cv2.VideoCapture(in_vid)
42
+ fps = cap.get(cv2.CAP_PROP_FPS) or 30
43
+ try:
44
+ fps = float(fps)
45
+ if fps <= 0 or fps != fps: # NaN check
46
+ fps = 30
47
+ except Exception:
48
+ fps = 30
49
+
50
  writer = None
51
+ counts = {}
52
 
53
+ # Streaming de frames con anotaciones y conteo por clase
54
+ results = model.predict(source=in_vid, conf=conf, iou=iou, stream=True)
55
  for r in results:
56
+ # acumular conteos
57
+ for b in r.boxes:
58
+ label = model.names[int(b.cls[0])]
59
+ counts[label] = counts.get(label, 0) + 1
60
+
61
  annotated = r.plot() # frame anotado
62
+ if target_size:
63
+ annotated = cv2.resize(annotated, target_size)
64
 
65
  if writer is None:
66
  h, w = annotated.shape[:2]
 
71
 
72
  if writer:
73
  writer.release()
74
+ if cap:
75
+ cap.release()
76
 
77
+ return {"video": out_vid, "classes": counts}
78
 
79
  # ─ Imagen ──────────────────────────────────────────────────────
80
  elif ext in [".jpg", ".jpeg", ".png", ".bmp"]:
81
  img = cv2.imread(media_path)
82
+ results = model.predict(source=media_path, conf=conf, iou=iou, save=False)
83
 
84
+ counts = {}
85
+ # Dibujamos cajas manualmente y contamos
86
  for box in results[0].boxes:
87
  x1, y1, x2, y2 = map(int, box.xyxy[0])
88
  cls_id = int(box.cls[0])
89
  label = model.names[cls_id]
90
+ counts[label] = counts.get(label, 0) + 1
91
  # rectΓ‘ngulo
92
+ cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
93
  # texto
94
  cv2.putText(img, label, (x1, y1 - 10),
95
+ cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
96
+
97
+ if target_size:
98
+ img = cv2.resize(img, target_size)
99
 
100
+ out_path = os.path.join(tmpdir, "annotated.png")
101
+ cv2.imwrite(out_path, img)
102
+ return {"path": out_path, "classes": counts}
103
 
104
  else:
105
  raise ValueError(f"Formato no soportado: {ext}")
106
 
107
+
108
  def show_classes():
109
  """Devuelve las clases que el modelo conoce."""
110
  names = model.names
 
132
  output_video = gr.Video(label="VΓ­deo anotado")
133
  output_image = gr.Image(label="Imagen anotada")
134
 
135
+ # Componentes JSON ocultos para soportar API devolviendo dict y encadenar a la UI
136
+ json_video = gr.JSON(visible=False)
137
+ json_image = gr.JSON(visible=False)
138
+
139
  btn_detect = gr.Button("Detectar defectos")
140
+
141
+ # Endpoint API para vΓ­deo: devuelve dict {video, classes}. UI: extrae solo el vΓ­deo.
142
+ ev_video = btn_detect.click(fn=infer_media, inputs=video_input, outputs=json_video, api_name="/infer_media")
143
+ ev_video.then(lambda d: (d.get("video") if isinstance(d, dict) else d), inputs=json_video, outputs=output_video)
144
+
145
+ # Endpoint API para imagen: devuelve dict {path, classes}. UI: extrae solo la imagen.
146
+ ev_image = btn_detect.click(fn=infer_media, inputs=image_input, outputs=json_image, api_name="/infer_media_1")
147
+ ev_image.then(lambda d: (d.get("path") if isinstance(d, dict) else d), inputs=json_image, outputs=output_image)
148
 
149
  btn_classes = gr.Button("Mostrar clases del modelo")
150
  txt_classes = gr.Textbox(label="Clases cargadas", interactive=False)