Update app.py
app.py CHANGED

@@ -12,33 +12,8 @@ REPO_ID = "mshamrai/yolov8s-visdrone"
 FILENAME = "weights/best.pt"
 
 SAMPLES_DIR = "samples"
-
-
-TEST_VIDEO = os.path.join(SAMPLES_DIR, "test_video.mp4")
-
-SAMPLE_URLS = {
-    TEST_IMAGE: "https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg",  # small image
-    TEST_VIDEO: "https://github.com/ultralytics/assets/releases/download/v0.0.0/drone.mp4",  # short drone clip
-}
-
-def ensure_samples():
-    os.makedirs(SAMPLES_DIR, exist_ok=True)
-    try:
-        import requests
-    except Exception:
-        return
-    for local_path, url in SAMPLE_URLS.items():
-        if os.path.exists(local_path):
-            continue
-        try:
-            r = requests.get(url, timeout=30)
-            r.raise_for_status()
-            with open(local_path, "wb") as f:
-                f.write(r.content)
-        except Exception:
-            pass
-
-ensure_samples()
+EMBED_IMG = os.path.join(SAMPLES_DIR, "aerial_image.jpg")
+EMBED_VID = os.path.join(SAMPLES_DIR, "aerial_video.mp4")
 
 # -------------------
 # Lazy state
@@ -177,7 +152,7 @@ def _save_pdf(title: str, summary: str, counts: Dict[str, int], annotated_image_
 # -------------------
 def detect_image(image, conf: float, iou: float):
     if image is None:
-        return None,
+        return None, [], "No image provided.", None, None
     cv2 = _lazy_cv2()
     model = _get_model(conf, iou)
     results = model.predict(image, imgsz=960, verbose=False)
@@ -185,7 +160,7 @@ def detect_image(image, conf: float, iou: float):
     rows = _results_to_rows(results)
     annotated = r.plot()  # BGR ndarray
     counts = _count_by_class(rows)
-    summary = "Detections: " + ", ".join(f"{k}: {v}" for k, v in counts.items()) if rows else "
+    summary = "Detections: " + (", ".join(f"{k}: {v}" for k, v in counts.items()) if rows else "none")
     tmp_img = os.path.join(tempfile.gettempdir(), f"annotated_{int(time.time())}.jpg")
     try:
         cv2.imwrite(tmp_img, annotated)
@@ -197,19 +172,19 @@ def detect_image(image, conf: float, iou: float):
 
 def detect_video(video_path: str, conf: float, iou: float, max_frames: int = 300):
     if not video_path:
-        return None,
+        return None, {}, "No video provided.", None
     cv2 = _lazy_cv2()
     model = _get_model(conf, iou)
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
-        return None,
+        return None, {}, "Failed to open video.", None
     fps = cap.get(cv2.CAP_PROP_FPS) or 24.0
     w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 1280)
     h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 720)
     writer, out_path = _write_video(os.path.join(tempfile.gettempdir(), f"annotated_{int(time.time())}"), fps, w, h)
     if writer is None or (hasattr(writer, "isOpened") and not writer.isOpened()):
         cap.release()
-        return None,
+        return None, {}, "Video writer could not open. Try another format/resolution.", None
     totals: Dict[str, int] = {}
     frames = 0
     try:
@@ -229,7 +204,7 @@ def detect_video(video_path: str, conf: float, iou: float, max_frames: int = 300
     finally:
         cap.release()
         writer.release()
-    summary = "Detections (frame-wise tallies): " + ", ".join(f"{k}: {v}" for k, v in totals.items()) if totals else "
+    summary = "Detections (frame-wise tallies): " + (", ".join(f"{k}: {v}" for k, v in totals.items()) if totals else "none")
     tally_rows = [{"class": k, "count": v} for k, v in sorted(totals.items())]
     csv_path = _save_csv(tally_rows)
     return out_path, totals, summary, csv_path
@@ -243,38 +218,37 @@ def export_pdf_vid(summary: str, counts: dict):
     return _save_pdf("Airspace Drone Detector — Video Report", summary or "No summary.", counts or {}, None)
 
 # -------------------
-# UI (
+# UI (embedded-local samples + uploads)
 # -------------------
-
-"
-"Use
+NOTE = (
+    "Model: VisDrone (aerial **cars/pedestrians/vehicles**). It does **not** include a 'drone' class. "
+    "Use top‑down scenes with people/traffic for best results."
 )
 
-with gr.Blocks(title="
+with gr.Blocks(title="Aerial Object Detector (VisDrone)") as demo:
     gr.Markdown(
         """
-        #
-
-
-        **Exports:** CSV + PDF reports.
-        **Note:** On CPU Spaces, long videos are truncated via **Max frames**.
+        # Aerial Object Detector (Pretrained on VisDrone)
+        Use the **embedded samples** or your own uploads.
+        Exports: **CSV** and **PDF** reports.
         """
     )
 
     with gr.Tabs():
-        # IMAGE
+        # IMAGE
         with gr.TabItem("Image"):
             with gr.Row():
                 image_in = gr.Image(
-                    value=
+                    value=EMBED_IMG if os.path.exists(EMBED_IMG) else None,
                     type="numpy",
-                    label="Input Image (
+                    label="Input Image (embedded or upload)"
                 )
                 with gr.Column():
                     conf_img = gr.Slider(0.05, 0.8, 0.35, step=0.05, label="Confidence")
                     iou_img = gr.Slider(0.1, 0.9, 0.45, step=0.05, label="NMS IoU")
+                    load_embed_img = gr.Button("Load Embedded Sample Image")
                     run_img = gr.Button("Run Detection")
+                    gr.Markdown(NOTE)
 
             image_out = gr.Image(label="Annotated Image")
             table_out = gr.Dataframe(headers=["class","confidence","x1","y1","x2","y2","width","height"], wrap=True)
@@ -287,6 +261,13 @@ The sample **image** and **video** are already loaded below — just click **Run
 
             annotated_tmp_img_path = gr.State(value=None)
 
+            def _load_embed_img():
+                if os.path.exists(EMBED_IMG):
+                    return EMBED_IMG
+                return None
+
+            load_embed_img.click(fn=_load_embed_img, outputs=[image_in])
+
             def _run_img(image, conf, iou):
                 return detect_image(image, conf, iou)
 
@@ -302,19 +283,20 @@ The sample **image** and **video** are already loaded below — just click **Run
                 outputs=[pdf_img_path],
             )
 
-        # VIDEO
+        # VIDEO
         with gr.TabItem("Video"):
             with gr.Row():
                 video_in = gr.Video(
-                    value=
-                    label="Input Video (
+                    value=EMBED_VID if os.path.exists(EMBED_VID) else None,
+                    label="Input Video (embedded or upload)"
                 )
                 with gr.Column():
                     conf_vid = gr.Slider(0.05, 0.8, 0.35, step=0.05, label="Confidence")
                     iou_vid = gr.Slider(0.1, 0.9, 0.45, step=0.05, label="NMS IoU")
                     max_frames = gr.Slider(60, 2000, 300, step=10, label="Max frames to process")
+                    load_embed_vid = gr.Button("Load Embedded Sample Video")
                     run_vid = gr.Button("Run Detection")
+                    gr.Markdown(NOTE)
 
             video_out = gr.Video(label="Annotated Video")
             counts_text = gr.Textbox(label="Per-class tally (JSON)", max_lines=20)
@@ -325,6 +307,13 @@ The sample **image** and **video** are already loaded below — just click **Run
             pdf_vid_btn = gr.Button("Generate PDF Report")
             pdf_vid_path = gr.File(label="PDF Report", interactive=False)
 
+            def _load_embed_vid():
+                if os.path.exists(EMBED_VID):
+                    return EMBED_VID
+                return None
+
+            load_embed_vid.click(fn=_load_embed_vid, outputs=[video_in])
+
             def _run_vid(vpath, conf, iou, maxf):
                 out_path, counts, summary, csv_path = detect_video(vpath, conf, iou, int(maxf))
                 counts_str = json.dumps(counts or {}, ensure_ascii=False, indent=2)
@@ -352,7 +341,8 @@ The sample **image** and **video** are already loaded below — just click **Run
     gr.Markdown(
         f"""
 **Weights:** `{REPO_ID}/{FILENAME}` (downloaded lazily)
-**Diagnostics** — FFmpeg: {'Yes' if _ffmpeg_ok() else 'No'} • Python: 3.10
+**Diagnostics** — FFmpeg: {'Yes' if _ffmpeg_ok() else 'No'} • Python: 3.10
+**Tip:** For true *drone* detection, I can swap in a UAV‑specific model. Say the word and I’ll rewire it.
 """
     )
 
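The parenthesization added to the two `summary` lines is the substantive fix in those hunks: Python's conditional expression binds more loosely than `+`, so the old unparenthesized form made the entire concatenation the `if` branch and silently dropped the `"Detections: "` prefix whenever nothing was detected. A minimal standalone sketch of the difference, using hypothetical `counts`/`rows` values rather than the app's real objects:

```python
counts = {}  # hypothetical: no detections this run
rows = []

# Old form: the conditional expression captures the whole concatenation,
# so the empty case loses the "Detections: " prefix entirely.
old = "Detections: " + ", ".join(f"{k}: {v}" for k, v in counts.items()) if rows else "none"
assert old == "none"

# New form: only the joined tally is conditional; the prefix survives both branches.
new = "Detections: " + (", ".join(f"{k}: {v}" for k, v in counts.items()) if rows else "none")
assert new == "Detections: none"
```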
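The widened early returns (`return None, [], "No image provided.", None, None` and friends) follow the same rule throughout: Gradio maps a handler's returned tuple positionally onto its `outputs` list, so a bare `return None,` cannot populate the remaining output components. A minimal sketch of that contract, assuming a hypothetical two-output handler rather than the app's actual wiring:

```python
import gradio as gr

def handler(x):
    # Every return path must yield one value per output component below.
    if x is None:
        return None, "No input provided."  # early exit, still arity 2
    return x * 2, "OK"

with gr.Blocks() as demo:
    num = gr.Number(label="Input")
    out_num = gr.Number(label="Result")
    out_msg = gr.Textbox(label="Status")
    # Two outputs, so handler must always return a 2-tuple.
    gr.Button("Run").click(fn=handler, inputs=[num], outputs=[out_num, out_msg])

if __name__ == "__main__":
    demo.launch()
```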