Spaces:

lyimo
/

Car_Counting

Sleeping

App Files Files Community

lyimo committed 21 days ago

Commit

912ec81

verified ·

1 Parent(s): e4c58c8

Create app.py

Browse files

Files changed (1) hide show

app.py +273 -0

app.py ADDED Viewed

	@@ -0,0 +1,273 @@

+"""
+RF-DETR Object Counter — Gradio app for Hugging Face Spaces.
+Counts people, bicycles, cars, trucks, and animals in video using
+RF-DETR Medium + ByteTrack (so each object is counted only once).
+"""
+import os
+import tempfile
+from collections import defaultdict
+import cv2
+import gradio as gr
+import numpy as np
+import supervision as sv
+from rfdetr import RFDETRMedium
+from rfdetr.assets.coco_classes import COCO_CLASSES
+# ---------------------------------------------------------------------------
+# Target classes (COCO indices) — exactly what the user asked for
+# ---------------------------------------------------------------------------
+TARGET_CLASSES = {
+    0:  "person",
+    1:  "bicycle",
+    2:  "car",
+    7:  "truck",
+    # animals
+    14: "bird",
+    15: "cat",
+    16: "dog",
+    17: "horse",
+    18: "sheep",
+    19: "cow",
+    20: "elephant",
+    21: "bear",
+    22: "zebra",
+    23: "giraffe",
+}
+TARGET_IDS = list(TARGET_CLASSES.keys())
+# Per-class colour palette (BGR) for the live overlay
+CLASS_COLORS = {
+    "person":   (66, 135, 245),
+    "bicycle":  (245, 173, 66),
+    "car":      (66, 245, 167),
+    "truck":    (245, 66, 161),
+    "bird":     (245, 230, 66),
+    "cat":      (200, 120, 245),
+    "dog":      (120, 245, 200),
+    "horse":    (245, 120, 120),
+    "sheep":    (220, 220, 220),
+    "cow":      (140, 90, 60),
+    "elephant": (160, 160, 200),
+    "bear":     (90, 60, 30),
+    "zebra":    (40, 40, 40),
+    "giraffe":  (220, 180, 90),
+}
+# Example video lives next to app.py
+APP_DIR = os.path.dirname(os.path.abspath(__file__))
+EXAMPLE_VIDEO = os.path.join(APP_DIR, "example.mp4")
+# ---------------------------------------------------------------------------
+# Load model once at startup
+# ---------------------------------------------------------------------------
+print("Loading RF-DETR Medium…")
+MODEL = RFDETRMedium()
+try:
+    MODEL.optimize_for_inference()  # speeds up subsequent predicts
+    print("Model optimized for inference.")
+except Exception as e:
+    print(f"(Optimization skipped: {e})")
+print("Model ready.")
+# Annotators
+BOX_ANNOTATOR = sv.BoxAnnotator(thickness=2)
+LABEL_ANNOTATOR = sv.LabelAnnotator(text_scale=0.45, text_thickness=1, text_padding=3)
+def draw_counter_panel(frame: np.ndarray, counts: dict) -> np.ndarray:
+    """Translucent counter panel in the top-left corner."""
+    active = [(name, n) for name, n in counts.items() if n > 0]
+    if not active:
+        active = [("No targets yet", 0)]
+    panel_w = 230
+    panel_h = 40 + 22 * len(active)
+    overlay = frame.copy()
+    cv2.rectangle(overlay, (12, 12), (12 + panel_w, 12 + panel_h), (20, 20, 20), -1)
+    frame = cv2.addWeighted(overlay, 0.65, frame, 0.35, 0)
+    cv2.putText(frame, "LIVE COUNTS", (24, 38),
+                cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255, 255, 255), 2, cv2.LINE_AA)
+    y = 62
+    for name, n in active:
+        color = CLASS_COLORS.get(name, (200, 200, 200))
+        cv2.circle(frame, (28, y - 5), 5, color, -1)
+        cv2.putText(frame, f"{name}: {n}", (44, y),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (240, 240, 240), 1, cv2.LINE_AA)
+        y += 22
+    return frame
+def process_video(video_path, confidence, frame_stride, progress=gr.Progress(track_tqdm=True)):
+    if video_path is None:
+        return None, "⚠️ Please upload a video first.", []
+    video_info = sv.VideoInfo.from_video_path(video_path)
+    frame_gen = sv.get_video_frames_generator(video_path)
+    tracker = sv.ByteTrack(frame_rate=int(video_info.fps))
+    unique_ids = defaultdict(set)   # class_name -> {tracker_id, ...}
+    last_detections = sv.Detections.empty()
+    out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+    with sv.VideoSink(target_path=out_path, video_info=video_info) as sink:
+        for i, frame in enumerate(progress.tqdm(frame_gen, total=video_info.total_frames,
+                                                desc="Analyzing video")):
+            # Detect every Nth frame; reuse previous detections in-between to keep video smooth
+            if i % frame_stride == 0:
+                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                detections = MODEL.predict(rgb, threshold=confidence)
+                # Keep only the classes we care about
+                if len(detections) > 0:
+                    mask = np.isin(detections.class_id, TARGET_IDS)
+                    detections = detections[mask]
+                detections = tracker.update_with_detections(detections)
+                last_detections = detections
+                # Register unique IDs per class
+                for cid, tid in zip(detections.class_id, detections.tracker_id):
+                    if tid is None:
+                        continue
+                    name = TARGET_CLASSES.get(int(cid))
+                    if name:
+                        unique_ids[name].add(int(tid))
+            else:
+                detections = last_detections
+            # Annotate
+            if len(detections) > 0:
+                labels = [
+                    f"#{tid} {TARGET_CLASSES.get(int(cid), 'obj')} {conf:.2f}"
+                    for cid, tid, conf in zip(
+                        detections.class_id,
+                        detections.tracker_id if detections.tracker_id is not None
+                            else [None] * len(detections),
+                        detections.confidence,
+                    )
+                ]
+                frame = BOX_ANNOTATOR.annotate(frame, detections)
+                frame = LABEL_ANNOTATOR.annotate(frame, detections, labels)
+            counts_now = {name: len(ids) for name, ids in unique_ids.items()}
+            frame = draw_counter_panel(frame, counts_now)
+            sink.write_frame(frame)
+    # Build summary outputs
+    total = sum(len(ids) for ids in unique_ids.values())
+    if total == 0:
+        summary_md = "### ℹ️ No target objects detected.\nTry lowering the confidence threshold."
+    else:
+        lines = [f"### ✅ Total unique objects detected: **{total}**", ""]
+        for name in TARGET_CLASSES.values():
+            n = len(unique_ids.get(name, set()))
+            if n > 0:
+                lines.append(f"- **{name.capitalize()}** — {n}")
+        summary_md = "\n".join(lines)
+    table = [[name.capitalize(), len(unique_ids.get(name, set()))]
+             for name in TARGET_CLASSES.values()
+             if len(unique_ids.get(name, set())) > 0]
+    if not table:
+        table = [["—", 0]]
+    return out_path, summary_md, table
+# ---------------------------------------------------------------------------
+# UI
+# ---------------------------------------------------------------------------
+# Stylesheet handed to gr.Blocks(css=...): caps the page width at 1200px and
+# centres it, styles the "#title-row" heading and the ".card" panels, and
+# hides the page footer.
+CUSTOM_CSS = """
+.gradio-container {max-width: 1200px !important; margin: auto;}
+#title-row {text-align: center; padding: 8px 0 0 0;}
+#title-row h1 {font-weight: 700; letter-spacing: -0.5px; margin-bottom: 4px;}
+#title-row p {color: #6b7280; margin-top: 0;}
+.card {border: 1px solid #e5e7eb; border-radius: 14px; padding: 16px;
+       background: #ffffff;}
+footer {visibility: hidden;}
+"""
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="slate"),
+               css=CUSTOM_CSS, title="RF-DETR Object Counter") as demo:
+    with gr.Row(elem_id="title-row"):
+        gr.Markdown(
+            """
+            # 🚦 RF-DETR Object Counter
+            Count **people, bicycles, cars, trucks, and animals** in any video.
+            Powered by [RF-DETR Medium](https://github.com/roboflow/rf-detr) (Roboflow, ICLR 2026) and ByteTrack —
+            each object is counted **only once** as it moves across frames.
+            """
+        )
+    with gr.Row():
+        with gr.Column(scale=1):
+            with gr.Group(elem_classes="card"):
+                gr.Markdown("### 📥 Input")
+                video_input = gr.Video(
+                    label="Upload a video",
+                    sources=["upload"],
+                    format="mp4",
+                    height=320,
+                )
+                with gr.Accordion("⚙️ Advanced settings", open=False):
+                    confidence = gr.Slider(
+                        minimum=0.1, maximum=0.9, value=0.5, step=0.05,
+                        label="Confidence threshold",
+                        info="Higher = fewer but more certain detections.",
+                    )
+                    frame_stride = gr.Slider(
+                        minimum=1, maximum=10, value=2, step=1,
+                        label="Frame stride",
+                        info="Process every Nth frame. Higher = faster, slightly less accurate.",
+                    )
+                submit_btn = gr.Button("🔍 Count Objects", variant="primary", size="lg")
+                gr.Markdown("#### 🎬 Example video")
+                gr.Examples(
+                    examples=[[EXAMPLE_VIDEO]],
+                    inputs=video_input,
+                    label=None,
+                    examples_per_page=4,
+                )
+        with gr.Column(scale=1):
+            with gr.Group(elem_classes="card"):
+                gr.Markdown("### 📤 Annotated output")
+                video_output = gr.Video(label="Annotated video", height=320)
+                summary_output = gr.Markdown("Submit a video to see the results here.")
+                table_output = gr.Dataframe(
+                    headers=["Class", "Unique count"],
+                    datatype=["str", "number"],
+                    label="Per-class totals",
+                    interactive=False,
+                    wrap=True,
+                )
+    gr.Markdown(
+        """
+        ---
+        **Detected categories:** person · bicycle · car · truck · bird · cat · dog · horse ·
+        sheep · cow · elephant · bear · zebra · giraffe
+        **Tip:** the first run loads the model (≈45–90 s for Medium). Subsequent runs are much faster.
+        Use *Frame stride* if processing is slow on CPU.
+        """
+    )
+    submit_btn.click(
+        fn=process_video,
+        inputs=[video_input, confidence, frame_stride],
+        outputs=[video_output, summary_output, table_output],
+    )
+if __name__ == "__main__":
+    demo.queue(max_size=8).launch()