Zhen Ye committed on
Commit
537aca9
·
1 Parent(s): 6c02470

added drone detector

Browse files
Files changed (4) hide show
  1. app.py +7 -13
  2. demo.html +26 -114
  3. models/detectors/drone_yolo.py +70 -0
  4. models/model_loader.py +2 -0
app.py CHANGED
@@ -82,11 +82,12 @@ async def detect_endpoint(
82
  queries: Comma-separated object classes for object_detection mode
83
  detector: Model to use (hf_yolov8, detr_resnet50, grounding_dino)
84
  segmenter: Segmentation model to use (sam3)
 
85
 
86
  Returns:
87
  - For object_detection: Processed video with bounding boxes
88
  - For segmentation: Processed video with masks rendered
89
- - For drone_detection: JSON with "coming_soon" status
90
  """
91
  # Validate mode
92
  if mode not in VALID_MODES:
@@ -142,17 +143,7 @@ async def detect_endpoint(
142
  filename="segmented.mp4",
143
  )
144
 
145
- if mode == "drone_detection":
146
- return JSONResponse(
147
- status_code=200,
148
- content={
149
- "status": "coming_soon",
150
- "message": "Drone detection mode is under development. Stay tuned!",
151
- "mode": "drone_detection"
152
- }
153
- )
154
-
155
- # Handle object detection mode
156
  if video is None:
157
  raise HTTPException(status_code=400, detail="Video file is required.")
158
 
@@ -171,14 +162,17 @@ async def detect_endpoint(
171
 
172
  # Parse queries
173
  query_list = [q.strip() for q in queries.split(",") if q.strip()]
 
 
174
 
175
  # Run inference
176
  try:
 
177
  output_path = run_inference(
178
  input_path,
179
  output_path,
180
  query_list,
181
- detector_name=detector,
182
  )
183
  except ValueError as exc:
184
  logging.exception("Video processing failed.")
 
82
  queries: Comma-separated object classes for object_detection mode
83
  detector: Model to use (hf_yolov8, detr_resnet50, grounding_dino)
84
  segmenter: Segmentation model to use (sam3)
85
+ drone_detection uses the dedicated drone_yolo model.
86
 
87
  Returns:
88
  - For object_detection: Processed video with bounding boxes
89
  - For segmentation: Processed video with masks rendered
90
+ - For drone_detection: Processed video with bounding boxes
91
  """
92
  # Validate mode
93
  if mode not in VALID_MODES:
 
143
  filename="segmented.mp4",
144
  )
145
 
146
+ # Handle object detection or drone detection mode
 
 
 
 
 
 
 
 
 
 
147
  if video is None:
148
  raise HTTPException(status_code=400, detail="Video file is required.")
149
 
 
162
 
163
  # Parse queries
164
  query_list = [q.strip() for q in queries.split(",") if q.strip()]
165
+ if mode == "drone_detection" and not query_list:
166
+ query_list = ["drone"]
167
 
168
  # Run inference
169
  try:
170
+ detector_name = "drone_yolo" if mode == "drone_detection" else detector
171
  output_path = run_inference(
172
  input_path,
173
  output_path,
174
  query_list,
175
+ detector_name=detector_name,
176
  )
177
  except ValueError as exc:
178
  logging.exception("Video processing failed.")
demo.html CHANGED
@@ -276,55 +276,6 @@
276
  display: none;
277
  }
278
 
279
- /* Modal */
280
- .modal {
281
- display: none;
282
- position: fixed;
283
- z-index: 1000;
284
- left: 0;
285
- top: 0;
286
- width: 100%;
287
- height: 100%;
288
- background: rgba(15, 23, 42, 0.5);
289
- align-items: center;
290
- justify-content: center;
291
- }
292
-
293
- .modal.show {
294
- display: flex;
295
- }
296
-
297
- .modal-content {
298
- background: white;
299
- padding: 30px;
300
- border-radius: 12px;
301
- max-width: 500px;
302
- text-align: center;
303
- }
304
-
305
- .modal-content h2 {
306
- margin-bottom: 15px;
307
- color: #333;
308
- }
309
-
310
- .modal-content p {
311
- margin-bottom: 20px;
312
- color: #666;
313
- }
314
-
315
- .modal-btn {
316
- padding: 10px 24px;
317
- background: #1f2933;
318
- color: #f9fafb;
319
- border: none;
320
- border-radius: 6px;
321
- cursor: pointer;
322
- font-size: 1rem;
323
- }
324
-
325
- .modal-btn:hover {
326
- background: #111827;
327
- }
328
  </style>
329
  </head>
330
  <body>
@@ -346,10 +297,9 @@
346
  <div class="mode-title">Segmentation</div>
347
  </label>
348
 
349
- <label class="mode-card disabled">
350
  <input type="radio" name="mode" value="drone_detection">
351
  <div class="mode-title">Drone Detection</div>
352
- <span class="mode-badge">COMING SOON</span>
353
  </label>
354
  </div>
355
  </div>
@@ -391,6 +341,16 @@
391
  </div>
392
  </div>
393
 
 
 
 
 
 
 
 
 
 
 
394
  <!-- Video Upload -->
395
  <div class="section">
396
  <div class="input-group">
@@ -441,15 +401,6 @@
441
  </div>
442
  </div>
443
 
444
- <!-- Coming Soon Modal -->
445
- <div class="modal" id="comingSoonModal">
446
- <div class="modal-content">
447
- <h2>Coming Soon!</h2>
448
- <p id="modalMessage"></p>
449
- <button class="modal-btn" id="modalClose">Got it</button>
450
- </div>
451
- </div>
452
-
453
  <script>
454
  // State
455
  let selectedMode = 'object_detection';
@@ -462,6 +413,7 @@
462
  const queriesHint = document.getElementById('queriesHint');
463
  const detectorSection = document.getElementById('detectorSection');
464
  const segmenterSection = document.getElementById('segmenterSection');
 
465
  const fileInput = document.getElementById('videoFile');
466
  const fileLabel = document.getElementById('fileLabel');
467
  const processBtn = document.getElementById('processBtn');
@@ -470,23 +422,12 @@
470
  const originalVideo = document.getElementById('originalVideo');
471
  const processedVideo = document.getElementById('processedVideo');
472
  const downloadBtn = document.getElementById('downloadBtn');
473
- const modal = document.getElementById('comingSoonModal');
474
- const modalMessage = document.getElementById('modalMessage');
475
- const modalClose = document.getElementById('modalClose');
476
-
477
  // Mode selection handler
478
  modeCards.forEach(card => {
479
  card.addEventListener('click', (e) => {
480
  const input = card.querySelector('input[type="radio"]');
481
  const mode = input.value;
482
 
483
- // Check if disabled
484
- if (card.classList.contains('disabled')) {
485
- e.preventDefault();
486
- showComingSoonModal(mode);
487
- return;
488
- }
489
-
490
  // Update selected state
491
  modeCards.forEach(c => c.classList.remove('selected'));
492
  card.classList.add('selected');
@@ -498,16 +439,19 @@
498
  queriesHint.textContent = 'Example: person, car, dog, bicycle';
499
  detectorSection.classList.remove('hidden');
500
  segmenterSection.classList.add('hidden');
 
501
  } else if (mode === 'segmentation') {
502
  queriesLabel.textContent = 'Objects to Segment (comma-separated)';
503
  queriesHint.textContent = 'Example: person, car, building, tree';
504
  detectorSection.classList.add('hidden');
505
  segmenterSection.classList.remove('hidden');
 
506
  } else if (mode === 'drone_detection') {
507
- queriesLabel.textContent = 'Drone Types to Detect (comma-separated)';
508
- queriesHint.textContent = 'Example: quadcopter, fixed-wing, drone';
509
  detectorSection.classList.add('hidden');
510
  segmenterSection.classList.add('hidden');
 
511
  }
512
 
513
  // Always show queries section
@@ -555,20 +499,17 @@
555
  });
556
 
557
  if (response.ok) {
558
- const contentType = response.headers.get('content-type');
559
-
560
- if (contentType && contentType.includes('application/json')) {
561
- // Coming soon response
562
  const data = await response.json();
563
- showComingSoonModal(data.mode);
564
- } else {
565
- // Video response
566
- const blob = await response.blob();
567
- const videoUrl = URL.createObjectURL(blob);
568
- processedVideo.src = videoUrl;
569
- downloadBtn.href = videoUrl;
570
- resultsSection.classList.remove('hidden');
571
  }
 
 
 
 
 
572
  } else {
573
  const error = await response.json();
574
  alert(`Error: ${error.detail || error.error || 'Processing failed'}`);
@@ -582,35 +523,6 @@
582
  }
583
  });
584
 
585
- // Coming soon modal
586
- function showComingSoonModal(mode) {
587
- const messages = {
588
- 'drone_detection': 'Drone detection mode is under development. Stay tuned for specialized UAV and aerial object detection!'
589
- };
590
- modalMessage.textContent = messages[mode] || 'This feature is coming soon!';
591
- modal.classList.add('show');
592
- }
593
-
594
- modalClose.addEventListener('click', () => {
595
- modal.classList.remove('show');
596
- // Reset to object detection
597
- document.querySelector('input[value="object_detection"]').checked = true;
598
- modeCards.forEach(c => c.classList.remove('selected'));
599
- document.querySelector('input[value="object_detection"]').closest('.mode-card').classList.add('selected');
600
- selectedMode = 'object_detection';
601
- // Update labels for object detection mode
602
- queriesLabel.textContent = 'Objects to Detect (comma-separated)';
603
- queriesHint.textContent = 'Example: person, car, dog, bicycle';
604
- detectorSection.classList.remove('hidden');
605
- segmenterSection.classList.add('hidden');
606
- });
607
-
608
- // Close modal on background click
609
- modal.addEventListener('click', (e) => {
610
- if (e.target === modal) {
611
- modalClose.click();
612
- }
613
- });
614
  </script>
615
  </body>
616
  </html>
 
276
  display: none;
277
  }
278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  </style>
280
  </head>
281
  <body>
 
297
  <div class="mode-title">Segmentation</div>
298
  </label>
299
 
300
+ <label class="mode-card">
301
  <input type="radio" name="mode" value="drone_detection">
302
  <div class="mode-title">Drone Detection</div>
 
303
  </label>
304
  </div>
305
  </div>
 
341
  </div>
342
  </div>
343
 
344
+ <!-- Drone Model Selection -->
345
+ <div class="section hidden" id="droneModelSection">
346
+ <div class="input-group">
347
+ <label for="droneModel">2. Select Drone Model</label>
348
+ <select id="droneModel" disabled>
349
+ <option value="drone_yolo">Drone YOLO (HF pretrained)</option>
350
+ </select>
351
+ </div>
352
+ </div>
353
+
354
  <!-- Video Upload -->
355
  <div class="section">
356
  <div class="input-group">
 
401
  </div>
402
  </div>
403
 
 
 
 
 
 
 
 
 
 
404
  <script>
405
  // State
406
  let selectedMode = 'object_detection';
 
413
  const queriesHint = document.getElementById('queriesHint');
414
  const detectorSection = document.getElementById('detectorSection');
415
  const segmenterSection = document.getElementById('segmenterSection');
416
+ const droneModelSection = document.getElementById('droneModelSection');
417
  const fileInput = document.getElementById('videoFile');
418
  const fileLabel = document.getElementById('fileLabel');
419
  const processBtn = document.getElementById('processBtn');
 
422
  const originalVideo = document.getElementById('originalVideo');
423
  const processedVideo = document.getElementById('processedVideo');
424
  const downloadBtn = document.getElementById('downloadBtn');
 
 
 
 
425
  // Mode selection handler
426
  modeCards.forEach(card => {
427
  card.addEventListener('click', (e) => {
428
  const input = card.querySelector('input[type="radio"]');
429
  const mode = input.value;
430
 
 
 
 
 
 
 
 
431
  // Update selected state
432
  modeCards.forEach(c => c.classList.remove('selected'));
433
  card.classList.add('selected');
 
439
  queriesHint.textContent = 'Example: person, car, dog, bicycle';
440
  detectorSection.classList.remove('hidden');
441
  segmenterSection.classList.add('hidden');
442
+ droneModelSection.classList.add('hidden');
443
  } else if (mode === 'segmentation') {
444
  queriesLabel.textContent = 'Objects to Segment (comma-separated)';
445
  queriesHint.textContent = 'Example: person, car, building, tree';
446
  detectorSection.classList.add('hidden');
447
  segmenterSection.classList.remove('hidden');
448
+ droneModelSection.classList.add('hidden');
449
  } else if (mode === 'drone_detection') {
450
+ queriesLabel.textContent = 'Optional Labels (comma-separated)';
451
+ queriesHint.textContent = 'Example: drone, quadcopter';
452
  detectorSection.classList.add('hidden');
453
  segmenterSection.classList.add('hidden');
454
+ droneModelSection.classList.remove('hidden');
455
  }
456
 
457
  // Always show queries section
 
499
  });
500
 
501
  if (response.ok) {
502
+ const contentType = response.headers.get('content-type') || '';
503
+ if (contentType.includes('application/json')) {
 
 
504
  const data = await response.json();
505
+ alert(data.message || 'Request completed.');
506
+ return;
 
 
 
 
 
 
507
  }
508
+ const blob = await response.blob();
509
+ const videoUrl = URL.createObjectURL(blob);
510
+ processedVideo.src = videoUrl;
511
+ downloadBtn.href = videoUrl;
512
+ resultsSection.classList.remove('hidden');
513
  } else {
514
  const error = await response.json();
515
  alert(`Error: ${error.detail || error.error || 'Processing failed'}`);
 
523
  }
524
  });
525
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
526
  </script>
527
  </body>
528
  </html>
models/detectors/drone_yolo.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import logging
import os
from typing import List, Sequence

import numpy as np
import torch
from huggingface_hub import hf_hub_download
from ultralytics import YOLO

from models.detectors.base import DetectionResult, ObjectDetector


class DroneYoloDetector(ObjectDetector):
    """Drone detector backed by a YOLO model on the Hugging Face Hub."""

    REPO_ID = "rujutashashikanjoshi/yolo12-drone-detection-0205-100m"
    DEFAULT_WEIGHT = "best.pt"

    def __init__(self, score_threshold: float = 0.3) -> None:
        """Download the checkpoint from the Hub and load it onto GPU if available.

        Args:
            score_threshold: Minimum confidence for a detection to be kept.
        """
        self.name = "drone_yolo"
        self.score_threshold = score_threshold
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        # The checkpoint filename may be overridden via the environment to
        # point at an alternative weight file in the same repository.
        checkpoint = os.getenv("DRONE_YOLO_WEIGHT", self.DEFAULT_WEIGHT)
        logging.info(
            "Loading drone YOLO weights %s/%s onto %s",
            self.REPO_ID,
            checkpoint,
            self.device,
        )
        local_path = hf_hub_download(repo_id=self.REPO_ID, filename=checkpoint)
        self.model = YOLO(local_path)
        self.model.to(self.device)
        # Mapping of class id -> human-readable class name, as stored in the
        # checkpoint.
        self.class_names = self.model.names

    def _filter_indices(self, label_names: Sequence[str], queries: Sequence[str]) -> List[int]:
        """Return indices of detections whose class name matches a query.

        With no queries, or when no query matches any detected class name,
        every detection index is returned (best-effort fallback rather than
        silently dropping all boxes).
        """
        if not queries:
            return list(range(len(label_names)))
        wanted = {q.lower().strip() for q in queries if q}
        matched = [i for i, name in enumerate(label_names) if name.lower() in wanted]
        if matched:
            return matched
        # No query matched the model's label set; keep everything.
        return list(range(len(label_names)))

    def predict(self, frame: np.ndarray, queries: Sequence[str]) -> DetectionResult:
        """Run the YOLO model on a single frame and filter detections by *queries*.

        Args:
            frame: Image array (BGR/RGB as expected by ultralytics' predict).
            queries: Optional class-name filters; see ``_filter_indices``.

        Returns:
            A ``DetectionResult`` with xyxy boxes, confidence scores,
            integer class ids, and class names for the kept detections.
        """
        inference_device = 0 if self.device.startswith("cuda") else "cpu"
        outputs = self.model.predict(
            source=frame,
            device=inference_device,
            conf=self.score_threshold,
            verbose=False,
        )
        detections = outputs[0].boxes
        if detections is None or detections.xyxy is None:
            # No detections at all: return an empty, well-shaped result.
            return DetectionResult(np.empty((0, 4), dtype=np.float32), [], [], [])

        coords = detections.xyxy.cpu().numpy()
        confidences = detections.conf.cpu().numpy().tolist()
        class_ids = detections.cls.cpu().numpy().astype(int).tolist()
        names = [self.class_names.get(cid, f"class_{cid}") for cid in class_ids]
        keep = self._filter_indices(names, queries)
        return DetectionResult(
            boxes=coords[keep] if len(coords) else coords,
            scores=[confidences[i] for i in keep],
            labels=[class_ids[i] for i in keep],
            label_names=[names[i] for i in keep],
        )
models/model_loader.py CHANGED
@@ -4,6 +4,7 @@ from typing import Callable, Dict, Optional
4
 
5
  from models.detectors.base import ObjectDetector
6
  from models.detectors.detr import DetrDetector
 
7
  from models.detectors.grounding_dino import GroundingDinoDetector
8
  from models.detectors.yolov8 import HuggingFaceYoloV8Detector
9
 
@@ -13,6 +14,7 @@ _REGISTRY: Dict[str, Callable[[], ObjectDetector]] = {
13
  "hf_yolov8": HuggingFaceYoloV8Detector,
14
  "detr_resnet50": DetrDetector,
15
  "grounding_dino": GroundingDinoDetector,
 
16
  }
17
 
18
 
 
4
 
5
  from models.detectors.base import ObjectDetector
6
  from models.detectors.detr import DetrDetector
7
+ from models.detectors.drone_yolo import DroneYoloDetector
8
  from models.detectors.grounding_dino import GroundingDinoDetector
9
  from models.detectors.yolov8 import HuggingFaceYoloV8Detector
10
 
 
14
  "hf_yolov8": HuggingFaceYoloV8Detector,
15
  "detr_resnet50": DetrDetector,
16
  "grounding_dino": GroundingDinoDetector,
17
+ "drone_yolo": DroneYoloDetector,
18
  }
19
 
20