Spaces:
Paused
Paused
Zhen Ye
committed on
Commit
·
6c02470
1
Parent(s):
65dd451
removed owlv2
Browse files
- app.py +2 -2
- demo.html +0 -1
- inference.py +2 -2
- models/detectors/owlv2.py +0 -56
- models/model_loader.py +1 -3
app.py
CHANGED
|
@@ -70,7 +70,7 @@ async def detect_endpoint(
|
|
| 70 |
video: UploadFile = File(...),
|
| 71 |
mode: str = Form(...),
|
| 72 |
queries: str = Form(""),
|
| 73 |
-
detector: str = Form("owlv2_base"),
|
| 74 |
segmenter: str = Form("sam3"),
|
| 75 |
):
|
| 76 |
"""
|
|
@@ -80,7 +80,7 @@ async def detect_endpoint(
|
|
| 80 |
video: Video file to process
|
| 81 |
mode: Detection mode (object_detection, segmentation, drone_detection)
|
| 82 |
queries: Comma-separated object classes for object_detection mode
|
| 83 |
-
detector: Model to use (owlv2_base, hf_yolov8, detr_resnet50, grounding_dino)
|
| 84 |
segmenter: Segmentation model to use (sam3)
|
| 85 |
|
| 86 |
Returns:
|
|
|
|
| 70 |
video: UploadFile = File(...),
|
| 71 |
mode: str = Form(...),
|
| 72 |
queries: str = Form(""),
|
| 73 |
+
detector: str = Form("hf_yolov8"),
|
| 74 |
segmenter: str = Form("sam3"),
|
| 75 |
):
|
| 76 |
"""
|
|
|
|
| 80 |
video: Video file to process
|
| 81 |
mode: Detection mode (object_detection, segmentation, drone_detection)
|
| 82 |
queries: Comma-separated object classes for object_detection mode
|
| 83 |
+
detector: Model to use (hf_yolov8, detr_resnet50, grounding_dino)
|
| 84 |
segmenter: Segmentation model to use (sam3)
|
| 85 |
|
| 86 |
Returns:
|
demo.html
CHANGED
|
@@ -374,7 +374,6 @@
|
|
| 374 |
<div class="input-group">
|
| 375 |
<label for="detector">2. Select Detection Model</label>
|
| 376 |
<select id="detector">
|
| 377 |
-
<option value="owlv2_base">OWLv2 (Open-vocabulary, Default)</option>
|
| 378 |
<option value="hf_yolov8">YOLOv8 (Fast, COCO classes)</option>
|
| 379 |
<option value="detr_resnet50">DETR ResNet-50 (Transformer-based)</option>
|
| 380 |
<option value="grounding_dino">Grounding DINO (Open-vocabulary)</option>
|
|
|
|
| 374 |
<div class="input-group">
|
| 375 |
<label for="detector">2. Select Detection Model</label>
|
| 376 |
<select id="detector">
|
|
|
|
| 377 |
<option value="hf_yolov8">YOLOv8 (Fast, COCO classes)</option>
|
| 378 |
<option value="detr_resnet50">DETR ResNet-50 (Transformer-based)</option>
|
| 379 |
<option value="grounding_dino">Grounding DINO (Open-vocabulary)</option>
|
inference.py
CHANGED
|
@@ -114,7 +114,7 @@ def run_inference(
|
|
| 114 |
output_video_path: Path to write processed video
|
| 115 |
queries: List of object classes to detect (e.g., ["person", "car"])
|
| 116 |
max_frames: Optional frame limit for testing
|
| 117 |
-
detector_name: Detector to use (default: owlv2_base)
|
| 118 |
|
| 119 |
Returns:
|
| 120 |
Path to processed output video
|
|
@@ -133,7 +133,7 @@ def run_inference(
|
|
| 133 |
logging.info("Detection queries: %s", queries)
|
| 134 |
|
| 135 |
# Select detector
|
| 136 |
-
active_detector = detector_name or "owlv2_base"
|
| 137 |
logging.info("Using detector: %s", active_detector)
|
| 138 |
|
| 139 |
# Process frames
|
|
|
|
| 114 |
output_video_path: Path to write processed video
|
| 115 |
queries: List of object classes to detect (e.g., ["person", "car"])
|
| 116 |
max_frames: Optional frame limit for testing
|
| 117 |
+
detector_name: Detector to use (default: hf_yolov8)
|
| 118 |
|
| 119 |
Returns:
|
| 120 |
Path to processed output video
|
|
|
|
| 133 |
logging.info("Detection queries: %s", queries)
|
| 134 |
|
| 135 |
# Select detector
|
| 136 |
+
active_detector = detector_name or "hf_yolov8"
|
| 137 |
logging.info("Using detector: %s", active_detector)
|
| 138 |
|
| 139 |
# Process frames
|
models/detectors/owlv2.py
DELETED
|
@@ -1,56 +0,0 @@
|
|
| 1 |
-
import logging
|
| 2 |
-
from typing import Sequence
|
| 3 |
-
|
| 4 |
-
import numpy as np
|
| 5 |
-
import torch
|
| 6 |
-
from transformers import Owlv2ForObjectDetection, Owlv2Processor
|
| 7 |
-
|
| 8 |
-
from models.detectors.base import DetectionResult, ObjectDetector
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
class Owlv2Detector(ObjectDetector):
|
| 12 |
-
MODEL_NAME = "google/owlv2-base-patch32"
|
| 13 |
-
|
| 14 |
-
def __init__(self) -> None:
|
| 15 |
-
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 16 |
-
logging.info("Loading %s onto %s", self.MODEL_NAME, self.device)
|
| 17 |
-
self.processor = Owlv2Processor.from_pretrained(self.MODEL_NAME)
|
| 18 |
-
torch_dtype = torch.float16 if self.device.type == "cuda" else torch.float32
|
| 19 |
-
self.model = Owlv2ForObjectDetection.from_pretrained(
|
| 20 |
-
self.MODEL_NAME, torch_dtype=torch_dtype
|
| 21 |
-
)
|
| 22 |
-
self.model.to(self.device)
|
| 23 |
-
self.model.eval()
|
| 24 |
-
self.name = "owlv2_base"
|
| 25 |
-
|
| 26 |
-
def predict(self, frame: np.ndarray, queries: Sequence[str]) -> DetectionResult:
|
| 27 |
-
inputs = self.processor(text=queries, images=frame, return_tensors="pt")
|
| 28 |
-
if hasattr(inputs, "to"):
|
| 29 |
-
inputs = inputs.to(self.device)
|
| 30 |
-
else:
|
| 31 |
-
inputs = {
|
| 32 |
-
key: value.to(self.device) if hasattr(value, "to") else value
|
| 33 |
-
for key, value in inputs.items()
|
| 34 |
-
}
|
| 35 |
-
with torch.no_grad():
|
| 36 |
-
outputs = self.model(**inputs)
|
| 37 |
-
processed = self.processor.post_process_object_detection(
|
| 38 |
-
outputs, threshold=0.3, target_sizes=[frame.shape[:2]]
|
| 39 |
-
)[0]
|
| 40 |
-
boxes = processed["boxes"]
|
| 41 |
-
scores = processed.get("scores", [])
|
| 42 |
-
labels = processed.get("labels", [])
|
| 43 |
-
boxes_np = boxes.cpu().numpy() if hasattr(boxes, "cpu") else np.asarray(boxes)
|
| 44 |
-
if hasattr(scores, "cpu"):
|
| 45 |
-
scores_seq = scores.cpu().numpy().tolist()
|
| 46 |
-
elif isinstance(scores, np.ndarray):
|
| 47 |
-
scores_seq = scores.tolist()
|
| 48 |
-
else:
|
| 49 |
-
scores_seq = list(scores)
|
| 50 |
-
if hasattr(labels, "cpu"):
|
| 51 |
-
labels_seq = labels.cpu().numpy().tolist()
|
| 52 |
-
elif isinstance(labels, np.ndarray):
|
| 53 |
-
labels_seq = labels.tolist()
|
| 54 |
-
else:
|
| 55 |
-
labels_seq = list(labels)
|
| 56 |
-
return DetectionResult(boxes=boxes_np, scores=scores_seq, labels=labels_seq)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/model_loader.py
CHANGED
|
@@ -5,13 +5,11 @@ from typing import Callable, Dict, Optional
|
|
| 5 |
from models.detectors.base import ObjectDetector
|
| 6 |
from models.detectors.detr import DetrDetector
|
| 7 |
from models.detectors.grounding_dino import GroundingDinoDetector
|
| 8 |
-
from models.detectors.owlv2 import Owlv2Detector
|
| 9 |
from models.detectors.yolov8 import HuggingFaceYoloV8Detector
|
| 10 |
|
| 11 |
-
DEFAULT_DETECTOR = "owlv2_base"
|
| 12 |
|
| 13 |
_REGISTRY: Dict[str, Callable[[], ObjectDetector]] = {
|
| 14 |
-
"owlv2_base": Owlv2Detector,
|
| 15 |
"hf_yolov8": HuggingFaceYoloV8Detector,
|
| 16 |
"detr_resnet50": DetrDetector,
|
| 17 |
"grounding_dino": GroundingDinoDetector,
|
|
|
|
| 5 |
from models.detectors.base import ObjectDetector
|
| 6 |
from models.detectors.detr import DetrDetector
|
| 7 |
from models.detectors.grounding_dino import GroundingDinoDetector
|
|
|
|
| 8 |
from models.detectors.yolov8 import HuggingFaceYoloV8Detector
|
| 9 |
|
| 10 |
+
DEFAULT_DETECTOR = "hf_yolov8"
|
| 11 |
|
| 12 |
_REGISTRY: Dict[str, Callable[[], ObjectDetector]] = {
|
|
|
|
| 13 |
"hf_yolov8": HuggingFaceYoloV8Detector,
|
| 14 |
"detr_resnet50": DetrDetector,
|
| 15 |
"grounding_dino": GroundingDinoDetector,
|