Zhen Ye committed on
Commit
6c02470
·
1 Parent(s): 65dd451

removed owlv2

Browse files
Files changed (5) hide show
  1. app.py +2 -2
  2. demo.html +0 -1
  3. inference.py +2 -2
  4. models/detectors/owlv2.py +0 -56
  5. models/model_loader.py +1 -3
app.py CHANGED
@@ -70,7 +70,7 @@ async def detect_endpoint(
70
  video: UploadFile = File(...),
71
  mode: str = Form(...),
72
  queries: str = Form(""),
73
- detector: str = Form("owlv2_base"),
74
  segmenter: str = Form("sam3"),
75
  ):
76
  """
@@ -80,7 +80,7 @@ async def detect_endpoint(
80
  video: Video file to process
81
  mode: Detection mode (object_detection, segmentation, drone_detection)
82
  queries: Comma-separated object classes for object_detection mode
83
- detector: Model to use (owlv2_base, hf_yolov8, detr_resnet50, grounding_dino)
84
  segmenter: Segmentation model to use (sam3)
85
 
86
  Returns:
 
70
  video: UploadFile = File(...),
71
  mode: str = Form(...),
72
  queries: str = Form(""),
73
+ detector: str = Form("hf_yolov8"),
74
  segmenter: str = Form("sam3"),
75
  ):
76
  """
 
80
  video: Video file to process
81
  mode: Detection mode (object_detection, segmentation, drone_detection)
82
  queries: Comma-separated object classes for object_detection mode
83
+ detector: Model to use (hf_yolov8, detr_resnet50, grounding_dino)
84
  segmenter: Segmentation model to use (sam3)
85
 
86
  Returns:
demo.html CHANGED
@@ -374,7 +374,6 @@
374
  <div class="input-group">
375
  <label for="detector">2. Select Detection Model</label>
376
  <select id="detector">
377
- <option value="owlv2_base">OWLv2 (Open-vocabulary, Default)</option>
378
  <option value="hf_yolov8">YOLOv8 (Fast, COCO classes)</option>
379
  <option value="detr_resnet50">DETR ResNet-50 (Transformer-based)</option>
380
  <option value="grounding_dino">Grounding DINO (Open-vocabulary)</option>
 
374
  <div class="input-group">
375
  <label for="detector">2. Select Detection Model</label>
376
  <select id="detector">
 
377
  <option value="hf_yolov8">YOLOv8 (Fast, COCO classes)</option>
378
  <option value="detr_resnet50">DETR ResNet-50 (Transformer-based)</option>
379
  <option value="grounding_dino">Grounding DINO (Open-vocabulary)</option>
inference.py CHANGED
@@ -114,7 +114,7 @@ def run_inference(
114
  output_video_path: Path to write processed video
115
  queries: List of object classes to detect (e.g., ["person", "car"])
116
  max_frames: Optional frame limit for testing
117
- detector_name: Detector to use (default: owlv2_base)
118
 
119
  Returns:
120
  Path to processed output video
@@ -133,7 +133,7 @@ def run_inference(
133
  logging.info("Detection queries: %s", queries)
134
 
135
  # Select detector
136
- active_detector = detector_name or "owlv2_base"
137
  logging.info("Using detector: %s", active_detector)
138
 
139
  # Process frames
 
114
  output_video_path: Path to write processed video
115
  queries: List of object classes to detect (e.g., ["person", "car"])
116
  max_frames: Optional frame limit for testing
117
+ detector_name: Detector to use (default: hf_yolov8)
118
 
119
  Returns:
120
  Path to processed output video
 
133
  logging.info("Detection queries: %s", queries)
134
 
135
  # Select detector
136
+ active_detector = detector_name or "hf_yolov8"
137
  logging.info("Using detector: %s", active_detector)
138
 
139
  # Process frames
models/detectors/owlv2.py DELETED
@@ -1,56 +0,0 @@
1
- import logging
2
- from typing import Sequence
3
-
4
- import numpy as np
5
- import torch
6
- from transformers import Owlv2ForObjectDetection, Owlv2Processor
7
-
8
- from models.detectors.base import DetectionResult, ObjectDetector
9
-
10
-
11
- class Owlv2Detector(ObjectDetector):
12
- MODEL_NAME = "google/owlv2-base-patch32"
13
-
14
- def __init__(self) -> None:
15
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16
- logging.info("Loading %s onto %s", self.MODEL_NAME, self.device)
17
- self.processor = Owlv2Processor.from_pretrained(self.MODEL_NAME)
18
- torch_dtype = torch.float16 if self.device.type == "cuda" else torch.float32
19
- self.model = Owlv2ForObjectDetection.from_pretrained(
20
- self.MODEL_NAME, torch_dtype=torch_dtype
21
- )
22
- self.model.to(self.device)
23
- self.model.eval()
24
- self.name = "owlv2_base"
25
-
26
- def predict(self, frame: np.ndarray, queries: Sequence[str]) -> DetectionResult:
27
- inputs = self.processor(text=queries, images=frame, return_tensors="pt")
28
- if hasattr(inputs, "to"):
29
- inputs = inputs.to(self.device)
30
- else:
31
- inputs = {
32
- key: value.to(self.device) if hasattr(value, "to") else value
33
- for key, value in inputs.items()
34
- }
35
- with torch.no_grad():
36
- outputs = self.model(**inputs)
37
- processed = self.processor.post_process_object_detection(
38
- outputs, threshold=0.3, target_sizes=[frame.shape[:2]]
39
- )[0]
40
- boxes = processed["boxes"]
41
- scores = processed.get("scores", [])
42
- labels = processed.get("labels", [])
43
- boxes_np = boxes.cpu().numpy() if hasattr(boxes, "cpu") else np.asarray(boxes)
44
- if hasattr(scores, "cpu"):
45
- scores_seq = scores.cpu().numpy().tolist()
46
- elif isinstance(scores, np.ndarray):
47
- scores_seq = scores.tolist()
48
- else:
49
- scores_seq = list(scores)
50
- if hasattr(labels, "cpu"):
51
- labels_seq = labels.cpu().numpy().tolist()
52
- elif isinstance(labels, np.ndarray):
53
- labels_seq = labels.tolist()
54
- else:
55
- labels_seq = list(labels)
56
- return DetectionResult(boxes=boxes_np, scores=scores_seq, labels=labels_seq)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/model_loader.py CHANGED
@@ -5,13 +5,11 @@ from typing import Callable, Dict, Optional
5
  from models.detectors.base import ObjectDetector
6
  from models.detectors.detr import DetrDetector
7
  from models.detectors.grounding_dino import GroundingDinoDetector
8
- from models.detectors.owlv2 import Owlv2Detector
9
  from models.detectors.yolov8 import HuggingFaceYoloV8Detector
10
 
11
- DEFAULT_DETECTOR = "owlv2_base"
12
 
13
  _REGISTRY: Dict[str, Callable[[], ObjectDetector]] = {
14
- "owlv2_base": Owlv2Detector,
15
  "hf_yolov8": HuggingFaceYoloV8Detector,
16
  "detr_resnet50": DetrDetector,
17
  "grounding_dino": GroundingDinoDetector,
 
5
  from models.detectors.base import ObjectDetector
6
  from models.detectors.detr import DetrDetector
7
  from models.detectors.grounding_dino import GroundingDinoDetector
 
8
  from models.detectors.yolov8 import HuggingFaceYoloV8Detector
9
 
10
+ DEFAULT_DETECTOR = "hf_yolov8"
11
 
12
  _REGISTRY: Dict[str, Callable[[], ObjectDetector]] = {
 
13
  "hf_yolov8": HuggingFaceYoloV8Detector,
14
  "detr_resnet50": DetrDetector,
15
  "grounding_dino": GroundingDinoDetector,