socks22 committed on
Commit
a2e9c4d
·
1 Parent(s): c00168f

fix detect

Browse files
.gitignore CHANGED
@@ -6,4 +6,5 @@ training/output/
6
  training/occupied_spot_training/
7
 
8
 
9
- .DS_Store
 
 
6
  training/occupied_spot_training/
7
 
8
 
9
+ .DS_Store
10
+ __pycache__
frontend/src/app.tsx CHANGED
@@ -15,7 +15,7 @@ export function App() {
15
  setState('loading');
16
  setError('');
17
  try {
18
- const threshold = model === 'spots' ? 0.83 : 0.95;
19
  const result = await detectCars(file, threshold, model);
20
  setData(result);
21
  setState('results');
@@ -51,6 +51,9 @@ export function App() {
51
  Spot Occupancy
52
  </button>
53
  </div>
 
 
 
54
  <Upload onFile={handleFile} />
55
  </>
56
  )}
 
15
  setState('loading');
16
  setError('');
17
  try {
18
+ const threshold = model === 'spots' ? 0.16 : 0.2;
19
  const result = await detectCars(file, threshold, model);
20
  setData(result);
21
  setState('results');
 
51
  Spot Occupancy
52
  </button>
53
  </div>
54
+ <p class="beta-disclaimer">
55
+ BETA: Models were quickly trained and not threshold-tested extensively.
56
+ </p>
57
  <Upload onFile={handleFile} />
58
  </>
59
  )}
frontend/src/style.css CHANGED
@@ -26,6 +26,13 @@ h1 {
26
  color: #fff;
27
  }
28
 
 
 
 
 
 
 
 
29
  /* Upload zone */
30
  .upload-zone {
31
  border: 2px dashed #555;
 
26
  color: #fff;
27
  }
28
 
29
+ /* Beta disclaimer */
30
+ .beta-disclaimer {
31
+ font-size: 0.8rem;
32
+ color: #999;
33
+ margin-bottom: 1rem;
34
+ }
35
+
36
  /* Upload zone */
37
  .upload-zone {
38
  border: 2px dashed #555;
server/detect.py CHANGED
@@ -83,20 +83,20 @@ def postprocess(
83
 
84
  Each dict has keys: "bbox" (list[float] xyxy), "score" (float),
85
  "class_id" (int), "class_name" (str).
 
 
 
86
  """
87
  boxes = outputs["dets"].reshape(-1, 4)
88
  logits = outputs["labels"].reshape(boxes.shape[0], -1)
 
89
 
90
- num_classes = logits.shape[1] - 1 # last column is no-object
91
-
92
- # Softmax over all columns
93
- exp = np.exp(logits - logits.max(axis=1, keepdims=True))
94
- probs = exp / exp.sum(axis=1, keepdims=True)
95
 
96
- # Best real class per detection
97
- real_probs = probs[:, :num_classes]
98
- class_ids = real_probs.argmax(axis=1)
99
- scores = real_probs[np.arange(len(class_ids)), class_ids]
100
 
101
  # Normalized cxcywh -> pixel xyxy
102
  orig_h, orig_w = orig_hw
 
83
 
84
  Each dict has keys: "bbox" (list[float] xyxy), "score" (float),
85
  "class_id" (int), "class_name" (str).
86
+
87
+ RF-DETR uses per-class sigmoid (not softmax). Each logit column is an
88
+ independent binary classifier — there is no "no-object" column.
89
  """
90
  boxes = outputs["dets"].reshape(-1, 4)
91
  logits = outputs["labels"].reshape(boxes.shape[0], -1)
92
+ num_classes = logits.shape[1]
93
 
94
+ # Sigmoid per logit (independent binary classifiers)
95
+ probs = 1.0 / (1.0 + np.exp(-logits))
 
 
 
96
 
97
+ # Best class per detection
98
+ class_ids = probs.argmax(axis=1)
99
+ scores = probs[np.arange(len(class_ids)), class_ids]
 
100
 
101
  # Normalized cxcywh -> pixel xyxy
102
  orig_h, orig_w = orig_hw
training/inference.py CHANGED
@@ -17,13 +17,15 @@ MODEL_CLASSES: dict[str, type] = {
17
  "large": rfdetr.RFDETRLarge,
18
  }
19
 
 
 
20
 
21
  def run_inference(
22
  image_paths: list[Path],
23
  checkpoint: str | Path,
24
  model_size: str = "medium",
25
  threshold: float = 0.5,
26
- output_dir: str | Path = "./inference_output",
27
  ) -> None:
28
  """Load an RF-DETR checkpoint and run detection on input images.
29
 
@@ -53,7 +55,10 @@ def run_inference(
53
  detections: sv.Detections = model.predict(str(image_path), threshold=threshold)
54
 
55
  image = cv2.imread(str(image_path))
56
- labels = [f"car {conf:.2f}" for conf in detections.confidence]
 
 
 
57
 
58
  annotated = box_annotator.annotate(scene=image.copy(), detections=detections)
59
  annotated = label_annotator.annotate(
 
17
  "large": rfdetr.RFDETRLarge,
18
  }
19
 
20
+ prediction_classes = {0: "empty_spot", 1: "parked_car"}
21
+
22
 
23
  def run_inference(
24
  image_paths: list[Path],
25
  checkpoint: str | Path,
26
  model_size: str = "medium",
27
  threshold: float = 0.5,
28
+ output_dir: str | Path = "./inference_output2",
29
  ) -> None:
30
  """Load an RF-DETR checkpoint and run detection on input images.
31
 
 
55
  detections: sv.Detections = model.predict(str(image_path), threshold=threshold)
56
 
57
  image = cv2.imread(str(image_path))
58
+ labels = [
59
+ f"{prediction_classes[detections.class_id[i]]} {conf:.2f}"
60
+ for i, conf in enumerate(detections.confidence)
61
+ ]
62
 
63
  annotated = box_annotator.annotate(scene=image.copy(), detections=detections)
64
  annotated = label_annotator.annotate(
training/inference_onnx.py CHANGED
@@ -34,17 +34,18 @@ def postprocess(
34
  orig_hw: tuple[int, int],
35
  threshold: float,
36
  ) -> sv.Detections:
37
- """Convert ONNX outputs to supervision Detections."""
38
- # RF-DETR ONNX outputs: dets [1,300,4] (normalized cxcywh), labels [1,300,num_classes+1] (raw logits)
 
 
 
39
  boxes = outputs["dets"].reshape(-1, 4)
40
  logits = outputs["labels"].reshape(boxes.shape[0], -1)
41
 
42
- # Softmax to get probabilities; last column is no-object
43
- exp = np.exp(logits - logits.max(axis=1, keepdims=True))
44
- probs = exp / exp.sum(axis=1, keepdims=True)
45
- class_probs = probs[:, :-1]
46
- class_ids = class_probs.argmax(axis=1)
47
- scores = class_probs.max(axis=1)
48
 
49
  # Convert from normalized cxcywh to xyxy in pixel coordinates
50
  orig_h, orig_w = orig_hw
 
34
  orig_hw: tuple[int, int],
35
  threshold: float,
36
  ) -> sv.Detections:
37
+ """Convert ONNX outputs to supervision Detections.
38
+
39
+ RF-DETR uses per-class sigmoid (not softmax). Each logit column is an
40
+ independent binary classifier — there is no "no-object" column.
41
+ """
42
  boxes = outputs["dets"].reshape(-1, 4)
43
  logits = outputs["labels"].reshape(boxes.shape[0], -1)
44
 
45
+ # Sigmoid per logit (independent binary classifiers)
46
+ probs = 1.0 / (1.0 + np.exp(-logits))
47
+ class_ids = probs.argmax(axis=1)
48
+ scores = probs.max(axis=1)
 
 
49
 
50
  # Convert from normalized cxcywh to xyxy in pixel coordinates
51
  orig_h, orig_w = orig_hw