monesh2212 committed on
Commit
2964636
·
verified ·
1 Parent(s): a836cf6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -122
app.py CHANGED
@@ -1,188 +1,125 @@
1
- # app.py
2
  import gradio as gr
3
  import onnxruntime as ort
4
  import numpy as np
5
  from PIL import Image, ImageDraw, ImageFont
6
  import time, os
 
7
 
8
  # ---------------------------
9
  # CONFIG
10
  # ---------------------------
11
MODEL_PATH = "model.onnx"  # Ensure model.onnx is in repo
LABELS_PATH = "labels.txt"  # Optional: one label per line
CONF_THRESHOLD = 0.35
PREVIEW_INPUT_SIZE = (640, 640)  # Change if model expects different input size

# ---------------------------
# LOAD LABELS
# ---------------------------
# LABELS is a list of non-empty, stripped lines, or None when the file is absent.
if os.path.exists(LABELS_PATH):
    # Explicit encoding: without it the label text is decoded with the
    # platform default, which differs between machines.
    with open(LABELS_PATH, "r", encoding="utf-8") as f:
        LABELS = [line.strip() for line in f if line.strip()]
else:
    LABELS = None

# ---------------------------
# LOAD MODEL
# ---------------------------
print(f"Loading ONNX model from: {MODEL_PATH}")
sess = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])

# Print the model signature once at startup to help diagnose shape mismatches.
print("\nONNX Model Inputs:")
for i, inp in enumerate(sess.get_inputs()):
    print(f" input[{i}] name={inp.name}, shape={inp.shape}, dtype={inp.type}")

print("\nONNX Model Outputs:")
for i, out in enumerate(sess.get_outputs()):
    print(f" output[{i}] name={out.name}, shape={out.shape}, dtype={out.type}")
38
 
39
# ---------------------------
# PREPROCESS FUNCTION
# ---------------------------
def preprocess_frame(frame_np, input_size=PREVIEW_INPUT_SIZE):
    """Convert a camera frame into a normalized float32 NCHW tensor.

    Args:
        frame_np: Image array; it is cast to uint8 before use.
        input_size: Size passed to ``Image.resize``.

    Returns:
        A float32 array scaled to 0..1 with channels moved ahead of the
        spatial axes and a leading batch axis of length 1.
    """
    # Let Pillow infer the mode from the array and convert afterwards, so a
    # grayscale or RGBA frame is not reinterpreted as packed RGB bytes.
    img = Image.fromarray(frame_np.astype("uint8")).convert("RGB")
    img_resized = img.resize(input_size)
    arr = np.array(img_resized).astype(np.float32) / 255.0  # normalize 0..1
    arr = np.transpose(arr, (2, 0, 1))[np.newaxis, ...]  # to NCHW
    return arr
48
 
49
# ---------------------------
# POSTPROCESS FUNCTION
# ---------------------------
def _pick_scores_and_labels(outs, boxes_arr):
    """Return the flattened per-box (scores, labels) arrays paired with *boxes_arr*.

    Either element is None when no suitable array exists.
    """
    n_boxes = boxes_arr.shape[0]
    per_box = [
        o.reshape(-1)  # accept (N,), (N, 1) and (1, N) layouts alike
        for o in outs
        if o is not boxes_arr and o.ndim <= 2 and o.size == n_boxes
    ]
    int_arrays = [a for a in per_box if np.issubdtype(a.dtype, np.integer)]
    other_arrays = [a for a in per_box if not np.issubdtype(a.dtype, np.integer)]

    scores_arr = other_arrays[0] if other_arrays else None
    if int_arrays:
        # An integer-typed array is taken as class ids, whatever its position.
        labels_arr = int_arrays[0]
    elif len(other_arrays) > 1:
        # Labels exported as floats: take the array after the scores.
        labels_arr = other_arrays[1]
    else:
        labels_arr = None
    return scores_arr, labels_arr


def postprocess_outputs(outputs, orig_w, orig_h, conf_thresh=0.35, debug=False):
    """Turn raw model outputs into a list of detections.

    Three layouts are recognized, in this order:

    1. One N x 6 array of ``x1, y1, x2, y2, score, class``.
    2. One N x (5 + classes) array of ``cx, cy, w, h, objectness, class scores``.
    3. Separate arrays: an N x 4 box array plus per-box scores and labels.

    Coordinates whose maximum is <= 1.0 are treated as normalized and scaled
    by ``orig_w`` / ``orig_h``; other coordinates are used unchanged.

    Args:
        outputs: Sequence of model outputs (arrays or array-likes).
        orig_w: Width used to scale normalized x coordinates.
        orig_h: Height used to scale normalized y coordinates.
        conf_thresh: Detections scoring below this value are dropped.
        debug: When true, print the chosen output's shape and a few rows.

    Returns:
        A list of dicts with keys ``"box"`` (``[x1, y1, x2, y2]``),
        ``"score"`` and ``"class"``; empty when nothing could be parsed.
    """
    outs = [o if isinstance(o, np.ndarray) else np.array(o) for o in outputs]
    if not outs:
        return []

    # Prefer the first output wide enough to hold a box; else the first output.
    cand = next((o for o in outs if o.ndim >= 2 and o.shape[-1] >= 4), outs[0])
    if cand.ndim == 3 and cand.shape[0] == 1:
        cand = cand[0]  # drop the batch axis

    detections = []

    if debug:
        print("Raw chosen output shape:", cand.shape)
        try:
            print("Sample rows:", cand.reshape(-1, cand.shape[-1])[:5])
        except (IndexError, ValueError) as exc:
            # Diagnostics are best-effort; report instead of failing silently.
            print("Could not print sample rows:", exc)

    # Case 1: Nx6
    if cand.ndim == 2 and cand.shape[1] == 6:
        for x1, y1, x2, y2, score, cls in cand:
            if score < conf_thresh:
                continue
            if max(x1, y1, x2, y2) <= 1.0:
                x1, y1, x2, y2 = x1 * orig_w, y1 * orig_h, x2 * orig_w, y2 * orig_h
            detections.append({"box": [x1, y1, x2, y2], "score": float(score), "class": int(cls)})
        return detections

    # Case 2: YOLO-style Nx(5+num_classes)
    if cand.ndim == 2 and cand.shape[1] >= 6:
        for r in cand:
            cx, cy, w, h = r[0], r[1], r[2], r[3]
            obj_conf = float(r[4])
            class_probs = r[5:]
            best_idx = int(np.argmax(class_probs)) if class_probs.size > 0 else 0
            cls_conf = float(class_probs[best_idx]) if class_probs.size > 0 else 1.0
            score = obj_conf * cls_conf
            if score < conf_thresh:
                continue
            if max(cx, cy, w, h) <= 1.0:
                x1 = (cx - w / 2) * orig_w
                y1 = (cy - h / 2) * orig_h
                x2 = (cx + w / 2) * orig_w
                y2 = (cy + h / 2) * orig_h
            else:
                x1, y1, x2, y2 = cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2
            detections.append({"box": [x1, y1, x2, y2], "score": score, "class": best_idx})
        return detections

    # Case 3: Separate outputs (boxes, scores, labels)
    if len(outs) >= 3:
        boxes_arr = next((o for o in outs if o.ndim == 2 and o.shape[1] == 4), None)
        if boxes_arr is not None:
            # Scores and labels must be different arrays; picking both with the
            # same predicate would read the class id from the score.
            scores_arr, labels_arr = _pick_scores_and_labels(outs, boxes_arr)
            for i, bx in enumerate(boxes_arr):
                score = float(scores_arr[i]) if scores_arr is not None else 1.0
                if score < conf_thresh:
                    continue
                if max(bx) <= 1.0:
                    x1, y1, x2, y2 = bx[0] * orig_w, bx[1] * orig_h, bx[2] * orig_w, bx[3] * orig_h
                else:
                    x1, y1, x2, y2 = bx
                cls_id = int(labels_arr[i]) if labels_arr is not None else 0
                detections.append({"box": [x1, y1, x2, y2], "score": score, "class": cls_id})
            return detections

    if debug:
        print("Could not parse model outputs automatically.")
    return detections
129
 
130
# ---------------------------
# DRAW BOXES ON IMAGE
# ---------------------------
def draw_boxes_on_image(pil_img, detections):
    """Return an RGB copy of *pil_img* with every detection outlined and captioned.

    Each detection supplies ``"box"`` (x1, y1, x2, y2), ``"score"`` and
    ``"class"``. The caption uses the entry from LABELS when the class index is
    in range, otherwise the class index itself.
    """
    canvas = pil_img.convert("RGB")
    pen = ImageDraw.Draw(canvas)
    default_font = ImageFont.load_default()
    for det in detections:
        left, top, right, bottom = det["box"]
        class_id = det["class"]
        if LABELS and 0 <= class_id < len(LABELS):
            name = LABELS[class_id]
        else:
            name = str(class_id)
        caption = f"{name} {det['score']:.2f}"
        pen.rectangle([left, top, right, bottom], outline="red", width=3)
        # Caption sits just above the box, clamped to the top edge of the image.
        pen.text((left, max(0, top - 12)), caption, fill="red", font=default_font)
    return canvas
146
 
147
# ---------------------------
# MAIN PREDICT FUNCTION
# ---------------------------
def predict_live(frame):
    """Run detection on one frame and return the annotated image plus debug text.

    Args:
        frame: Image array for the current frame, or None when no frame is
            available.

    Returns:
        ``(image, text)``. ``image`` is None when there is no frame or when the
        ONNX session raises; ``text`` then carries the reason.
    """
    if frame is None:
        return None, "No frame"

    orig_h, orig_w = frame.shape[0], frame.shape[1]
    input_tensor = preprocess_frame(frame, PREVIEW_INPUT_SIZE)
    input_name = sess.get_inputs()[0].name

    # Time only the session call so the figure reported as "Inference time"
    # excludes preprocessing, postprocessing and drawing. perf_counter is a
    # monotonic clock intended for measuring short intervals.
    t0 = time.perf_counter()
    try:
        outputs = sess.run(None, {input_name: input_tensor})
    except Exception as e:
        # UI boundary: show the failure in the text box instead of raising.
        return None, f"ONNX runtime error: {e}"
    t1 = time.perf_counter()

    detections = postprocess_outputs(outputs, orig_w, orig_h, conf_thresh=CONF_THRESHOLD, debug=True)
    pil_img = Image.fromarray(frame.astype("uint8"), "RGB")
    out_img = draw_boxes_on_image(pil_img, detections)

    debug_txt = (
        f"Model: {os.path.basename(MODEL_PATH)}\n"
        f"Input shape: {sess.get_inputs()[0].shape}\n"
        f"Output(s): {[o.shape for o in sess.get_outputs()]}\n"
        f"Detections: {len(detections)}\n"
        f"Inference time: {(t1 - t0)*1000:.1f} ms"
    )
    return out_img, debug_txt
175
 
176
# ---------------------------
# GRADIO INTERFACE
# ---------------------------
# Webcam frames go in as numpy arrays; the annotated image and the debug text
# come back out. live=True re-runs predict_live as the input changes.
iface = gr.Interface(
    title="ONNX Live Detection",
    description="Real-time detection using your ONNX model. Adjust CONF_THRESHOLD or input size if needed.",
    fn=predict_live,
    inputs=gr.Image(source="webcam", type="numpy"),
    outputs=[
        gr.Image(type="pil"),
        gr.Textbox(lines=6),
    ],
    live=True,
)
187
 
188
  if __name__ == "__main__":
 
 
1
  import gradio as gr
2
  import onnxruntime as ort
3
  import numpy as np
4
  from PIL import Image, ImageDraw, ImageFont
5
  import time, os
6
+ import pyttsx3 # for optional voice alerts
7
 
8
  # ---------------------------
9
  # CONFIG
10
  # ---------------------------
11
MODEL_PATH = "model.onnx"
INPUT_SIZE = (640, 640)
CONF_THRESHOLD_DEFAULT = 0.35


class _SilentEngine:
    """No-op stand-in for the speech engine when none can be started."""

    def say(self, text):
        """Discard the utterance."""

    def runAndWait(self):
        """Return immediately; nothing is ever queued."""


# Initialize voice engine
# Voice alerts are optional, so a host without a usable speech backend must not
# stop the app from starting. The exception type depends on the pyttsx3 driver
# (NOTE(review): confirm which ones occur on the deployment host), hence the
# broad catch at this startup boundary.
try:
    engine = pyttsx3.init()
    engine.setProperty("rate", 180)
except Exception as exc:
    print(f"Voice alerts disabled: {exc}")
    engine = _SilentEngine()

# Load model
print(f"Loading ONNX model from: {MODEL_PATH}")
sess = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])

# Print the model signature once at startup to help diagnose shape mismatches.
print("\nONNX Model Inputs:")
for i, inp in enumerate(sess.get_inputs()):
    print(f" Input[{i}] name={inp.name}, shape={inp.shape}, dtype={inp.type}")

print("\nONNX Model Outputs:")
for i, out in enumerate(sess.get_outputs()):
    print(f" Output[{i}] name={out.name}, shape={out.shape}, dtype={out.type}")
28
 
29
# Preprocess
def preprocess_frame(frame_np):
    """Convert a camera frame into a normalized float32 NCHW tensor.

    Args:
        frame_np: Image array; it is cast to uint8 before use.

    Returns:
        A float32 array scaled to 0..1, resized to INPUT_SIZE, with channels
        moved ahead of the spatial axes and a leading batch axis of length 1.
    """
    # Let Pillow infer the mode from the array and convert afterwards, so a
    # grayscale or RGBA frame is not reinterpreted as packed RGB bytes.
    img = Image.fromarray(frame_np.astype("uint8")).convert("RGB")
    img_resized = img.resize(INPUT_SIZE)
    arr = np.array(img_resized).astype(np.float32) / 255.0
    arr = np.transpose(arr, (2, 0, 1))[np.newaxis, ...]  # NCHW
    return arr
36
 
37
# Postprocess
def postprocess_outputs(outputs, orig_w, orig_h, conf_thresh=0.35):
    """Decode the first model output into a list of detections.

    The first output is read as rows of
    ``cx, cy, w, h, objectness, class scores...``. A row's score is the
    objectness times its highest class score. Boxes whose ``cx, cy, w, h`` are
    all <= 1.0 are treated as normalized and scaled by ``orig_w`` / ``orig_h``;
    other boxes are converted to corner form without scaling.

    Args:
        outputs: Sequence of model outputs; only the first one is used.
        orig_w: Width used to scale normalized x coordinates.
        orig_h: Height used to scale normalized y coordinates.
        conf_thresh: Rows scoring below this value are dropped.

    Returns:
        A list of dicts with keys ``"box"`` (``[x1, y1, x2, y2]``),
        ``"score"`` and ``"class"``. Empty when there are no outputs or the
        first output is not a 2-D array with at least six columns.
    """
    outs = [np.asarray(o) for o in outputs]
    if not outs:
        # Indexing outs[0] on an empty output list would raise IndexError.
        return []

    cand = outs[0]
    if cand.ndim == 3 and cand.shape[0] == 1:
        cand = cand[0]  # drop the batch axis

    detections = []
    if cand.ndim != 2 or cand.shape[1] < 6:
        return detections

    for row in cand:
        cx, cy, w, h = row[0], row[1], row[2], row[3]
        obj_conf = float(row[4])
        # Never empty here: the guard above ensures at least six columns.
        class_probs = row[5:]
        best_idx = int(np.argmax(class_probs))
        cls_conf = float(class_probs[best_idx])
        score = obj_conf * cls_conf
        if score < conf_thresh:
            continue
        if max(cx, cy, w, h) <= 1.0:
            x1 = (cx - w / 2) * orig_w
            y1 = (cy - h / 2) * orig_h
            x2 = (cx + w / 2) * orig_w
            y2 = (cy + h / 2) * orig_h
        else:
            x1, y1, x2, y2 = cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2
        detections.append({"box": [x1, y1, x2, y2], "score": score, "class": best_idx})
    return detections
64
 
65
# Draw boxes
def draw_boxes_on_image(pil_img, detections):
    """Return an RGB copy of *pil_img* with every detection outlined and captioned.

    Each detection supplies ``"box"`` (x1, y1, x2, y2), ``"score"`` and
    ``"class"``; the caption reads ``Class <index> <score>``.
    """
    canvas = pil_img.convert("RGB")
    pen = ImageDraw.Draw(canvas)
    default_font = ImageFont.load_default()
    for det in detections:
        left, top, right, bottom = det["box"]
        name = f"Class {det['class']}"
        caption = f"{name} {det['score']:.2f}"
        pen.rectangle([left, top, right, bottom], outline="red", width=3)
        # Caption sits just above the box, clamped to the top edge of the image.
        pen.text((left, max(0, top - 12)), caption, fill="red", font=default_font)
    return canvas
77
 
78
# Voice alert
last_spoken = ""


def speak_alert(detections):
    """Announce the detected classes, skipping repeats of the last message.

    Args:
        detections: List of detection dicts; only each entry's ``"class"`` is
            read.

    Does nothing when *detections* is empty or when the message equals the one
    spoken last. Otherwise the call blocks in ``engine.runAndWait()`` until the
    engine returns, then records the message in ``last_spoken``.
    """
    global last_spoken
    if not detections:
        return
    # Sort the unique names: a bare set has no defined order, so the same
    # classes could produce a different string and be announced again.
    msg = ", ".join(sorted({f"class {d['class']}" for d in detections}))
    if msg == last_spoken:
        return
    engine.say(f"Detected: {msg}")
    engine.runAndWait()
    last_spoken = msg
90
+
91
# Main function
def predict_live(frame, conf_threshold):
    """Run detection on one frame; return the annotated image and a status text.

    Args:
        frame: Image array for the current frame, or None when no frame is
            available.
        conf_threshold: Minimum score passed on to ``postprocess_outputs``.

    Returns:
        ``(image, text)``. ``image`` is None when there is no frame or when the
        ONNX session raises; ``text`` then carries the reason.
    """
    if frame is None:
        return None, "No frame"

    orig_h, orig_w = frame.shape[:2]
    input_tensor = preprocess_frame(frame)
    input_name = sess.get_inputs()[0].name
    try:
        outputs = sess.run(None, {input_name: input_tensor})
    except Exception as e:
        # UI boundary: show the failure in the text box rather than letting
        # one bad frame raise out of the live handler.
        return None, f"ONNX runtime error: {e}"
    detections = postprocess_outputs(outputs, orig_w, orig_h, conf_thresh=conf_threshold)

    pil_img = Image.fromarray(frame.astype("uint8"), "RGB")
    out_img = draw_boxes_on_image(pil_img, detections)

    speak_alert(detections)

    debug_txt = (
        f"Model: {os.path.basename(MODEL_PATH)}\n"
        f"Detections: {len(detections)}"
    )
    return out_img, debug_txt
111
 
112
# Gradio interface with webcam + slider
# Inputs map positionally onto predict_live(frame, conf_threshold); outputs are
# the annotated image and the status text.
iface = gr.Interface(
    title="ONNX Live Camera Detection",
    description="Continuous live detection with bounding boxes + voice alerts",
    fn=predict_live,
    inputs=[
        gr.Image(sources=["webcam"], type="numpy", label="Live Camera"),
        gr.Slider(
            0.05,
            0.9,
            value=CONF_THRESHOLD_DEFAULT,
            step=0.05,
            label="Confidence Threshold",
        ),
    ],
    outputs=[
        gr.Image(type="pil"),
        gr.Textbox(lines=4),
    ],
    live=True,
)
124
 
125
  if __name__ == "__main__":