# irisbuddy / app.py — live ONNX object detection with a Gradio webcam UI.
import gradio as gr
import onnxruntime as ort
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import time, os
import pyttsx3 # for optional voice alerts
# ---------------------------
# CONFIG
# ---------------------------
MODEL_PATH = "model.onnx"
INPUT_SIZE = (640, 640)        # (width, height) the ONNX model expects
CONF_THRESHOLD_DEFAULT = 0.35  # default detection confidence threshold


class _SilentEngine:
    """No-op stand-in for pyttsx3 when no TTS backend is available.

    Exposes the same methods the app calls on the real engine, so
    `speak_alert` works unchanged (it just stays silent).
    """

    def say(self, text):
        pass

    def runAndWait(self):
        pass

    def setProperty(self, name, value):
        pass


# Initialize voice engine.
# pyttsx3.init() raises on headless hosts (no audio device / espeak missing),
# which previously killed the whole app at import time.  Fall back to a
# silent engine so detection keeps working without voice alerts.
try:
    engine = pyttsx3.init()
    engine.setProperty("rate", 180)  # speaking rate (words per minute)
except Exception as exc:
    print(f"Voice engine unavailable ({exc}); continuing without audio alerts.")
    engine = _SilentEngine()
# Load model
print(f"Loading ONNX model from: {MODEL_PATH}")
sess = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])

# Dump the model's input/output signatures so shape mismatches are easy to spot.
print("\nONNX Model Inputs:")
for idx, spec in enumerate(sess.get_inputs()):
    print(f" Input[{idx}] name={spec.name}, shape={spec.shape}, dtype={spec.type}")

print("\nONNX Model Outputs:")
for idx, spec in enumerate(sess.get_outputs()):
    print(f" Output[{idx}] name={spec.name}, shape={spec.shape}, dtype={spec.type}")
# Preprocess
def preprocess_frame(frame_np):
    """Convert an RGB numpy frame into a normalized NCHW float32 tensor.

    The frame is resized to INPUT_SIZE, scaled into [0, 1], and reordered
    from HWC to CHW with a leading batch axis of size 1.
    """
    resized = Image.fromarray(frame_np.astype("uint8"), "RGB").resize(INPUT_SIZE)
    tensor = np.asarray(resized, dtype=np.float32) / 255.0
    # HWC -> CHW, then add the batch dimension.
    return np.transpose(tensor, (2, 0, 1))[None, ...]
# Postprocess
def postprocess_outputs(outputs, orig_w, orig_h, conf_thresh=0.35):
    """Decode raw ONNX outputs into a list of detection dicts.

    Expects the first output to be rows of
    [cx, cy, w, h, objectness, class_prob...] with at least 6 columns
    (an optional leading batch axis of size 1 is squeezed away).  Box
    coordinates that are all <= 1.0 are treated as normalized and scaled
    to (orig_w, orig_h); larger values are assumed to already be pixels.

    Returns a list of {"box": [x1, y1, x2, y2], "score": float, "class": int};
    rows scoring below `conf_thresh` are dropped.
    """
    preds = np.array(outputs[0])
    if preds.ndim == 3 and preds.shape[0] == 1:
        preds = preds[0]

    results = []
    # Anything that is not (N, >=6) is an unsupported layout -> no detections.
    if preds.ndim != 2 or preds.shape[1] < 6:
        return results

    for det in preds:
        cx, cy, w, h = det[0], det[1], det[2], det[3]
        objectness = float(det[4])
        probs = det[5:]
        if probs.size > 0:
            cls_id = int(np.argmax(probs))
            confidence = objectness * float(probs[cls_id])
        else:
            cls_id = 0
            confidence = objectness
        if confidence < conf_thresh:
            continue
        # Heuristic: all coordinates <= 1.0 means normalized to image size.
        if max(cx, cy, w, h) <= 1.0:
            scale_x, scale_y = orig_w, orig_h
        else:
            scale_x, scale_y = 1.0, 1.0
        half_w, half_h = w / 2, h / 2
        box = [
            (cx - half_w) * scale_x,
            (cy - half_h) * scale_y,
            (cx + half_w) * scale_x,
            (cy + half_h) * scale_y,
        ]
        results.append({"box": box, "score": confidence, "class": cls_id})
    return results
# Draw boxes
def draw_boxes_on_image(pil_img, detections):
    """Return an RGB copy of *pil_img* with red boxes and score labels.

    Each detection dict must provide "box" ([x1, y1, x2, y2]), "class",
    and "score"; the label is rendered just above the box's top-left
    corner (clamped so it never goes above the image).
    """
    canvas = pil_img.convert("RGB")
    painter = ImageDraw.Draw(canvas)
    default_font = ImageFont.load_default()
    for det in detections:
        left, top, right, bottom = det["box"]
        caption = f"Class {det['class']} {det['score']:.2f}"
        painter.rectangle([left, top, right, bottom], outline="red", width=3)
        painter.text((left, max(0, top - 12)), caption, fill="red", font=default_font)
    return canvas
# Voice alert
last_spoken = ""  # last announced message; used to avoid repeating alerts


def speak_alert(detections):
    """Announce the set of detected classes, once per change.

    Builds a deterministic, comma-separated message of the unique class
    labels and speaks it only when it differs from the previous
    announcement.  TTS errors are swallowed so audio problems never break
    the detection loop.
    """
    global last_spoken
    if not detections:
        return
    # sorted() gives a stable order: an identical set of classes always
    # produces the same message string, so the dedup below actually works
    # (bare set() iteration order varies and caused spurious re-announcements).
    unique_labels = sorted({f"class {d['class']}" for d in detections})
    msg = ", ".join(unique_labels)
    if msg != last_spoken:
        try:
            engine.say(f"Detected: {msg}")
            engine.runAndWait()
        except Exception as exc:
            # Best-effort voice alert: log and keep detecting.
            print(f"Voice alert failed: {exc}")
        # Mark as announced even on failure to avoid hammering a broken engine.
        last_spoken = msg
# Main function
def predict_live(frame, conf_threshold):
    """Run one detection pass on a webcam frame.

    Returns (annotated PIL image, debug text), or (None, "No frame") when
    the camera has not delivered a frame yet.
    """
    if frame is None:
        return None, "No frame"

    height, width = frame.shape[:2]

    # Inference: preprocess -> ONNX session -> decode detections.
    tensor = preprocess_frame(frame)
    feed = {sess.get_inputs()[0].name: tensor}
    raw_outputs = sess.run(None, feed)
    detections = postprocess_outputs(raw_outputs, width, height, conf_thresh=conf_threshold)

    annotated = draw_boxes_on_image(Image.fromarray(frame.astype("uint8"), "RGB"), detections)
    speak_alert(detections)

    debug_txt = f"Model: {os.path.basename(MODEL_PATH)}\nDetections: {len(detections)}"
    return annotated, debug_txt
# Gradio interface with webcam + slider
camera_input = gr.Image(sources=["webcam"], type="numpy", label="Live Camera")
threshold_slider = gr.Slider(
    0.05, 0.9, value=CONF_THRESHOLD_DEFAULT, step=0.05, label="Confidence Threshold"
)

iface = gr.Interface(
    fn=predict_live,
    inputs=[camera_input, threshold_slider],
    outputs=[gr.Image(type="pil"), gr.Textbox(lines=4)],
    live=True,
    title="ONNX Live Camera Detection",
    description="Continuous live detection with bounding boxes + voice alerts",
)

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from inside a container.
    iface.launch(server_name="0.0.0.0", server_port=7860)