Spaces:

Atulsinghbirla
/

VisionAIGradio

Sleeping

App Files Files Community

VisionAIGradio / app.py

Atulsinghbirla

Rename cuda.py to app.py

8f8b3fa verified 23 days ago

raw

history blame contribute delete

3.36 kB

	import gradio as gr
	import cv2
	import torch
	import numpy as np
	import re
	from ultralytics import YOLO

	# =====================
	# DEVICE SETUP
	# =====================
	# Use the GPU when PyTorch reports one is available, otherwise fall back to CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# =====================
	# LOAD MODELS
	# =====================
	# Detector used by the "Object Detection" mode. It is required, so a load
	# failure here is allowed to propagate and stop the app.
	object_model = YOLO("yolov8s.pt").to(device)

	# Detector used by the "Currency Detection" mode. It is optional: when its
	# weights cannot be loaded the app still starts with currency_model = None.
	try:
	    currency_model = YOLO("best.pt").to(device)
	except Exception as exc:
	    # Catch Exception rather than using a bare except so that
	    # KeyboardInterrupt / SystemExit are not swallowed, and report why the
	    # model is unavailable instead of failing silently.
	    print(f"Currency model could not be loaded; currency detection disabled: {exc}")
	    currency_model = None

	# Minimum confidence values passed as `conf=` to the respective model's predict().
	OBJECT_CONF_THRESHOLD = 0.5
	CURRENCY_CONF_THRESHOLD = 0.65


	# =====================
	# CORE LOGIC
	# =====================
	def _horizontal_position(center_x, width):
	    """Return "left", "center" or "right" for a box centre in an image of *width*.

	    The image is split into three equal vertical bands; centres lying exactly
	    on a band boundary count as "center".
	    """
	    if center_x < width / 3:
	        return "left"
	    if center_x > 2 * width / 3:
	        return "right"
	    return "center"


	def _currency_phrase(label):
	    """Turn a currency class label into the text to announce.

	    If the label contains digits, the first run of digits is announced as
	    "<digits> rupees"; otherwise the label itself is returned unchanged.
	    """
	    match = re.search(r"\d+", label)
	    return f"{match.group()} rupees" if match else label


	def process_image(image, mode):
	    """Run the selected detector on an image and describe what was found.

	    Args:
	        image: RGB image array (the UI passes a NumPy array), or None when
	            nothing was uploaded.
	        mode: "Object Detection" or "Currency Detection". Any other value
	            runs no detector and reports "Nothing detected".

	    Returns:
	        A ``(image, text)`` tuple. ``image`` is the frame returned by
	        ``results.plot()`` converted back to RGB (or the untouched input /
	        None on the early-exit paths); ``text`` is the description, with
	        individual detections joined by " | ".
	    """
	    if image is None:
	        return None, "No image uploaded."

	    # The models are given a BGR frame; it is converted back to RGB before returning.
	    frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
	    spoken_texts = []

	    if mode == "Object Detection":
	        results = object_model.predict(
	            frame, conf=OBJECT_CONF_THRESHOLD, device=device, verbose=False
	        )[0]
	        frame = results.plot()

	        # The width is the same for every box, so read it once.
	        width = frame.shape[1]
	        for box in results.boxes:
	            name = object_model.names[int(box.cls[0])]
	            x1, _, x2, _ = box.xyxy[0].cpu().numpy()
	            pos = _horizontal_position((x1 + x2) / 2, width)
	            spoken_texts.append(f"{name} on {pos}")

	    elif mode == "Currency Detection":
	        if currency_model is None:
	            return image, "Currency model not found (best.pt missing)."

	        results = currency_model.predict(
	            frame, conf=CURRENCY_CONF_THRESHOLD, device=device, verbose=False
	        )[0]
	        frame = results.plot()

	        # Only the single most confident detection is announced.
	        best_box = max(
	            results.boxes, key=lambda b: float(b.conf[0]), default=None
	        )
	        if best_box is not None:
	            best_name = currency_model.names[int(best_box.cls[0])]
	            if best_name:
	                spoken_texts.append(_currency_phrase(best_name))

	    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

	    if not spoken_texts:
	        spoken_texts.append("Nothing detected")

	    # Plain " | " separator: "\|" is not a valid Python escape and would put a
	    # literal backslash into the text shown to the user.
	    return frame, " | ".join(spoken_texts)


	# =====================
	# GRADIO UI
	# =====================
	# Build the two-column page: inputs (image, mode, run button) in the first
	# column, results (annotated image, description text) in the second.
	with gr.Blocks(title="Blind Assist System – NIELIT Ropar") as demo:
	    # Plain "|" is used in the text below: "\|" inside a non-raw Python string
	    # is an invalid escape sequence.
	    gr.Markdown(
	        """
	# 🦯 Blind Assist System
	GPU Accelerated | YOLOv8 | NIELIT Ropar (2025)

	Upload an image and choose detection mode.
	"""
	    )

	    with gr.Row():
	        with gr.Column():
	            # type="numpy" so process_image receives an array it can hand to OpenCV.
	            image_input = gr.Image(type="numpy", label="Upload Image")
	            mode = gr.Radio(
	                ["Object Detection", "Currency Detection"],
	                value="Object Detection",
	                label="Detection Mode",
	            )
	            run_btn = gr.Button("Run Detection")

	        with gr.Column():
	            image_output = gr.Image(label="Processed Output")
	            text_output = gr.Textbox(label="Detected Information")

	    # process_image returns (image, text), matching the two outputs in order.
	    run_btn.click(
	        fn=process_image,
	        inputs=[image_input, mode],
	        outputs=[image_output, text_output],
	    )

	demo.launch()