# VisionAIGradio / app.py
# Atulsinghbirla's picture
# Rename cuda.py to app.py
# 8f8b3fa verified
import gradio as gr
import cv2
import torch
import numpy as np
import re
from ultralytics import YOLO
# =====================
# DEVICE SETUP
# =====================
# Use the GPU when CUDA is available; otherwise run everything on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# =====================
# LOAD MODELS
# =====================
# Detector used by the "Object Detection" mode. A failure here is left to
# propagate: the app has no fallback for its primary model.
object_model = YOLO("yolov8s.pt").to(device)
# Detector used by the "Currency Detection" mode. It is optional: when the
# weights cannot be loaded the name is set to None and process_image reports
# the missing model to the user instead of crashing.
try:
    currency_model = YOLO("best.pt").to(device)
except Exception as exc:
    # Catch Exception rather than using a bare except so that
    # KeyboardInterrupt / SystemExit still stop the process, and log the
    # reason so a missing or corrupt best.pt can be diagnosed.
    print(f"Currency model could not be loaded; currency detection disabled: {exc}")
    currency_model = None
# Minimum confidence passed to predict() for each detector.
OBJECT_CONF_THRESHOLD = 0.5
CURRENCY_CONF_THRESHOLD = 0.65
# =====================
# CORE LOGIC
# =====================
def _horizontal_position(center_x, frame_width):
    """Classify an x coordinate as "left", "center" or "right" by thirds of the frame width."""
    if center_x < frame_width / 3:
        return "left"
    if center_x > 2 * frame_width / 3:
        return "right"
    return "center"


def _currency_phrase(label):
    """Turn a currency class label into the text reported to the user.

    The first run of digits in the label is reported as "<digits> rupees";
    a label without any digits is returned unchanged.
    """
    digits = re.search(r"\d+", label)
    return f"{digits.group()} rupees" if digits else label


def process_image(image, mode):
    """Run the selected detector on an image and describe the detections.

    Args:
        image: Image array in RGB channel order (it is converted with
            cv2.COLOR_RGB2BGR before inference), or None when nothing
            was uploaded.
        mode: "Object Detection" or "Currency Detection". Any other value
            runs no detector and yields "Nothing detected".

    Returns:
        A tuple ``(output_image, text)``. ``output_image`` is the annotated
        frame converted back to RGB, the untouched input when the currency
        model is unavailable, or None when no image was given. ``text`` is
        the detections joined with " | ", or a status message.
    """
    if image is None:
        return None, "No image uploaded."

    # The detectors are fed BGR frames; convert back to RGB before returning.
    frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    spoken_texts = []

    if mode == "Object Detection":
        results = object_model.predict(
            frame, conf=OBJECT_CONF_THRESHOLD, device=device, verbose=False
        )[0]
        frame = results.plot()
        # The frame width is constant for all boxes, so read it once.
        frame_width = frame.shape[1]
        for box in results.boxes:
            name = object_model.names[int(box.cls[0])]
            x1, _, x2, _ = box.xyxy[0].cpu().numpy()
            position = _horizontal_position((x1 + x2) / 2, frame_width)
            spoken_texts.append(f"{name} on {position}")

    elif mode == "Currency Detection":
        if currency_model is None:
            return image, "Currency model not found (best.pt missing)."
        results = currency_model.predict(
            frame, conf=CURRENCY_CONF_THRESHOLD, device=device, verbose=False
        )[0]
        frame = results.plot()
        # Only the single most confident detection is reported.
        top_box = max(
            results.boxes, key=lambda b: float(b.conf[0]), default=None
        )
        if top_box is not None:
            label = currency_model.names[int(top_box.cls[0])]
            if label:
                spoken_texts.append(_currency_phrase(label))

    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    if not spoken_texts:
        spoken_texts.append("Nothing detected")
    return frame, " | ".join(spoken_texts)
# =====================
# GRADIO UI
# =====================
with gr.Blocks(title="Blind Assist System – NIELIT Ropar") as demo:
    # Page header shown above the controls. The text is kept flush-left so
    # the Markdown source is exactly what gets rendered.
    gr.Markdown(
        """
# 🦯 Blind Assist System
**GPU Accelerated | YOLOv8 | NIELIT Ropar (2025)**
Upload an image and choose detection mode.
"""
    )

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            image_input = gr.Image(type="numpy", label="Upload Image")
            mode = gr.Radio(
                choices=["Object Detection", "Currency Detection"],
                value="Object Detection",
                label="Detection Mode",
            )
            run_btn = gr.Button("Run Detection")

        # Right column: annotated image and the textual description.
        with gr.Column():
            image_output = gr.Image(label="Processed Output")
            text_output = gr.Textbox(label="Detected Information")

    # Wire the button to the detection routine: (image, mode) -> (image, text).
    run_btn.click(
        process_image,
        [image_input, mode],
        [image_output, text_output],
    )

# Start the web server for the interface.
demo.launch()