import gradio as gr
import cv2
import torch
import numpy as np
import re
from ultralytics import YOLO

# =====================
# DEVICE SETUP
# =====================
device = "cuda" if torch.cuda.is_available() else "cpu"

# =====================
# LOAD MODELS
# =====================
# General-purpose detector. Any failure here propagates and aborts start-up.
object_model = YOLO("yolov8s.pt").to(device)

# The currency detector is optional: if its weights cannot be loaded the app
# keeps running and process_image() reports the model as unavailable.
try:
    currency_model = YOLO("best.pt").to(device)
except Exception as exc:
    # Catch Exception rather than using a bare `except:` so Ctrl-C and
    # SystemExit still terminate the process, and surface the cause instead
    # of discarding it.
    print(f"Warning: could not load currency model 'best.pt': {exc}")
    currency_model = None

# Confidence thresholds passed as `conf=` to predict() for each mode.
OBJECT_CONF_THRESHOLD = 0.5
CURRENCY_CONF_THRESHOLD = 0.65


# =====================
# CORE LOGIC
# =====================
def _horizontal_position(center_x, frame_width):
    """Return "left", "center" or "right" for a box centre within the frame.

    The frame is split into three equal vertical bands; a centre lying exactly
    on a band boundary is reported as "center".
    """
    if center_x < frame_width / 3:
        return "left"
    if center_x > 2 * frame_width / 3:
        return "right"
    return "center"


def _describe_objects(results, names, frame_width):
    """Build one "<class name> on <position>" phrase per detected box."""
    phrases = []
    for box in results.boxes:
        name = names[int(box.cls[0])]
        # Only the horizontal extent is needed for the left/center/right call.
        x1, _, x2, _ = box.xyxy[0].cpu().numpy()
        position = _horizontal_position((x1 + x2) / 2, frame_width)
        phrases.append(f"{name} on {position}")
    return phrases


def _describe_currency(results, names):
    """Return the phrase for the most confident currency box, or None.

    If the winning class name contains digits, the first run of digits is
    reported as "<digits> rupees"; otherwise the class name is used verbatim.
    """
    best_box = max(
        results.boxes, key=lambda box: float(box.conf[0]), default=None
    )
    if best_box is None:
        return None

    label = names[int(best_box.cls[0])]
    if not label:
        return None

    # re.search returns None when there are no digits, so no exception is
    # needed to detect the "name without a denomination" case.
    digits = re.search(r"\d+", label)
    return f"{digits.group()} rupees" if digits else label


def process_image(image, mode):
    """Run the selected detector on an image and describe what was found.

    Args:
        image: Image array treated as RGB (it is converted with
            ``cv2.COLOR_RGB2BGR`` before inference), or ``None`` when
            nothing was uploaded.
        mode: ``"Object Detection"`` or ``"Currency Detection"``. Any other
            value runs no detector and yields "Nothing detected".

    Returns:
        A ``(frame, text)`` tuple. ``frame`` is the annotated image converted
        back with ``cv2.COLOR_BGR2RGB``; it is ``None`` when no image was
        given and the untouched input when the currency model is unavailable.
        ``text`` joins the individual descriptions with " | ".
    """
    if image is None:
        return None, "No image uploaded."

    frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    spoken_texts = []

    if mode == "Object Detection":
        results = object_model.predict(
            frame, conf=OBJECT_CONF_THRESHOLD, device=device, verbose=False
        )[0]
        frame = results.plot()
        # The frame width is the same for every box, so read it once.
        spoken_texts.extend(
            _describe_objects(results, object_model.names, frame.shape[1])
        )

    elif mode == "Currency Detection":
        if currency_model is None:
            return image, "Currency model not found (best.pt missing)."

        results = currency_model.predict(
            frame, conf=CURRENCY_CONF_THRESHOLD, device=device, verbose=False
        )[0]
        frame = results.plot()

        phrase = _describe_currency(results, currency_model.names)
        if phrase:
            spoken_texts.append(phrase)

    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    if not spoken_texts:
        spoken_texts.append("Nothing detected")

    return frame, " | ".join(spoken_texts)


# =====================
# GRADIO UI
# =====================
# Build the page: a Markdown header, then one row with two columns
# (inputs first, outputs second), and a button wired to process_image().
with gr.Blocks(title="Blind Assist System – NIELIT Ropar") as demo:
    # Header text shown above the controls.
    gr.Markdown(
        """
        # 🦯 Blind Assist System  
        **GPU Accelerated | YOLOv8 | NIELIT Ropar (2025)**  

        Upload an image and choose detection mode.
        """
    )

    with gr.Row():
        # First column: user inputs.
        with gr.Column():
            # type="numpy" so the callback receives an array, not a file path.
            image_input = gr.Image(type="numpy", label="Upload Image")
            # The choice strings must match the literals that process_image()
            # compares `mode` against.
            mode = gr.Radio(
                ["Object Detection", "Currency Detection"],
                value="Object Detection",
                label="Detection Mode"
            )
            run_btn = gr.Button("Run Detection")

        # Second column: results returned by process_image().
        with gr.Column():
            image_output = gr.Image(label="Processed Output")
            text_output = gr.Textbox(label="Detected Information")

    # process_image returns (frame, text), mapped in order onto the outputs.
    run_btn.click(
        fn=process_image,
        inputs=[image_input, mode],
        outputs=[image_output, text_output]
    )

# NOTE(review): launch() is called unconditionally at module level, so it also
# runs if this file is imported. Consider an `if __name__ == "__main__":`
# guard — confirm how the app is deployed before changing it.
demo.launch()