# Hugging Face Space: claytonsds/VisionGauge (status: Sleeping)
# File size: 3,422 Bytes
# Revision: 76f601a

import gradio as gr
import torch
import numpy as np
import cv2
import json
from VisionGauge.models import VisionGauge

# Single module-level model instance: created once at import time and reused
# by every VisionGauge_Inference call (both for prediction and annotation).
model = VisionGauge()

def _resize_to_width(image, target_width=640):
    """Return *image* resized to ``target_width`` pixels wide, keeping aspect ratio."""
    h, w = image.shape[:2]
    scale = target_width / w
    # Clamp to 1: for extremely wide inputs int(h * scale) truncates to 0 and
    # cv2.resize rejects a zero-sized destination.
    new_h = max(1, int(h * scale))
    return cv2.resize(image, (target_width, new_h))


def _detections_to_json(boxes, preds):
    """Serialise per-box coordinates and predictions into the JSON payload string."""
    resultados = {}  # detections keyed by their (stringified) row index

    for i, box in enumerate(boxes):
        x1, y1, x2, y2 = box.int().tolist()

        # An all-zero box is treated as invalid and left out of the output
        # (presumably a padding row from the model -- TODO confirm).
        if not any((x1, y1, x2, y2)):
            continue

        resultados[str(i)] = {
            "coords": {
                "x1": x1,
                "y1": y1,
                "x2": x2,
                "y2": y2,
            },
            "h_p": round(preds[i].item(), 2),
        }

    # An empty dict serialises to {"values": {}}, so no special case is needed
    # when nothing was detected.
    return json.dumps({"values": resultados}, indent=2)


def VisionGauge_Inference(imagem):
    """Run the VisionGauge model on one frame and return the annotated result.

    Args:
        imagem: Image as a NumPy array in height x width x channel layout,
            interpreted as RGB (the Gradio inputs in this file use
            ``type="numpy"``), or ``None`` when no image was supplied.

    Returns:
        A ``(annotated_rgb, resultado_json)`` tuple. ``annotated_rgb`` is the
        frame resized to 640 pixels wide with the model's annotations drawn
        on it, in RGB order. ``resultado_json`` is a JSON string of the form
        ``{"values": {"<index>": {"coords": {...}, "h_p": <float>}}}``.
        When there is no usable image the tuple is
        ``(None, "No image received.")``.
    """
    # A zero-sized array cannot be resized (its width would divide by zero),
    # so treat it the same as a missing image.
    if imagem is None or imagem.size == 0:
        return None, "No image received."

    # cv2.resize returns a new array, so the caller's image is never modified.
    frame_rgb = _resize_to_width(imagem, target_width=640)

    # HWC -> CHW, cast to float and add a leading batch dimension.
    # NOTE(review): pixel values are passed unscaled; presumably
    # model.predict handles any normalisation -- confirm.
    img_tensor = (
        torch.from_numpy(frame_rgb)
        .permute(2, 0, 1)
        .float()
        .unsqueeze(0)
    )

    # Model inference; keep only the results for the single image in the batch.
    boxes, preds = model.predict(img_tensor)
    boxes = boxes[0]
    preds = preds[0]

    # annotate_frame is given a BGR frame, so convert before the call and
    # convert its output back to RGB for display.
    frame_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)
    annotated_bgr = model.annotate_frame(
        frame_bgr,
        boxes,
        preds.squeeze(-1),
        frame_color="#551bb3",
        font_color="#ffffff",
        fontsize=10,
        frame_thickness=4,
    )
    annotated_rgb = cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)

    resultado_json = _detections_to_json(boxes, preds)

    return annotated_rgb, resultado_json


def update_mode(mode):
    """Return visibility updates for the selected input mode.

    The three updates are returned in the order the mode selector's change
    event lists its outputs: upload image, webcam image, run button. In
    "Image" mode the upload image and the button are visible and the webcam
    is hidden; any other mode inverts all three.
    """
    image_mode = mode == "Image"
    return (
        gr.update(visible=image_mode),
        gr.update(visible=not image_mode),
        gr.update(visible=image_mode),
    )


# Build the demo UI. Components are created in display order: title, mode
# selector, the two alternative inputs, the outputs, then the run button.
with gr.Blocks() as demo:

    gr.Markdown(value="# VisionGauge Demo")

    # Input-mode switch; "Image" is the initial selection.
    mode_selector = gr.Radio(
        choices=["Image", "Live Capture"],
        label="Select Input Mode",
        value="Image",
    )

    # Upload-only input, visible at start-up (matches the initial "Image" mode).
    input_img = gr.Image(
        type="numpy",
        sources=["upload"],
        webcam_options=gr.WebcamOptions(mirror=False),
        visible=True,
    )

    # Streaming webcam input, hidden until "Live Capture" is selected.
    webcam_img = gr.Image(
        type="numpy",
        sources=["webcam"],
        streaming=True,
        webcam_options=gr.WebcamOptions(mirror=False),
        visible=False,
    )

    # Outputs shared by both input modes.
    output_img = gr.Image(label="Result")
    output_txt = gr.Textbox(
        label="Predictions",
        show_label=True,
        buttons=["copy"],
    )

    btn = gr.Button(value="Run model", visible=True)

    # Toggle which input widgets are visible when the mode changes.
    mode_selector.change(
        fn=update_mode,
        inputs=mode_selector,
        outputs=[input_img, webcam_img, btn],
    )

    # Image mode: inference runs when the button is clicked.
    btn.click(
        fn=VisionGauge_Inference,
        inputs=input_img,
        outputs=[output_img, output_txt],
    )

    # Live mode: inference runs on the webcam stream.
    webcam_img.stream(
        fn=VisionGauge_Inference,
        inputs=webcam_img,
        outputs=[output_img, output_txt],
    )

# Start the app, requesting a public share link.
demo.launch(share=True)