# VisionGauge / app.py
# Author: claytonsds -- commit "Create app.py" (76f601a, verified)
import gradio as gr
import torch
import numpy as np
import cv2
import json
from VisionGauge.models import VisionGauge
# Instantiate the VisionGauge model once at import time so every
# inference call (button click or webcam frame) reuses the same weights.
model = VisionGauge()
def VisionGauge_Inference(imagem, target_width=640):
    """Run VisionGauge on one RGB frame and return (annotated image, JSON).

    Parameters
    ----------
    imagem : numpy.ndarray | None
        Frame from Gradio, normally (H, W, 3) RGB uint8. Grayscale (H, W)
        and RGBA (H, W, 4) frames are normalized to 3-channel RGB here.
        ``None`` (no image) returns ``(None, "No image received.")``.
    target_width : int
        Width the frame is resized to before inference, aspect preserved.
        Defaults to 640 (the original hard-coded value).

    Returns
    -------
    tuple[numpy.ndarray, str]
        The annotated RGB frame and a JSON string of the form
        ``{"values": {"<idx>": {"coords": {...}, "h_p": float}}}``.
    """
    if imagem is None:
        return None, "No image received."

    frame_rgb = imagem.copy()

    # Normalize channel layout: Gradio can deliver grayscale or RGBA frames,
    # which would break cv2.COLOR_RGB2BGR and a 3-channel model input.
    if frame_rgb.ndim == 2:
        frame_rgb = cv2.cvtColor(frame_rgb, cv2.COLOR_GRAY2RGB)
    elif frame_rgb.shape[2] == 4:
        frame_rgb = cv2.cvtColor(frame_rgb, cv2.COLOR_RGBA2RGB)

    # Resize to the target width, preserving aspect ratio.
    h, w = frame_rgb.shape[:2]
    scale = target_width / w
    frame_rgb = cv2.resize(frame_rgb, (target_width, int(h * scale)))

    # HWC uint8 -> NCHW float batch of one. No normalization is applied;
    # presumably VisionGauge.predict expects raw 0-255 values -- TODO confirm.
    img_tensor = torch.from_numpy(frame_rgb).permute(2, 0, 1).float().unsqueeze(0)

    # Model inference; take the first (only) element of the batch.
    boxes, preds = model.predict(img_tensor)
    boxes, preds = boxes[0], preds[0]

    # annotate_frame appears to work in BGR; convert back to RGB for display.
    frame_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)
    annotated_bgr = model.annotate_frame(
        frame_bgr,
        boxes,
        preds.squeeze(-1),
        frame_color="#551bb3",
        font_color="#ffffff",
        fontsize=10,
        frame_thickness=4,
    )
    annotated_rgb = cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)

    # Build the JSON payload: one entry per detection, keyed by index.
    resultados = {}
    for i in range(boxes.shape[0]):
        x1, y1, x2, y2 = boxes[i].int().tolist()
        # All-zero boxes look like padding from the model; skip them.
        if x1 == y1 == x2 == y2 == 0:
            continue
        resultados[str(i)] = {
            "coords": {"x1": x1, "y1": y1, "x2": x2, "y2": y2},
            "h_p": round(preds[i].item(), 2),
        }

    # An empty dict serializes to {"values": {}} -- same output as the
    # original's explicit no-detection branch.
    resultado_json = json.dumps({"values": resultados}, indent=2)
    return annotated_rgb, resultado_json
def update_mode(mode):
    """Return visibility updates for (input_img, webcam_img, btn).

    "Image" mode shows the upload input and the run button; any other mode
    ("Live Capture") shows the webcam input instead.
    """
    image_mode = mode == "Image"
    return (
        gr.update(visible=image_mode),
        gr.update(visible=not image_mode),
        gr.update(visible=image_mode),
    )
with gr.Blocks() as demo:
    gr.Markdown("# VisionGauge Demo")

    # Mode toggle: static image upload vs. live webcam stream.
    mode_selector = gr.Radio(
        ["Image", "Live Capture"],
        value="Image",
        label="Select Input Mode",
    )

    # Upload input -- visible by default.
    input_img = gr.Image(
        sources=["upload"],
        type="numpy",
        visible=True,
        webcam_options=gr.WebcamOptions(mirror=False),
    )

    # Webcam input -- hidden until "Live Capture" is selected.
    webcam_img = gr.Image(
        sources=["webcam"],
        type="numpy",
        streaming=True,
        visible=False,
        webcam_options=gr.WebcamOptions(mirror=False),
    )

    output_img = gr.Image(label="Result")
    output_txt = gr.Textbox(label="Predictions", show_label=True, buttons=["copy"])
    btn = gr.Button("Run model", visible=True)

    # Swap component visibility whenever the mode changes.
    mode_selector.change(
        update_mode,
        inputs=mode_selector,
        outputs=[input_img, webcam_img, btn],
    )

    # IMAGE mode: run inference on button click.
    btn.click(
        VisionGauge_Inference,
        inputs=input_img,
        outputs=[output_img, output_txt],
    )

    # LIVE mode: run inference on every streamed webcam frame.
    webcam_img.stream(
        VisionGauge_Inference,
        inputs=webcam_img,
        outputs=[output_img, output_txt],
    )

demo.launch(share=True)