"""Gradio demo for VisionGauge: upload an image or stream a webcam feed,
run the model, and show the annotated frame plus per-box predictions as JSON.
"""

import json

import cv2
import gradio as gr
import numpy as np
import torch

from VisionGauge.models import VisionGauge

# Width every input frame is rescaled to before inference (aspect ratio kept).
TARGET_WIDTH = 640

model = VisionGauge()


def VisionGauge_Inference(imagem):
    """Run VisionGauge on a single RGB frame.

    Parameters
    ----------
    imagem : np.ndarray | None
        Image as delivered by the Gradio image component (H, W, 3) RGB,
        or None when no image was provided.

    Returns
    -------
    tuple[np.ndarray | None, str]
        The annotated RGB frame and a JSON string of the form
        ``{"values": {"<i>": {"coords": {...}, "h_p": <float>}}}`` with one
        entry per detected box.
    """
    if imagem is None:
        return None, "No image received."

    frame_rgb = imagem.copy()

    # Robustness: a grayscale (H, W) frame would break the permute below.
    if frame_rgb.ndim == 2:
        frame_rgb = cv2.cvtColor(frame_rgb, cv2.COLOR_GRAY2RGB)

    # Resize to a fixed width, preserving the aspect ratio.
    h, w = frame_rgb.shape[:2]
    scale = TARGET_WIDTH / w
    new_h = int(h * scale)
    frame_rgb = cv2.resize(frame_rgb, (TARGET_WIDTH, new_h))

    # HWC uint8 image -> (1, C, H, W) float tensor expected by the model.
    img_tensor = (
        torch.from_numpy(frame_rgb)
        .permute(2, 0, 1)
        .float()
        .unsqueeze(0)
    )

    # Model inference; take the first (only) item of the batch.
    boxes, preds = model.predict(img_tensor)
    boxes = boxes[0]
    preds = preds[0]

    # annotate_frame works in BGR (OpenCV convention); convert back after.
    frame_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)
    annotated_bgr = model.annotate_frame(
        frame_bgr,
        boxes,
        preds.squeeze(-1),
        frame_color="#551bb3",
        font_color="#ffffff",
        fontsize=10,
        frame_thickness=4,
    )
    annotated_rgb = cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)

    # Collect per-box results keyed by detection index.
    resultados = {}
    for i in range(boxes.shape[0]):
        x1, y1, x2, y2 = boxes[i].int().tolist()
        # An all-zero box is padding from the model, not a real detection.
        if x1 == y1 == x2 == y2 == 0:
            continue
        pred = preds[i].item()
        resultados[str(i)] = {
            "coords": {"x1": x1, "y1": y1, "x2": x2, "y2": y2},
            "h_p": round(pred, 2),
        }

    # NOTE: the original special-cased empty results, but json.dumps of an
    # empty dict already yields {"values": {}} — one path covers both.
    resultado_json = json.dumps({"values": resultados}, indent=2)
    return annotated_rgb, resultado_json


def update_mode(mode):
    """Toggle component visibility when the input mode changes.

    Returns visibility updates for (input_img, webcam_img, btn) in the
    order wired in ``mode_selector.change``: "Image" mode shows the upload
    widget and the Run button; any other mode shows the webcam stream.
    """
    show_image = mode == "Image"
    return (
        gr.update(visible=show_image),
        gr.update(visible=not show_image),
        gr.update(visible=show_image),
    )


with gr.Blocks() as demo:
    gr.Markdown("# VisionGauge Demo")

    mode_selector = gr.Radio(
        ["Image", "Live Capture"], value="Image", label="Select Input Mode"
    )
    input_img = gr.Image(
        sources=["upload"],
        type="numpy",
        visible=True,
        # NOTE(review): webcam_options on an upload-only component looks
        # inert — kept from the original; confirm before removing.
        webcam_options=gr.WebcamOptions(mirror=False),
    )
    webcam_img = gr.Image(
        sources=["webcam"],
        type="numpy",
        streaming=True,
        visible=False,
        webcam_options=gr.WebcamOptions(mirror=False),
    )
    output_img = gr.Image(label="Result")
    output_txt = gr.Textbox(label="Predictions", show_label=True, buttons=["copy"])
    btn = gr.Button("Run model", visible=True)

    # Update the interface when the mode changes.
    mode_selector.change(
        update_mode,
        inputs=mode_selector,
        outputs=[input_img, webcam_img, btn],
    )

    # IMAGE mode: inference runs on button click.
    btn.click(
        VisionGauge_Inference,
        inputs=input_img,
        outputs=[output_img, output_txt],
    )

    # LIVE mode: inference runs automatically on each streamed frame.
    webcam_img.stream(
        VisionGauge_Inference,
        inputs=webcam_img,
        outputs=[output_img, output_txt],
    )


if __name__ == "__main__":
    # Guard so importing this module does not start a public share server.
    demo.launch(share=True)