from ultralytics import YOLO
import gradio as gr

# Pre-load all the models at the start to reduce inference time
yolo = {
    "nano": YOLO("yolo11n-pose.pt"),
    "small": YOLO("yolo11s-pose.pt"),
    "medium": YOLO("yolo11m-pose.pt"),
    "large": YOLO("yolo11l-pose.pt"),
    "extra": YOLO("yolo11x-pose.pt"),
}


def inference(frame, model, height, confidence, overlap):
    """Run pose estimation on one webcam frame and return the annotated frame.

    Args:
        frame: Image delivered by the ``gr.Image`` component (``type="numpy"``),
            or ``None`` when the component has no image to deliver.
        model: Key into ``yolo`` selecting which pre-loaded model to use.
        height: Inference height in pixels; the inference width is twice that.
        confidence: Confidence threshold forwarded to ``predict`` as ``conf``.
        overlap: Overlap threshold forwarded to ``predict`` as ``iou``.

    Returns:
        The frame with only pose keypoints and skeleton lines drawn on it, in
        the same channel order as the input, or ``None`` if ``frame`` is
        ``None``.
    """
    # The stream can fire without an image (e.g. before the camera has
    # produced a frame); there is nothing to predict on in that case.
    if frame is None:
        return None

    # Sliders may hand back floats; image sizes are pixel counts.
    height = int(height)

    # Gradio delivers RGB arrays, whereas Ultralytics treats numpy sources as
    # BGR (OpenCV convention). Flip the channel axis so the model sees the
    # colours it was trained on. ``copy()`` yields a contiguous array, which
    # the plotting code requires.
    bgr_frame = frame[..., ::-1].copy()

    results = yolo[model].predict(
        source=bgr_frame,
        imgsz=(height, height * 2),  # (height, width): 2:1 width-to-height resizing
        conf=confidence,
        iou=overlap,
        half=False,  # True is supposed to speed up inference, but does the opposite for some reason.
    )

    # Draw the new frame here (only pose markers + thick lines)
    annotated = results[0].plot(
        labels=False,
        boxes=False,
        conf=False,
        line_width=10,
        kpt_radius=10,
    )

    # ``plot()`` returns BGR; convert back to RGB for display in Gradio.
    return annotated[..., ::-1].copy()


with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        """
        # ME 2: Real-Time Human Body Keypoint Tracking
        Live demo of YOLO11's pose estimation model.
        """
    )

    with gr.Row():
        with gr.Column(scale=6):
            frame = gr.Image(
                label="Camera",
                type="numpy",
                sources="webcam",
                streaming=True,
            )
        with gr.Column(scale=4):
            model = gr.Radio(
                label="Model Size",
                choices=["nano", "small", "medium", "large", "extra"],
                value="nano",
            )
            height = gr.Slider(
                label="Inference Resolution",
                minimum=32.0,
                maximum=640.0,
                step=32.0,
                value=320,
            )
            confidence = gr.Slider(
                label="Confidence Threshold",
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.25,
            )
            overlap = gr.Slider(
                label="Overlap Threshold",
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.45,
            )

    # The camera component is both the input and the output, so the annotated
    # frame replaces the raw webcam image in place.
    frame.stream(
        inference,
        inputs=[frame, model, height, confidence, overlap],
        outputs=frame,
        stream_every=0.1,  # FPS is capped at 10, and higher polling rates can cause lag in stream
    )

demo.launch()