from ultralytics import YOLO
import gradio as gr

# Load every model size once at import time so that picking a different size
# in the UI does not pay the weight-loading cost during inference.
# Keys are the size names used by the UI; the suffix letter selects the
# matching YOLO11 pose weights file.
yolo = {
    size: YOLO(f"yolo11{suffix}-pose.pt")
    for size, suffix in (
        ("nano", "n"),
        ("small", "s"),
        ("medium", "m"),
        ("large", "l"),
        ("extra", "x"),
    )
}

def inference(frame, model, height, confidence, overlap):
    """Run YOLO11 pose estimation on one frame and draw the detected poses.

    Args:
        frame: Image from the Gradio ``Image`` component (``type="numpy"``),
            i.e. an RGB array of shape (H, W, 3), or ``None`` when no frame
            is available.
        model: Key into the module-level ``yolo`` dict selecting the model.
        height: Inference height in pixels; the inference width is twice
            this value.
        confidence: Detection confidence threshold (``conf`` in ``predict``).
        overlap: IoU threshold (``iou`` in ``predict``).

    Returns:
        The annotated frame as an RGB array, or ``None`` if ``frame`` is
        ``None``.
    """
    # Without a frame there is nothing to annotate. Passing ``source=None``
    # through would make ``predict`` fall back to its own default source
    # instead of the webcam image.
    if frame is None:
        return None

    # Slider values may arrive as floats; image sizes are whole pixels.
    height = int(height)

    # Gradio hands over RGB, while Ultralytics treats numpy input as BGR
    # (OpenCV convention). ``.copy()`` makes the reversed view contiguous.
    bgr_frame = frame[..., ::-1].copy()

    results = yolo[model].predict(
        source=bgr_frame,
        imgsz=(height, height * 2),  # 2:1 resizing
        conf=confidence,
        iou=overlap,
        half=False,  # True is supposed to speed up inference, but does the opposite for some reason.
    )
    # Draw the new frame here (only pose markers + thick lines)
    output = results[0].plot(
        labels=False, boxes=False, conf=False,
        line_width=10, kpt_radius=10,
    )
    # ``plot`` returns BGR; convert back to RGB for display in Gradio.
    return output[..., ::-1].copy()

# Page layout: a title, then one row holding the camera feed and the
# inference controls side by side (6:4 column scale).
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        """
        # ME 2: Real-Time Human Body Keypoint Tracking
        Live demo of YOLO11's pose estimation model.
        """
    )
    with gr.Row():
        # Camera column: the webcam image is both the stream input and the
        # place where the annotated frame is shown.
        with gr.Column(scale=6):
            frame = gr.Image(
                type="numpy",
                sources="webcam",
                streaming=True,
                label="Camera",
            )
        # Controls column: every value here is passed to `inference` on
        # each streamed frame.
        with gr.Column(scale=4):
            model = gr.Radio(
                # Same names, same order as the keys of the pre-loaded models.
                choices=list(yolo),
                value="nano",
                label="Model Size",
            )
            height = gr.Slider(
                minimum=32.0,
                maximum=640.0,
                step=32.0,
                value=320,
                label="Inference Resolution",
            )
            confidence = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.25,
                label="Confidence Threshold",
            )
            overlap = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.45,
                label="Overlap Threshold",
            )
    # Feed each webcam frame plus the current control values through
    # `inference` and write the result back into the same image component.
    frame.stream(
        fn=inference,
        inputs=[frame, model, height, confidence, overlap],
        outputs=frame,
        # FPS is capped at 10; higher polling rates can cause lag in the stream.
        stream_every=0.1,
    )
demo.launch()