import gradio as gr
import spaces
import supervision as sv
import torch
from transformers import (
    AutoImageProcessor,
    RfDetrForInstanceSegmentation,
    RfDetrForObjectDetection,
)


def _is_segmentation_model(model_basename: str) -> bool:
    return "seg" in model_basename


@spaces.GPU
def infer(model_name: str, image, confidence_threshold: float):
    """Run an RF-DETR checkpoint on ``image`` and return the annotated image.

    Args:
        model_name: Checkpoint basename; resolved to the hub id
            ``stevenbucaille/<model_name>``. Basenames for which
            ``_is_segmentation_model`` is true are loaded as
            instance-segmentation models, all others as object detectors.
        image: Input PIL image. The UI passes ``None`` when nothing has been
            uploaded.
        confidence_threshold: Score threshold forwarded to the processor's
            post-processing step.

    Returns:
        The image with masks (segmentation) or boxes (detection) and
        ``"<class> <score>"`` labels drawn on it. When nothing is detected
        the input image is returned as-is.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    # Surface a readable message in the UI instead of an AttributeError on
    # ``image.size`` when the user clicks "Infer" without an image.
    if image is None:
        raise gr.Error("Please provide an input image before running inference.")

    hub_model_id = f"stevenbucaille/{model_name}"
    segmentation = _is_segmentation_model(model_name)
    processor = AutoImageProcessor.from_pretrained(hub_model_id)
    model_cls = (
        RfDetrForInstanceSegmentation if segmentation else RfDetrForObjectDetection
    )
    model = model_cls.from_pretrained(hub_model_id)

    # ``from_pretrained`` loads on CPU; move the model explicitly so the GPU
    # requested via ``@spaces.GPU`` is actually used (falls back to CPU when
    # CUDA is unavailable, e.g. when running locally).
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device).eval()

    inputs = processor(images=image, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports (width, height); post-processing expects (height, width).
    height_width = image.size[::-1]
    if segmentation:
        results = processor.post_process_instance_segmentation(
            outputs,
            target_sizes=[height_width],
            threshold=confidence_threshold,
        )[0]
    else:
        results = processor.post_process_object_detection(
            outputs,
            target_sizes=torch.tensor([height_width]),
            threshold=confidence_threshold,
        )[0]

    detections = sv.Detections.from_transformers(
        transformers_results=results, id2label=model.config.id2label
    )
    # Nothing above the threshold: there is nothing to draw, and the
    # ``class_name`` data field may be missing on an empty result.
    if len(detections) == 0:
        return image

    labels = [
        f"{class_name} {confidence:.2f}"
        for class_name, confidence in zip(
            detections["class_name"], detections.confidence
        )
    ]

    # Dynamically scale text based on image width.
    width, _ = image.size
    label_annotator = sv.LabelAnnotator(
        text_padding=4,
        text_scale=(width / 1000) * 0.5,
        text_thickness=max(1, int(round(width / 500))),
        smart_position=True,
    )
    if segmentation:
        shape_annotator = sv.MaskAnnotator(
            color_lookup=sv.ColorLookup.CLASS,
            opacity=0.5,
        )
    else:
        shape_annotator = sv.BoxAnnotator()

    # Draw masks/boxes first, then the labels on top.
    image = shape_annotator.annotate(image, detections)
    image = label_annotator.annotate(image, detections, labels)
    return image


# Checkpoint basenames offered in the model selector (passed to `infer`).
_MODEL_CHOICES = [
    "rf-detr-base",
    "rf-detr-base-2",
    "rf-detr-large",
    "rf-detr-medium",
    "rf-detr-nano",
    "rf-detr-seg-large",
    "rf-detr-seg-medium",
    "rf-detr-seg-nano",
    "rf-detr-seg-preview",
    "rf-detr-seg-small",
    "rf-detr-seg-xlarge",
    "rf-detr-seg-xxlarge",
    "rf-detr-segmentation",
    "rf-detr-small",
]

# Sample image paths listed in the examples gallery.
_EXAMPLE_IMAGES = [
    "samples/cats.jpg",
    "samples/detectron2.png",
    "samples/cat.jpg",
    "samples/hotdog.jpg",
]

# Page layout: controls in the left column, annotated result in the right.
with gr.Blocks() as demo:
    # Title and introductory text.
    gr.Markdown("# RF-DETR Object Detection")
    gr.Markdown(
        "RF-DETR is a transformer-based object detection model that is trained on the Objects365 and COCO datasets."
    )
    gr.Markdown(
        "This space is a demo of the RF-DETR model. You can select a model and an image and see the results."
    )

    with gr.Row():
        # Left column: model choice, threshold, input image and trigger button.
        with gr.Column():
            model = gr.Radio(
                choices=_MODEL_CHOICES,
                value="rf-detr-base",
                label="Model",
            )
            confidence_threshold = gr.Slider(
                label="Confidence Threshold",
                minimum=0.0,
                maximum=1.0,
                step=0.1,
                value=0.3,
            )
            input_image = gr.Image(type="pil", label="Input Image")
            send_btn = gr.Button("Infer", variant="primary")
        # Right column: the image returned by `infer`.
        with gr.Column():
            output_image = gr.Image(type="pil", label="Output Image")

    # Clicking an example fills the input image component.
    gr.Examples(examples=_EXAMPLE_IMAGES, inputs=input_image)

    # Wire the button to the inference function.
    send_btn.click(
        fn=infer,
        inputs=[model, input_image, confidence_threshold],
        outputs=[output_image],
    )


# Start serving the Blocks app defined above. This runs at import time as
# well as when the file is executed as a script (there is no __main__ guard).
demo.launch(debug=True)