# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "numpy",
#     "torch==2.8.0",
#     "kernels-benchmark-tools",
#     "kernels",
# ]
#
# [tool.uv.sources]
# kernels-benchmark-tools = { path = "../../../../../tools", editable = true }
# ///
import torch
import sys
from kernels_benchmark_tools import KernelTypeEnum, run_benchmark
from kernels import get_kernel

# Load the deformable DETR kernel once, at import time. The result is bound to
# a module-level name that the wrapper function below reads on every call.
# NOTE(review): presumably `get_kernel` resolves this identifier as a Hub
# repository and may download it on first use — confirm against the `kernels`
# documentation.
deformable_detr = get_kernel("kernels-community/deformable-detr")


def hf_kernels_deformable_detr(
    value,
    spatial_shapes,
    level_start_index,
    sampling_locations,
    attention_weights,
    im2col_step=64,
):
    """Run the forward pass of the loaded Deformable DETR attention kernel.

    Thin adapter around ``deformable_detr.ms_deform_attn_forward``: every
    argument is forwarded unchanged by keyword. Two arguments are renamed on
    the way through to match the kernel's keyword names:
    ``sampling_locations`` is passed as ``sampling_loc`` and
    ``attention_weights`` is passed as ``attn_weight``.

    Args:
        value: Forwarded as ``value``.
        spatial_shapes: Forwarded as ``spatial_shapes``.
        level_start_index: Forwarded as ``level_start_index``.
        sampling_locations: Forwarded as ``sampling_loc``.
        attention_weights: Forwarded as ``attn_weight``.
        im2col_step: Forwarded as ``im2col_step``; defaults to 64.

    Returns:
        Whatever ``ms_deform_attn_forward`` returns, unmodified.
    """
    forward = deformable_detr.ms_deform_attn_forward
    # Map this wrapper's parameter names onto the kernel's keyword names.
    kernel_kwargs = {
        "value": value,
        "spatial_shapes": spatial_shapes,
        "level_start_index": level_start_index,
        "sampling_loc": sampling_locations,
        "attn_weight": attention_weights,
        "im2col_step": im2col_step,
    }
    return forward(**kernel_kwargs)


# Register the wrapper above with the shared benchmark harness and run it.
# This call sits at module top level, so it executes as soon as the script is
# run (or the module is imported).
# NOTE(review): how `kernel_type`, `impl_tags` and `dtype` are interpreted is
# defined by `kernels_benchmark_tools.run_benchmark`, which is not visible
# here — confirm there. The "cuda" backend tag suggests a CUDA device is
# expected; verify.
run_benchmark(
    kernel_type=KernelTypeEnum.DEFORMABLE_DETR,
    impl_name="hf_kernels_deformable_detr",
    impl_tags={"family": "hf-kernels", "backend": "cuda"},
    impl_func=hf_kernels_deformable_detr,
    dtype="float32",
)