{"ts": "2025-12-19T23:02:21Z", "run": "bd3674eb0704484693460041fd14f59b", "impl": "hf_kernels_deformable_detr", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_Q100_H8_E256_L4_P4", "batch_size": 1, "num_queries": 100, "num_heads": 8, "embed_dim": 256, "num_levels": 4, "num_points": 4, "spatial_shapes": [[32, 32], [16, 16], [8, 8], [4, 4]], "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.03611099987210764, "p50": 0.037491000057343626, "p90": 0.038670999856549315, "mean": 0.03807699995377334, "iqr": 0.0014299998838396277, "raw_times": [0.04087100001015642, 0.038670999856549315, 0.03724099997270969, 0.037491000057343626, 0.03611099987210764], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.04587100011121947, "peak_bytes": 2264064, "ok": true, "absmax": 7.152557373046875e-07, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax": 7.152557373046875e-07, "mae": 5.539113701047427e-08, "mse": 6.418638644407112e-15, "ref": "deformable_detr_torch"}, "err": null} {"ts": "2025-12-19T23:02:21Z", "run": "bd3674eb0704484693460041fd14f59b", "impl": "hf_kernels_deformable_detr", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_Q300_H8_E256_L4_P4", "batch_size": 1, "num_queries": 300, "num_heads": 8, "embed_dim": 256, "num_levels": 4, "num_points": 4, "spatial_shapes": [[32, 32], [16, 16], [8, 8], [4, 4]], "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.0427610000315326, "p50": 0.04391099992062664, "p90": 0.04453099995771481, "mean": 0.043983000023217755, "iqr": 0.0007099997674231417, "raw_times": [0.0427610000315326, 0.04453099995771481, 0.044891000015923055, 0.04382100019029167, 0.04391099992062664], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.04723100005321612, "peak_bytes": 4004864, "ok": true, "absmax": 7.152557373046875e-07, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax": 7.152557373046875e-07, "mae": 5.559346050176828e-08, "mse": 6.4289483059246175e-15, "ref": "deformable_detr_torch"}, "err": null} {"ts": "2025-12-19T23:02:21Z", "run": "bd3674eb0704484693460041fd14f59b", "impl": "hf_kernels_deformable_detr", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_Q100_H8_E256_L4_P4", "batch_size": 2, "num_queries": 100, "num_heads": 8, "embed_dim": 256, "num_levels": 4, "num_points": 4, "spatial_shapes": [[32, 32], [16, 16], [8, 8], [4, 4]], "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.04207100005260145, "p50": 0.04387099988889531, "p90": 0.044481000031737494, "mean": 0.04371499999251682, "iqr": 0.0019200001588615123, "raw_times": [0.04387099988889531, 0.044481000031737494, 0.04559100011647388, 0.04256099987287598, 0.04207100005260145], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.04643099987333699, "peak_bytes": 5459968, "ok": true, "absmax": 7.152557373046875e-07, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax": 7.152557373046875e-07, "mae": 5.555110149657594e-08, "mse": 6.418781369458724e-15, "ref": "deformable_detr_torch"}, "err": null} {"ts": "2025-12-19T23:02:21Z", "run": "bd3674eb0704484693460041fd14f59b", "impl": "hf_kernels_deformable_detr", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_Q300_H8_E256_L4_P4", "batch_size": 2, "num_queries": 300, "num_heads": 8, "embed_dim": 256, "num_levels": 4, "num_points": 4, "spatial_shapes": [[32, 32], [16, 16], [8, 8], [4, 4]], "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.58-82.121.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.04534100003183994, "p50": 0.04615100010596507, "p90": 0.04615100010596507, "mean": 0.045852800030843355, "iqr": 0.0007410001217067474, "raw_times": [0.04615100010596507, 0.04540999998425832, 0.04615100010596507, 0.04534100003183994, 0.04621099992618838], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.04675100012718758, "peak_bytes": 8008704, "ok": true, "absmax": 7.152557373046875e-07, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax": 7.152557373046875e-07, "mae": 5.5905669427147586e-08, "mse": 6.485184940875199e-15, "ref": "deformable_detr_torch"}, "err": null}