Spaces:

kernels-community
/

kernels-benchmarks

Running

drbh HF Staff

Upload folder using huggingface_hub

d87c146 verified 1 day ago

877 Bytes

	# /// script
	# requires-python = ">=3.10"
	# dependencies = [
	# "numpy",
	# "torch==2.8.0",
	# "kernels",
	# "kernels-benchmark-tools",
	# ]
	#
	# [tool.uv.sources]
	# kernels-benchmark-tools = { path = "../../../../../tools", editable = true }
	# ///
	import torch
	import sys
	from kernels_benchmark_tools import KernelTypeEnum, run_benchmark
	from kernels import get_kernel

	# Load the sage attention kernel identified by "kernels-community/sage_attention".
	# This runs at module level, and the resulting object is the one whose .fwd()
	# entry point sage_attention() calls further down in this script.
	hf_kernels_sage_attn = get_kernel("kernels-community/sage_attention")


	def sage_attention(query, key, value):
	    """Run the SageAttention forward kernel without causal masking.

	    Variant under benchmark: INT8 Q/K quantization and FP16 P/V.
	    Forwards ``query``, ``key`` and ``value`` to ``hf_kernels_sage_attn.fwd``
	    with ``is_causal=False`` and returns element 0 of what that call yields.
	    """
	    outputs = hf_kernels_sage_attn.fwd(query, key, value, is_causal=False)
	    # Only the first item is kept.
	    # NOTE(review): presumably the attention output tensor -- confirm against the kernel.
	    return outputs[0]


	# Pass the sage_attention wrapper to run_benchmark as the attention
	# implementation named "sage_int8_fp16", along with its descriptive tags.
	run_benchmark(
	    kernel_type=KernelTypeEnum.ATTENTION,
	    impl_name="sage_int8_fp16",
	    impl_tags={
	        "family": "sageattention",
	        "backend": "int8_fp16_cuda",
	        "compile": "none",
	    },
	    impl_func=sage_attention,
	)