File size: 2,132 Bytes
61ba51e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | {
"sglang": {
"llama": {
"gemm|nvjet": "gemm",
"fused_moe_kernel|GroupProblemShape|group_gemm_starts|bmm_|GemmUniversal": "moe_gemm",
"moe|sigmoid": "moe",
"CatArrayBatched|prepare_inputs": "prepare_next",
"ncclDevKernel|cross_device_reduce": "nccl_and_custom_ar",
"_norm_|Norm": "norm",
"topk": "topk",
"act_and_mul_": "activation",
"Rotary": "rope",
"SoftMax": "softmax",
"flash|fmha": "attn",
"elementwise": "elementwise",
"fp8_quant|cvt_|quantize": "quantize",
"reduce_kernel": "reduce",
"triton": "triton_kernel",
"CUDA mem": "non-gpu-H_D_memops",
".*": "misc"
},
"ds": {
"block_fp8_matmul": "block_fp8_gemm",
"gemm|matmul|nvjet": "gemm",
"fused_moe_kernel": "moe_gemm",
"moe|expert|sigmoid": "moe",
"CatArrayBatched|write_req_to": "prepare_next",
"ncclDevKernel|cross_device_reduce|all_gather": "nccl_and_custom_ar",
"Norm": "norm",
"topk": "topk",
"activation|act_and_mul": "activation",
"compute_position_kernel": "rope",
"elementwise": "elementwise",
"fp8_quant|quant_fp8|quantize": "quantize",
"SoftMax": "softmax",
"reduce": "reduce",
"_fwd_|create_flash|::mla::|KVCache": "attn",
"CUDA mem": "non-gpu-H_D_memops",
".*": "misc"
},
"gpt-oss": {
"gemm|nvjet": "gemm",
"fused_moe_kernel|_group_gemm|GroupProblemShape|GemmUniversal|bmm_|matmul_ogs_|_topk_forward|_combined_routing|_sum_bitmatrix_rows|_compute_writeback_idx": "moe_gemm",
"moe|sigmoid": "moe",
"CatArrayBatched|prepare_inputs": "prepare_next",
"_norm_|Norm": "norm",
"ncclDevKernel|cross_device_reduce|allreduce": "nccl_and_custom_ar",
"topk|TopK": "topk",
"act_and_mul_": "activation",
"Rotary": "rope",
"SoftMax": "softmax",
"flash|fmha": "attn",
"elementwise": "elementwise",
"fp8_quant|cvt_|quantize": "quantize",
"reduce_kernel": "reduce",
"triton": "triton_kernel",
"CUDA mem": "non-gpu-H_D_memops",
".*": "misc"
}
}
}
|