// Auto-Generated Mamba Kernel
// Tile: 256, Warps: 4 (Threads: 128), Items: 2, Registers: 128
//
// Explicit instantiation definition for Tile 256: asks the compiler to emit
// selective_scan_fwd_cuda<at::Half, float> in this translation unit.
//
// NOTE(review): the function template is not defined in this section; its
// definition must be visible before this line -- confirm the include that
// provides it.
// NOTE(review): presumably the two template arguments are the input and
// weight element types -- confirm against the template declaration.
//
// Keep exactly one copy of this statement: repeating an identical explicit
// instantiation definition is ill-formed ("duplicate explicit
// instantiation"), so the generator must not emit it twice.
template void selective_scan_fwd_cuda<at::Half, float>(SSMParamsBase &params, cudaStream_t stream);