| // Auto-Generated Mamba Kernel | |
| // Tile: 1024, Warps: 32 (Threads: 1024), Items: 1, Registers: Max | |
| // Explicit instantiation for Tile 1024: emits selective_scan_fwd_cuda<at::Half, float> in this translation unit. | |
| template void selective_scan_fwd_cuda<at::Half, float>(SSMParamsBase &params, cudaStream_t stream); | |
| // Auto-Generated Mamba Kernel | |
| // Tile: 1024, Warps: 32 (Threads: 1024), Items: 1, Registers: Max | |
| // Explicit instantiation for Tile 1024: emits selective_scan_fwd_cuda<at::Half, float> in this translation unit. | |
| template void selective_scan_fwd_cuda<at::Half, float>(SSMParamsBase &params, cudaStream_t stream); | |