// Auto-Generated Mamba Kernel
// Tile: 512, Warps: 8 (Threads: 256), Items: 2, Registers: Max
//
// Instantiation for Tile 512
//
// Explicit instantiation definition of selective_scan_fwd_cuda for the
// template arguments <at::Half, float>. This forces the compiler to emit the
// specialization in this translation unit so other translation units can link
// against it without seeing the template body.
//
// Parameters (as declared by the instantiated signature):
//   params - SSMParamsBase passed by non-const reference.
//   stream - cudaStream_t handed to the implementation.
//            NOTE(review): presumably the stream the kernel is launched on;
//            confirm against the template definition.
//
// NOTE(review): the tile/warp/item configuration listed above comes from the
// generator banner; nothing in this file selects it, so it is presumably
// fixed by the template definition that is included elsewhere - confirm.
//
// An explicit instantiation definition may appear only once per program for a
// given set of template arguments, so it must not be repeated in this file.
template void selective_scan_fwd_cuda<at::Half, float>(SSMParamsBase &params, cudaStream_t stream);