| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #include "cluster_launch.h" |
| | #include <stdio.h> |
| |
|
| | namespace fireecho { |
| |
|
| | |
| |
|
| | void print_cluster_info() { |
| | if (!supports_clusters()) { |
| | printf("Thread Block Clusters: NOT SUPPORTED\n"); |
| | return; |
| | } |
| | |
| | ClusterProperties props = get_cluster_properties(); |
| | |
| | printf("=== SM120 Thread Block Cluster Info ===\n"); |
| | printf("Max Cluster Size: %d\n", props.max_cluster_size); |
| | printf("Max Blocks/SM: %d\n", props.max_blocks_per_sm); |
| | printf("Shared Memory/Block: %d KB\n", props.shared_memory_per_block / 1024); |
| | printf("Registers/Block: %d\n", props.registers_per_block); |
| | printf("Distributed SMEM: %s\n", props.supports_dshem ? "YES" : "NO"); |
| | printf("========================================\n"); |
| | } |
| |
|
| | } |
| |
|
| | |
| | #ifdef TEST_CLUSTER_LAUNCH |
| | int main() { |
| | |
| | cudaSetDevice(0); |
| | |
| | fireecho::print_cluster_info(); |
| | |
| | if (fireecho::supports_clusters()) { |
| | printf("\n✅ This GPU supports Thread Block Clusters!\n"); |
| | printf(" Max cluster size: %d CTAs\n", fireecho::get_max_cluster_size()); |
| | } else { |
| | printf("\n❌ This GPU does NOT support Thread Block Clusters.\n"); |
| | } |
| | |
| | return 0; |
| | } |
| | #endif |
| |
|