/**
* FireEcho Kernel - SM120 Cluster Launch Implementation
*
* Compile with:
* nvcc -shared -o libfireecho_cluster.so cluster_launch.cpp \
* -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lcuda -lcudart \
* --compiler-options '-fPIC' -arch=sm_120
*/
#include "cluster_launch.h"
#include <stdio.h>
namespace fireecho {
// Implementation of helper functions that need compilation
void print_cluster_info() {
if (!supports_clusters()) {
printf("Thread Block Clusters: NOT SUPPORTED\n");
return;
}
ClusterProperties props = get_cluster_properties();
printf("=== SM120 Thread Block Cluster Info ===\n");
printf("Max Cluster Size: %d\n", props.max_cluster_size);
printf("Max Blocks/SM: %d\n", props.max_blocks_per_sm);
printf("Shared Memory/Block: %d KB\n", props.shared_memory_per_block / 1024);
printf("Registers/Block: %d\n", props.registers_per_block);
printf("Distributed SMEM: %s\n", props.supports_dshem ? "YES" : "NO");
printf("========================================\n");
}
} // namespace fireecho
// Standalone test
#ifdef TEST_CLUSTER_LAUNCH
int main() {
// Initialize CUDA
cudaSetDevice(0);
fireecho::print_cluster_info();
if (fireecho::supports_clusters()) {
printf("\n✅ This GPU supports Thread Block Clusters!\n");
printf(" Max cluster size: %d CTAs\n", fireecho::get_max_cluster_size());
} else {
printf("\n❌ This GPU does NOT support Thread Block Clusters.\n");
}
return 0;
}
#endif
|