// FireEcho Engine — csrc/cluster_launch.cpp
/**
* FireEcho Kernel - SM120 Cluster Launch Implementation
*
* Compile with:
* nvcc -shared -o libfireecho_cluster.so cluster_launch.cpp \
* -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lcuda -lcudart \
* --compiler-options '-fPIC' -arch=sm_120
*/
#include "cluster_launch.h"
#include <stdio.h>
namespace fireecho {
// Implementation of helper functions that need compilation
void print_cluster_info() {
if (!supports_clusters()) {
printf("Thread Block Clusters: NOT SUPPORTED\n");
return;
}
ClusterProperties props = get_cluster_properties();
printf("=== SM120 Thread Block Cluster Info ===\n");
printf("Max Cluster Size: %d\n", props.max_cluster_size);
printf("Max Blocks/SM: %d\n", props.max_blocks_per_sm);
printf("Shared Memory/Block: %d KB\n", props.shared_memory_per_block / 1024);
printf("Registers/Block: %d\n", props.registers_per_block);
printf("Distributed SMEM: %s\n", props.supports_dshem ? "YES" : "NO");
printf("========================================\n");
}
} // namespace fireecho
// Standalone smoke test — build with -DTEST_CLUSTER_LAUNCH.
#ifdef TEST_CLUSTER_LAUNCH
/**
 * Entry point for the standalone cluster-capability probe.
 *
 * Selects device 0, prints the SM120 cluster property report, and then
 * a one-line verdict. Returns 0 on success, 1 if no usable CUDA device
 * could be selected.
 */
int main() {
    // Bug fix: the original ignored cudaSetDevice's return value, so a
    // missing/failed device would still fall through to the queries below.
    // cudaSuccess == 0, so a non-zero return indicates failure.
    if (cudaSetDevice(0) != 0) {
        fprintf(stderr, "cudaSetDevice(0) failed: no usable CUDA device\n");
        return 1;
    }

    fireecho::print_cluster_info();

    if (fireecho::supports_clusters()) {
        printf("\n✅ This GPU supports Thread Block Clusters!\n");
        printf(" Max cluster size: %d CTAs\n", fireecho::get_max_cluster_size());
    } else {
        printf("\n❌ This GPU does NOT support Thread Block Clusters.\n");
    }
    return 0;
}
#endif  // TEST_CLUSTER_LAUNCH