// FireEcho Engine — csrc/cluster_launch.cpp
/**
* FireEcho Kernel - SM120 Cluster Launch Implementation
*
* Compile with:
* nvcc -shared -o libfireecho_cluster.so cluster_launch.cpp \
* -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lcuda -lcudart \
* --compiler-options '-fPIC' -arch=sm_120
*/
#include "cluster_launch.h"
#include <stdio.h>
namespace fireecho {
// Implementation of helper functions that need compilation
void print_cluster_info() {
if (!supports_clusters()) {
printf("Thread Block Clusters: NOT SUPPORTED\n");
return;
}
ClusterProperties props = get_cluster_properties();
printf("=== SM120 Thread Block Cluster Info ===\n");
printf("Max Cluster Size: %d\n", props.max_cluster_size);
printf("Max Blocks/SM: %d\n", props.max_blocks_per_sm);
printf("Shared Memory/Block: %d KB\n", props.shared_memory_per_block / 1024);
printf("Registers/Block: %d\n", props.registers_per_block);
printf("Distributed SMEM: %s\n", props.supports_dshem ? "YES" : "NO");
printf("========================================\n");
}
} // namespace fireecho
// Standalone smoke test — build with -DTEST_CLUSTER_LAUNCH.
#ifdef TEST_CLUSTER_LAUNCH
/**
 * Entry point for the standalone cluster-capability probe.
 *
 * Selects device 0, prints the SM120 cluster property report, and then
 * a one-line verdict. Returns 0 on success, 1 if no usable CUDA device
 * could be selected.
 */
int main() {
    // Bug fix: the original ignored cudaSetDevice's return value, so a
    // missing/failed device would still fall through to the queries below.
    // cudaSuccess == 0, so a non-zero return indicates failure.
    if (cudaSetDevice(0) != 0) {
        fprintf(stderr, "cudaSetDevice(0) failed: no usable CUDA device\n");
        return 1;
    }

    fireecho::print_cluster_info();

    if (fireecho::supports_clusters()) {
        printf("\n✅ This GPU supports Thread Block Clusters!\n");
        printf(" Max cluster size: %d CTAs\n", fireecho::get_max_cluster_size());
    } else {
        printf("\n❌ This GPU does NOT support Thread Block Clusters.\n");
    }
    return 0;
}
#endif  // TEST_CLUSTER_LAUNCH