# NOTE(review): the following three residue lines ("Spaces:", "Sleeping",
# "Sleeping") appear to be artifacts of a table/log extraction, not code.
| """ | |
| Test for hyperrealistic multi-chip GPU system with full SM and tensor core realism, | |
| using WebSocket-based storage for zero CPU usage. | |
| """ | |
| import time | |
| import numpy as np | |
| from gpu_arch import Chip, OpticalInterconnect | |
def test_multi_chip_gpu():
    """Exercise a small multi-chip GPU model backed by WebSocket storage.

    Builds ``num_chips`` chips of ``num_sms`` SMs each, wires them in a ring
    over an optical interconnect, then for every chip: runs a tensor-core
    matmul on each SM with operands round-tripped through WebSocket storage,
    simulates a cross-chip result transfer, and probes the register/shared/
    global memory matmul paths.

    Raises:
        RuntimeError: if the WebSocket GPU storage server cannot be reached.
    """
    print("\n=== Multi-Chip GPU System with WebSocket Storage Test ===")
    num_chips = 2  # Use 2 for realism, scale up as needed
    num_sms = 4    # Use 4 for realism, scale up as needed

    # Initialize WebSocket storage shared by all chips; fail fast if the
    # backing server is unreachable rather than erroring mid-test.
    from websocket_storage import WebSocketGPUStorage
    storage = WebSocketGPUStorage()
    if not storage.wait_for_connection():
        raise RuntimeError("Could not connect to GPU storage server")

    chips = [Chip(
        chip_id=i,
        num_sms=num_sms,
        vram_size_gb=None  # Use unlimited WebSocket storage
    ) for i in range(num_chips)]
    print(f"Created {num_chips} chips with unlimited WebSocket storage, each with {num_sms} SMs.")

    # Connect chips in a ring topology with optical interconnect.
    optical_link = OpticalInterconnect(bandwidth_tbps=800, latency_ns=1)
    for i in range(num_chips):
        chips[i].connect_chip(chips[(i + 1) % num_chips], optical_link)

    # Persist each chip's configuration for cross-chip communication.
    # BUGFIX: connected_chips holds (chip, link) pairs (see the unpacking in
    # the transfer step below), so unpack the pair instead of treating each
    # element as a chip object.
    for chip in chips:
        chip_state = {
            "chip_id": chip.chip_id,
            "num_sms": num_sms,
            "connected_chips": [(peer.chip_id, "optical") for peer, _link in chip.connected_chips]
        }
        storage.store_state(f"chips/{chip.chip_id}", "config", chip_state)

    # Run tensor core operations with WebSocket-backed storage.
    print("\n=== Testing WebSocket-backed Multi-Chip Operations ===")
    matrix_a = [[1.0, 2.0], [3.0, 4.0]]
    matrix_b = [[5.0, 6.0], [7.0, 8.0]]

    for chip in chips:
        print(f"\n--- Chip {chip.chip_id} ---")
        # Store the operand matrices in WebSocket storage for this chip.
        storage.store_tensor(f"chip_{chip.chip_id}/matrix_a", np.array(matrix_a))
        storage.store_tensor(f"chip_{chip.chip_id}/matrix_b", np.array(matrix_b))

        # Each SM loads the operands back from storage, multiplies them on
        # the tensor core, and stores its result under its own key.
        for sm_id in range(num_sms):
            sm = chip.get_sm(sm_id)
            matrix_a_data = storage.load_tensor(f"chip_{chip.chip_id}/matrix_a")
            matrix_b_data = storage.load_tensor(f"chip_{chip.chip_id}/matrix_b")
            result = sm.tensor_core_matmul(matrix_a_data.tolist(), matrix_b_data.tolist())
            storage.store_tensor(f"chip_{chip.chip_id}/sm_{sm_id}/result", np.array(result))
            print(f"SM {sm_id} tensor core matmul result: {result}")

        # Cross-chip communication: forward SM 0's result to the first
        # connected neighbour through the optical link.
        if len(chip.connected_chips) > 0:
            next_chip, link = chip.connected_chips[0]
            result_data = storage.load_tensor(f"chip_{chip.chip_id}/sm_0/result")
            transfer_id = f"transfer_chip_{chip.chip_id}_to_{next_chip.chip_id}"
            storage.store_tensor(transfer_id, result_data)
            print(f"Transferred result from Chip {chip.chip_id} to Chip {next_chip.chip_id} via {link.__class__.__name__}")

        # NOTE(review): `sm` below is the loop variable leaked from the
        # per-SM loop above, so these memory-path tests only exercise the
        # LAST SM of each chip. If per-SM coverage was intended, move this
        # section inside that loop — confirm against the original intent.
        # Seed registers, shared memory, and (if present) global memory with
        # deterministic patterns so the matmul-from-memory results are stable.
        for i in range(len(sm.register_file)):
            for j in range(len(sm.register_file[0])):
                sm.register_file[i][j] = float(i + j)
        for addr in range(sm.shared_mem.size):
            sm.shared_mem.write(addr, float(addr % 10))
        for addr in range(sm.global_mem.size_bytes if sm.global_mem else 0):
            sm.global_mem.write(addr, float(addr % 100))

        # Test tensor core matmul sourced from each memory space.
        reg_result = sm.tensor_core_matmul_from_memory('register', 0, 'register', 0, (2, 2), (2, 2))
        print(f"SM {sm.sm_id} tensor core matmul from registers: {reg_result}")
        shared_result = sm.tensor_core_matmul_from_memory('shared', 0, 'shared', 0, (2, 2), (2, 2))
        print(f"SM {sm.sm_id} tensor core matmul from shared memory: {shared_result}")
        global_result = sm.tensor_core_matmul_from_memory('global', 0, 'global', 0, (2, 2), (2, 2))
        print(f"SM {sm.sm_id} tensor core matmul from global memory: {global_result}")

    print("\n=== Multi-Chip GPU System Test Complete ===")
if __name__ == "__main__":
    # perf_counter is monotonic and the documented clock for measuring
    # elapsed intervals (time.time can jump on wall-clock adjustments).
    start = time.perf_counter()
    test_multi_chip_gpu()
    print(f"Test runtime: {time.perf_counter() - start:.3f} seconds")