""" Test for hyperrealistic multi-chip GPU system with full SM and tensor core realism, using WebSocket-based storage for zero CPU usage. """ import time import numpy as np from gpu_arch import Chip, OpticalInterconnect def test_multi_chip_gpu(): print("\n=== Multi-Chip GPU System with WebSocket Storage Test ===") num_chips = 2 # Use 2 for realism, scale up as needed num_sms = 4 # Use 4 for realism, scale up as needed # Initialize WebSocket storage for all chips from websocket_storage import WebSocketGPUStorage storage = WebSocketGPUStorage() if not storage.wait_for_connection(): raise RuntimeError("Could not connect to GPU storage server") chips = [Chip( chip_id=i, num_sms=num_sms, vram_size_gb=None # Use unlimited WebSocket storage ) for i in range(num_chips)] print(f"Created {num_chips} chips with unlimited WebSocket storage, each with {num_sms} SMs.") # Connect chips in a ring topology with optical interconnect optical_link = OpticalInterconnect(bandwidth_tbps=800, latency_ns=1) for i in range(num_chips): chips[i].connect_chip(chips[(i+1)%num_chips], optical_link) # Initialize shared WebSocket storage for cross-chip communication for chip in chips: chip_state = { "chip_id": chip.chip_id, "num_sms": num_sms, "connected_chips": [(c.chip_id, "optical") for c in chip.connected_chips] } storage.store_state(f"chips/{chip.chip_id}", "config", chip_state) # Run tensor core operations with WebSocket-backed storage print("\n=== Testing WebSocket-backed Multi-Chip Operations ===") # Create test matrices matrix_a = [[1.0, 2.0], [3.0, 4.0]] matrix_b = [[5.0, 6.0], [7.0, 8.0]] for chip in chips: print(f"\n--- Chip {chip.chip_id} ---") # Store matrices in WebSocket storage for this chip storage.store_tensor(f"chip_{chip.chip_id}/matrix_a", np.array(matrix_a)) storage.store_tensor(f"chip_{chip.chip_id}/matrix_b", np.array(matrix_b)) # Process using each SM for sm_id in range(num_sms): sm = chip.get_sm(sm_id) # Load matrices from WebSocket storage matrix_a_data = storage.load_tensor(f"chip_{chip.chip_id}/matrix_a") matrix_b_data = storage.load_tensor(f"chip_{chip.chip_id}/matrix_b") # Perform tensor core operation result = sm.tensor_core_matmul(matrix_a_data.tolist(), matrix_b_data.tolist()) # Store result back in WebSocket storage storage.store_tensor(f"chip_{chip.chip_id}/sm_{sm_id}/result", np.array(result)) print(f"SM {sm_id} tensor core matmul result: {result}") # Test cross-chip communication if len(chip.connected_chips) > 0: next_chip, link = chip.connected_chips[0] # Get result from this chip result_data = storage.load_tensor(f"chip_{chip.chip_id}/sm_0/result") # Transfer to next chip through optical link transfer_id = f"transfer_chip_{chip.chip_id}_to_{next_chip.chip_id}" storage.store_tensor(transfer_id, result_data) print(f"Transferred result from Chip {chip.chip_id} to Chip {next_chip.chip_id} via {link.__class__.__name__}") for i in range(len(sm.register_file)): for j in range(len(sm.register_file[0])): sm.register_file[i][j] = float(i + j) for addr in range(sm.shared_mem.size): sm.shared_mem.write(addr, float(addr % 10)) for addr in range(sm.global_mem.size_bytes if sm.global_mem else 0): sm.global_mem.write(addr, float(addr % 100)) # Test tensor core matmul from registers reg_result = sm.tensor_core_matmul_from_memory('register', 0, 'register', 0, (2,2), (2,2)) print(f"SM {sm.sm_id} tensor core matmul from registers: {reg_result}") # Test tensor core matmul from shared memory shared_result = sm.tensor_core_matmul_from_memory('shared', 0, 'shared', 0, (2,2), (2,2)) print(f"SM {sm.sm_id} tensor core matmul from shared memory: {shared_result}") # Test tensor core matmul from global memory global_result = sm.tensor_core_matmul_from_memory('global', 0, 'global', 0, (2,2), (2,2)) print(f"SM {sm.sm_id} tensor core matmul from global memory: {global_result}") print("\n=== Multi-Chip GPU System Test Complete ===") if __name__ == "__main__": start = time.time() test_multi_chip_gpu() print(f"Test runtime: {time.time()-start:.3f} seconds")