Factor Studios committed on
Commit
b2a7b82
·
verified ·
1 Parent(s): 3abf206

Upload 2 files

Browse files
Files changed (2) hide show
  1. ai.py +8 -2
  2. test_ai_integration.py +40 -21
ai.py CHANGED
@@ -138,9 +138,13 @@ class AIAccelerator:
138
  raise RuntimeError("VRAM not properly configured")
139
 
140
  # Test tensor core functionality with a small computation
141
- test_input = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
 
 
 
 
142
  test_result = self.tensor_core_array.matmul(test_input, test_input)
143
- if test_result is None or test_result.size == 0:
144
  raise RuntimeError("Tensor core test computation failed")
145
 
146
  self.tensor_cores_initialized = True
@@ -698,3 +702,5 @@ class AIAccelerator:
698
  except Exception as e:
699
  print(f"[ERROR] WebSocket-based inference failed for idx={idx}: {e}")
700
  return None
 
 
 
138
  raise RuntimeError("VRAM not properly configured")
139
 
140
  # Test tensor core functionality with a small computation
141
+ test_input = [[1.0, 2.0], [3.0, 4.0]]
142
+ # Convert input to numpy array if needed
143
+ if isinstance(test_input, list):
144
+ test_input = np.array(test_input, dtype=np.float32)
145
+
146
  test_result = self.tensor_core_array.matmul(test_input, test_input)
147
+ if test_result is None or not isinstance(test_result, (np.ndarray, list)) or len(test_result) == 0:
148
  raise RuntimeError("Tensor core test computation failed")
149
 
150
  self.tensor_cores_initialized = True
 
702
  except Exception as e:
703
  print(f"[ERROR] WebSocket-based inference failed for idx={idx}: {e}")
704
  return None
705
+
706
+
test_ai_integration.py CHANGED
@@ -207,40 +207,59 @@ def test_ai_integration():
207
  ai_accelerators = []
208
 
209
  try:
210
- # Use WebSocket connection manager for all chips
211
- with websocket_manager() as shared_storage:
212
- # Initialize high-performance chip array with WebSocket storage
213
- total_sms = 0
214
- total_cores = 0
215
-
216
- # Create optical interconnect for chip communication
217
- from gpu_arch import OpticalInterconnect
218
- optical_link = OpticalInterconnect(bandwidth_tbps=800, latency_ns=1)
219
-
220
- # Create shared VRAM instance for all chips
 
 
 
 
 
 
 
 
 
 
221
  shared_vram = VirtualVRAM()
222
- shared_vram.storage = shared_storage
223
 
224
  for i in range(num_chips):
225
- # Configure each chip with unlimited WebSocket storage
226
- chip = Chip(chip_id=i, vram_size_gb=None) # Unlimited WebSocket storage
227
  chips.append(chip)
228
 
229
  # Connect chips in a ring topology
230
  if i > 0:
231
  chip.connect_chip(chips[i-1], optical_link)
232
 
233
- # Initialize AI accelerator with shared WebSocket storage
234
  ai_accelerator = chip.ai_accelerator
235
- ai_accelerator.vram = shared_vram # Use shared VRAM instance
 
236
  ai_accelerators.append(ai_accelerator)
237
 
 
 
 
 
238
  # Load model weights from WebSocket storage (no CPU transfer)
239
- ai_accelerator.load_model(model_id, None, None) # Model already in WebSocket storage
240
-
241
- # Track total processing units
242
- total_sms += chip.num_sms
243
- total_cores += chip.num_sms * chip.cores_per_sm
 
 
 
 
244
 
245
  # Store chip configuration in WebSocket storage
246
  storage.store_state(f"chips/{i}/config", "state", {
 
207
  ai_accelerators = []
208
 
209
  try:
210
+ # Reuse the existing storage connection from the previous test
211
+ if not components['storage'] or not components['storage'].wait_for_connection():
212
+ # If connection lost, try to reconnect
213
+ with websocket_manager() as shared_storage:
214
+ if not shared_storage or not shared_storage.wait_for_connection():
215
+ raise RuntimeError("Could not establish WebSocket connection")
216
+ components['storage'] = shared_storage
217
+
218
+ shared_storage = components['storage']
219
+
220
+ # Initialize high-performance chip array with WebSocket storage
221
+ total_sms = 0
222
+ total_cores = 0
223
+
224
+ # Create optical interconnect for chip communication
225
+ from gpu_arch import OpticalInterconnect
226
+ optical_link = OpticalInterconnect(bandwidth_tbps=800, latency_ns=1)
227
+
228
+ # Reuse existing VRAM instance with shared storage
229
+ shared_vram = components['vram']
230
+ if shared_vram is None:
231
  shared_vram = VirtualVRAM()
232
+ shared_vram.storage = shared_storage
233
 
234
  for i in range(num_chips):
235
+ # Configure each chip with shared WebSocket storage
236
+ chip = Chip(chip_id=i, vram_size_gb=None, storage=shared_storage)
237
  chips.append(chip)
238
 
239
  # Connect chips in a ring topology
240
  if i > 0:
241
  chip.connect_chip(chips[i-1], optical_link)
242
 
243
+ # Initialize AI accelerator with shared resources
244
  ai_accelerator = chip.ai_accelerator
245
+ ai_accelerator.vram = shared_vram
246
+ ai_accelerator.storage = shared_storage # Ensure storage is set
247
  ai_accelerators.append(ai_accelerator)
248
 
249
+ # Verify WebSocket connection before loading model
250
+ if not shared_storage.wait_for_connection():
251
+ raise RuntimeError(f"Lost WebSocket connection during chip {i} initialization")
252
+
253
  # Load model weights from WebSocket storage (no CPU transfer)
254
+ try:
255
+ ai_accelerator.load_model(model_id, None, None) # Model already in WebSocket storage
256
+ except Exception as e:
257
+ print(f"Warning: Failed to load model on chip {i}: {e}")
258
+ continue
259
+
260
+ # Track total processing units
261
+ total_sms += chip.num_sms
262
+ total_cores += chip.num_sms * chip.cores_per_sm
263
 
264
  # Store chip configuration in WebSocket storage
265
  storage.store_state(f"chips/{i}/config", "state", {