Factor Studios committed on
Commit c6c924f · verified · 1 Parent(s): 5f61e76

Update test_ai_integration_http.py

Files changed (1):
  1. test_ai_integration_http.py +366 -271
test_ai_integration_http.py CHANGED
@@ -1,10 +1,10 @@
 """
-Test AI integration with HTTP-based storage and zero CPU memory usage.
+Test AI integration with HTTP-based storage for Florence model inference.
 All operations are performed through HTTP storage with direct tensor core access.
 """
 import asyncio
 from gpu_arch import Chip
-from ai_http import AIAccelerator
+from ai_http import AIAcceleratorHTTP
 from virtual_vram import VirtualVRAM
 from PIL import Image
 import numpy as np
@@ -15,6 +15,7 @@ import platform
 import contextlib
 import atexit
 import logging
+import torch
 
 # Configure logging
 logging.basicConfig(
@@ -22,9 +23,18 @@ logging.basicConfig(
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
 
-# HTTP connection manager with retry
+# Increase system file descriptor limit
+def increase_file_limit():
+    try:
+        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
+        resource.setrlimit(resource.RLIMIT_NOFILE, (hard, hard))
+        print(f"Increased file descriptor limit from {soft} to {hard}")
+    except Exception as e:
+        print(f"Warning: Could not increase file descriptor limit: {e}")
+
+# HTTP connection manager with retry and keep-alive
 @contextlib.contextmanager
-def http_storage_manager(max_retries=5, retry_delay=2, timeout=30.0):
+def http_manager(max_retries=5, retry_delay=2, timeout=300):  # Increased timeout to 5 minutes
    storage = None
    last_error = None
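
Note: increase_file_limit in the new revision calls resource.getrlimit, but no "import resource" appears in any hunk shown here, so the import presumably lives elsewhere in the file. A self-contained sketch of the same idea (the resource module is Unix-only and not available on Windows):

    import resource

    def increase_file_limit():
        # Raise the soft open-file limit up to the hard limit for this process.
        try:
            soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
            resource.setrlimit(resource.RLIMIT_NOFILE, (hard, hard))
            print(f"Increased file descriptor limit from {soft} to {hard}")
        except (ValueError, OSError) as e:
            print(f"Warning: could not raise file descriptor limit: {e}")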
 
@@ -35,25 +45,37 @@ def http_storage_manager(max_retries=5, retry_delay=2, timeout=30.0):
                    storage.close()
                except:
                    pass
-            storage = HTTPGPUStorage()
-            return storage.wait_for_connection(timeout=timeout)
+            storage = HTTPGPUStorage(
+                keep_alive=True,
+                timeout=timeout,
+                max_retries=max_retries
+            )
+            connected = storage.connect()
+            if connected:
+                storage.configure({
+                    'keep_alive': True,
+                    'timeout': timeout,
+                    'chunk_size': 2 * 1024 * 1024 * 1024,  # 2GB chunks for network optimization
+                    'network_buffer_size': 4 * 1024 * 1024 * 1024  # 4GB network buffer
+                })
+            return connected
 
    # Initial connection attempts
    for attempt in range(max_retries):
        try:
            if try_connect():
-                logging.info("Successfully connected to GPU storage server via HTTP")
+                logging.info("Successfully connected to HTTP GPU storage server with keep-alive")
                break
            else:
-                logging.warning(f"HTTP connection attempt {attempt + 1} failed, retrying in {retry_delay}s...")
+                logging.warning(f"Connection attempt {attempt + 1} failed, retrying in {retry_delay}s...")
                time.sleep(retry_delay)
        except Exception as e:
            last_error = str(e)
-            logging.error(f"HTTP connection attempt {attempt + 1} failed with error: {e}")
+            logging.error(f"Connection attempt {attempt + 1} failed with error: {e}")
            time.sleep(retry_delay)
 
        if attempt == max_retries - 1:
-            error_msg = f"Could not connect to GPU storage server via HTTP after {max_retries} attempts"
+            error_msg = f"Could not connect to HTTP GPU storage server after {max_retries} attempts"
            if last_error:
                error_msg += f". Last error: {last_error}"
            raise RuntimeError(error_msg)
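
Intended usage of the manager above, as a sketch (HTTPGPUStorage and store_state are the file's own names; the payload is illustrative only):

    with http_manager(max_retries=3, retry_delay=1, timeout=60) as storage:
        storage.store_state("model_registry", "state", {"initialized": True})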
@@ -62,10 +84,10 @@ def http_storage_manager(max_retries=5, retry_delay=2, timeout=30.0):
        # Yield the storage connection
        yield storage
    except Exception as e:
-        logging.error(f"HTTP operation failed: {e}")
+        logging.error(f"WebSocket operation failed: {e}")
        # Try to reconnect once if operation fails
        if try_connect():
-            logging.info("Successfully reconnected to GPU storage server via HTTP")
+            logging.info("Successfully reconnected to GPU storage server")
            yield storage
        else:
            raise
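
One caveat in this hunk: a @contextlib.contextmanager generator may yield exactly once, so the reconnect path that yields storage a second time after an exception will itself raise RuntimeError ("generator didn't stop after throw()") when the with-block exits. A sketch that keeps a single yield and pushes connection retries into a connect callable (connect is an assumed parameter, not the file's API):

    import contextlib
    import logging
    import time

    @contextlib.contextmanager
    def http_manager_single_yield(connect, max_retries=5, retry_delay=2):
        storage = None
        for attempt in range(max_retries):
            try:
                storage = connect()
                break
            except Exception as e:
                logging.warning(f"Connect attempt {attempt + 1} failed: {e}")
                time.sleep(retry_delay)
        if storage is None:
            raise RuntimeError(f"Could not connect after {max_retries} attempts")
        try:
            yield storage  # exactly one yield; callers retry the operation themselves
        finally:
            with contextlib.suppress(Exception):
                storage.close()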
@@ -76,16 +98,31 @@ def http_storage_manager(max_retries=5, retry_delay=2, timeout=30.0):
            except:
                pass
 
-# Cleanup handler
+# Cleanup handler with HTTP connection handling
 def cleanup_resources():
    import gc
+    # Close any open HTTP connections
+    try:
+        from http_storage import HTTPGPUStorage
+        HTTPGPUStorage.close_all_connections()
+    except Exception as e:
+        logging.error(f"Error during HTTP connection cleanup: {e}")
+
+    # Clear CUDA cache if available
+    if torch.cuda.is_available():
+        try:
+            torch.cuda.empty_cache()
+        except Exception as e:
+            logging.error(f"Error clearing CUDA cache: {e}")
+
+    # Force garbage collection
    gc.collect()
 
 # Register cleanup handler
 atexit.register(cleanup_resources)
 
-def test_ai_integration_http():
-    print("\n--- Testing HTTP-Based AI Integration with Zero CPU Usage ---")
+def test_ai_integration():
+    print("\n--- Testing HTTP-Based AI Integration with Florence Model ---")
    from electron_speed import TARGET_SWITCHES_PER_SEC, TRANSISTORS_ON_CHIP, drift_velocity, speed_of_light_silicon
 
    # Initialize components dictionary to store GPU resources
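
The cleanup hunk assumes HTTPGPUStorage exposes a close_all_connections() classmethod and that torch was imported at module scope. A defensive variant that also tolerates torch being absent (a sketch, not the file's actual API):

    import atexit
    import gc
    import logging

    def cleanup_resources():
        try:
            import torch  # optional dependency in this sketch
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
        except Exception as e:
            logging.error(f"Error clearing CUDA cache: {e}")
        gc.collect()

    atexit.register(cleanup_resources)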
@@ -97,7 +134,14 @@ def test_ai_integration_http():
        'storage': None,
        'model_config': None,
        'tensor_registry': {},
-        'initialized': False
+        'initialized': False,
+        'http_config': {
+            'chunk_size': 2 * 1024 * 1024 * 1024,  # 2GB chunks for network optimization
+            'timeout': 600,  # 10 minutes to handle larger chunks
+            'keep_alive': True,
+            'max_retries': 5,
+            'retry_delay': 2
+        }
    }
 
    # Initialize global tensor registry
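
On the http_config values: a 2GB chunk_size and a 4GB network buffer mean each transfer is held almost whole in memory, which defeats the usual point of chunking; typical chunk sizes are a few megabytes. A sketch of streaming a tensor to an HTTP endpoint in fixed-size chunks (the URL is an assumption; requests sends a generator body using chunked transfer encoding):

    import io
    import numpy as np
    import requests

    def stream_tensor(url, tensor, chunk_size=8 * 1024 * 1024):
        buf = io.BytesIO()
        np.save(buf, tensor)  # serialize: npy header plus raw bytes
        buf.seek(0)

        def chunks():
            while True:
                piece = buf.read(chunk_size)
                if not piece:
                    break
                yield piece

        return requests.post(url, data=chunks(), timeout=600)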
@@ -110,6 +154,9 @@ def test_ai_integration_http():
            'active_tensors': 0
        }
    }
+
+    # Increase file descriptor limit
+    increase_file_limit()
 
    print(f"\nElectron-Speed Architecture Parameters:")
    print(f"Target switches/sec: {TARGET_SWITCHES_PER_SEC:.2e}")
@@ -117,141 +164,195 @@ def test_ai_integration_http():
    print(f"Electron drift velocity: {drift_velocity:.2e} m/s")
    print(f"Percentage of light speed: {(drift_velocity/speed_of_light_silicon)*100:.2f}%")
 
-    # Test 1: HTTP-Based Model Loading
-    print("\nTest 1: Model Loading with HTTP Storage")
+    # Test 1: HTTP-Based Model Loading with Florence
+    print("\nTest 1: Loading Florence Model with HTTP Storage")
    try:
        # Use HTTP connection manager for proper resource handling
-        with http_storage_manager() as storage:
+        with http_manager() as storage:
            components['storage'] = storage  # Save storage reference
 
-            # Initialize virtual GPU stack with unlimited HTTP storage and shared connection
-            chip_for_loading = Chip(chip_id=0, vram_size_gb=None, storage=storage)  # Pass shared storage
+            # Initialize virtual GPU stack with HTTP storage
+            chip_for_loading = Chip(chip_id=0, vram_size_gb=32, storage=storage)  # Allocate sufficient VRAM
            components['chips'].append(chip_for_loading)
 
-            # Initialize VRAM with shared HTTP storage
-            vram = VirtualVRAM(storage=storage)  # Pass shared storage instance
+            # Initialize VRAM with HTTP storage
+            vram = VirtualVRAM(storage=storage)
            components['vram'] = vram
 
-            # Set up AI accelerator with HTTP storage
-            ai_accelerator_for_loading = AIAccelerator(vram=vram, storage=storage)
-            ai_accelerator_for_loading.initialize_tensor_cores()  # Ensure tensor cores are ready
+            # Set up AI accelerator with HTTP support
+            ai_accelerator_for_loading = AIAcceleratorHTTP(chip=chip_for_loading)
+            ai_accelerator_for_loading.vram = vram
+            ai_accelerator_for_loading.initialize_tensor_cores()
            components['ai_accelerators'].append(ai_accelerator_for_loading)
 
            # Initialize model registry in HTTP storage
-            storage.store_state("model_registry", "state", {
+            storage.store_model_state({
                "initialized": True,
-                "max_vram": None,  # Unlimited
+                "max_vram": 32 * 1024 * 1024 * 1024,  # 32GB in bytes
                "active_models": {}
            })
 
-            # Load BLIP-2 Large model directly to HTTP storage
+            # Load Florence-2 model with HTTP storage
+            from transformers import AutoModelForCausalLM, AutoProcessor
            model_id = "microsoft/florence-2-large"
-            print(f"Loading model {model_id} directly to HTTP storage...")
+            print(f"Loading model {model_id} with HTTP storage...")
 
            try:
-                # Simulate model loading (in real scenario, would load actual model)
-                model_data = {
-                    "model_name": model_id,
-                    "model_type": "florence-2-large",
-                    "parameters": 771000000,  # Approximate parameter count
-                    "architecture": "vision-language",
-                    "loaded_at": time.time()
-                }
+                # Load model and processor with HTTP optimization
+                model = AutoModelForCausalLM.from_pretrained(
+                    model_id,
+                    trust_remote_code=True,
+                    device_map="auto",
+                    torch_dtype=torch.float16,  # Use FP16 for better memory efficiency
+                    low_cpu_mem_usage=True,
+                    offload_folder="model_cache"  # Enable disk offloading if needed
+                )
 
-                # Ensure HTTP connection is active before proceeding
-                if not ai_accelerator_for_loading.storage.wait_for_connection():
-                    raise RuntimeError("HTTP connection lost - please retry")
+                processor = AutoProcessor.from_pretrained(
+                    model_id,
+                    trust_remote_code=True
+                )
+
+                # Configure HTTP transfer settings
+                ai_accelerator_for_loading.configure_http({
+                    'chunk_size': components['http_config']['chunk_size'],
+                    'timeout': components['http_config']['timeout'],
+                    'keep_alive': True,
+                    'streaming': True
+                })
+
+                # Verify HTTP connection before proceeding
+                if not ai_accelerator_for_loading.storage.verify_connection():
+                    # Try to reestablish connection
+                    if not ai_accelerator_for_loading.storage.reconnect():
+                        raise RuntimeError("HTTP connection lost and reconnection failed")
 
                # Calculate model size for proper VRAM allocation
-                model_size = model_data["parameters"] * 4  # 4 bytes per parameter (float32)
+                model_size = sum(p.numel() * p.element_size() for p in model.parameters())
                print(f"Model size: {model_size / (1024**3):.2f} GB")
 
-                # Load model directly using AIAccelerator's load_model method
-                success = ai_accelerator_for_loading.load_model(
-                    model_id=model_id,
-                    model=model_data,
-                    processor=None
-                )
+                # Store model in WebSocket storage with size information
+                # Load model with robust HTTP handling
+                def load_model_with_retry(max_retries=3):
+                    for attempt in range(max_retries):
+                        try:
+                            # Configure HTTP parameters for model loading
+                            ai_accelerator_for_loading.configure_http({
+                                'chunk_size': components['http_config']['chunk_size'],
+                                'timeout': components['http_config']['timeout'],
+                                'keep_alive': True
+                            })
+
+                            # Load model with HTTP optimizations
+                            ai_accelerator_for_loading.load_model(
+                                model_id=model_id,
+                                model=model,
+                                processor=processor,
+                                http_transfer=True,
+                                streaming=True  # Enable streaming for large model
+                            )
+                            return True
+                        except Exception as e:
+                            logging.error(f"Model loading attempt {attempt + 1} failed: {e}")
+                            if attempt < max_retries - 1:
+                                time.sleep(components['http_config']['retry_delay'])
+                                # Attempt to refresh HTTP connection
+                                ai_accelerator_for_loading.refresh_http_connection()
+                                continue
+                    return False
+
+                if not load_model_with_retry():
+                    raise RuntimeError("Failed to load model after multiple attempts")
 
-                if success:
-                    print(f"Model '{model_id}' loaded successfully to HTTP storage.")
-                    assert ai_accelerator_for_loading.has_model(model_id), "Model not found in HTTP storage after loading."
-
-                    # Store model parameters in components dict
-                    components['model_id'] = model_id
-                    components['model_size'] = model_size
-                    components['model_config'] = model_data
-                else:
-                    raise RuntimeError("Failed to load model via HTTP storage")
+                print(f"Model '{model_id}' loaded successfully to WebSocket storage.")
+                assert ai_accelerator_for_loading.has_model(model_id), "Model not found in WebSocket storage after loading."
+
+                # Store model parameters in components dict
+                components['model_id'] = model_id
+                components['model_size'] = model_size
+
+                # Clear any CPU-side model data
+                model = None
+                processor = None
+                import gc
+                gc.collect()
 
            except Exception as e:
                print(f"Detailed model loading error: {str(e)}")
-                print("Falling back to placeholder model mode...")
-                # Try loading with placeholder model
+                print("Attempting to load with alternative configuration...")
                try:
-                    placeholder_model = {
-                        "model_name": model_id,
-                        "model_type": "placeholder",
-                        "parameters": 1000000,  # Small placeholder
-                        "architecture": "test",
-                        "loaded_at": time.time()
-                    }
+                    # Try loading with optimized network settings
+                    ai_accelerator_for_loading.configure_http({
+                        'chunk_size': 2 * 1024 * 1024 * 1024,  # 2GB chunks
+                        'timeout': 600,  # 10 minutes timeout
+                        'keep_alive': True,
+                        'streaming': True,
+                        'retry_on_failure': True,
+                        'network_buffer_size': 4 * 1024 * 1024 * 1024  # 4GB network buffer
+                    })
 
-                    success = ai_accelerator_for_loading.load_model(
-                        model_id=model_id,
-                        model=placeholder_model,
-                        processor=None
+                    model = AutoModelForCausalLM.from_pretrained(
+                        model_id,
+                        trust_remote_code=True,
+                        device_map="auto",
+                        torch_dtype=torch.float16,
+                        low_cpu_mem_usage=True,
+                        max_memory={'cpu': '16GB'}
                    )
 
-                    if success:
-                        components['model_id'] = model_id
-                        components['model_config'] = placeholder_model
-                        print("Successfully loaded placeholder model via HTTP")
-                    else:
-                        raise RuntimeError("Placeholder model loading also failed")
-
+                    processor = AutoProcessor.from_pretrained(
+                        model_id,
+                        trust_remote_code=True
+                    )
+
+                    # Attempt load with new configuration
+                    ai_accelerator_for_loading.load_model(
+                        model_id=model_id,
+                        model=model,
+                        processor=processor,
+                        force_reload=True
+                    )
+                    components['model_id'] = model_id
+                    print("Successfully loaded model with alternative configuration")
                except Exception as e2:
-                    print(f"Placeholder fallback also failed: {str(e2)}")
+                    print(f"Alternative loading configuration failed: {str(e2)}")
                    raise
 
    except Exception as e:
        print(f"Model loading test failed: {e}")
        return
-
-    # Test 2: HTTP-Based Multi-Chip Processing
+    # Test 2: HTTP-Based Multi-Chip Processing for Florence Inference
    print("\nTest 2: HTTP-Based Parallel Processing across Multiple Chips")
    num_chips = 4  # Using multiple chips for maximum parallelization
    chips = []
    ai_accelerators = []
 
    try:
-        # Try to reuse existing connection with verification
+        # Try to reuse existing HTTP connection with verification
        shared_storage = None
        max_connection_attempts = 3
 
        for attempt in range(max_connection_attempts):
            try:
-                if (components['storage'] and
-                        components['storage'].wait_for_connection(timeout=10.0)):
+                if components['storage']:
                    shared_storage = components['storage']
                    logging.info("Successfully reused existing HTTP connection")
                    break
                else:
-                    logging.warning("Existing connection unavailable, creating new HTTP connection...")
-                    with http_storage_manager(timeout=30.0) as new_storage:
-                        if new_storage and new_storage.wait_for_connection(timeout=10.0):
-                            components['storage'] = new_storage
-                            shared_storage = new_storage
-                            logging.info("Successfully established new HTTP connection")
-                            break
+                    logging.warning("Existing connection unavailable, creating new connection...")
+                    with http_manager() as new_storage:
+                        components['storage'] = new_storage
+                        shared_storage = new_storage
+                        logging.info("Successfully established new HTTP connection")
+                        break
            except Exception as e:
-                logging.error(f"HTTP connection attempt {attempt + 1} failed: {e}")
+                logging.error(f"Connection attempt {attempt + 1} failed: {e}")
                if attempt < max_connection_attempts - 1:
                    time.sleep(2)
                    continue
                raise RuntimeError(f"Failed to establish HTTP connection after {max_connection_attempts} attempts")
 
-        # Initialize high-performance chip array with HTTP storage
+        # Initialize high-performance chip array with HTTP storage for Florence
        total_sms = 0
        total_cores = 0
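
For reference, Florence-2 is task-prompt driven: the processor expects a task token such as "<CAPTION>" alongside the image, and inference goes through model.generate followed by post_process_generation rather than a bare forward pass. A sketch of the captioning flow following the model card (exact argument names can shift between transformers versions):

    import torch
    from PIL import Image
    from transformers import AutoModelForCausalLM, AutoProcessor

    model_id = "microsoft/florence-2-large"
    model = AutoModelForCausalLM.from_pretrained(
        model_id, trust_remote_code=True, torch_dtype=torch.float16, device_map="auto"
    )
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

    image = Image.open("test_image.jpg").convert("RGB")
    inputs = processor(text="<CAPTION>", images=image, return_tensors="pt").to(model.device, torch.float16)
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=256,
    )
    raw = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    caption = processor.post_process_generation(raw, task="<CAPTION>", image_size=(image.width, image.height))
    print(caption)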
 
@@ -262,54 +363,80 @@ def test_ai_integration_http():
        # Reuse existing VRAM instance with shared storage
        shared_vram = components['vram']
        if shared_vram is None:
-            shared_vram = VirtualVRAM(storage=shared_storage)
+            shared_vram = VirtualVRAM()
            shared_vram.storage = shared_storage
 
        for i in range(num_chips):
            # Configure each chip with shared HTTP storage
-            chip = Chip(chip_id=i, vram_size_gb=None, storage=shared_storage)
+            chip = Chip(chip_id=i, vram_size_gb=32, storage=shared_storage)  # 32GB VRAM per chip
            chips.append(chip)
 
            # Connect chips in a ring topology
            if i > 0:
                chip.connect_chip(chips[i-1], optical_link)
 
-            # Initialize AI accelerator with shared resources
-            ai_accelerator = AIAccelerator(vram=shared_vram, storage=shared_storage)
+            # Initialize AI accelerator with HTTP support
+            ai_accelerator = AIAcceleratorHTTP(chip=chip)
+            ai_accelerator.vram = shared_vram
+            ai_accelerator.storage = shared_storage
            ai_accelerators.append(ai_accelerator)
 
-            # Verify and potentially repair HTTP connection
-            max_retry = 3
-            for retry in range(max_retry):
-                try:
-                    if not shared_storage.wait_for_connection(timeout=5.0):
-                        logging.warning(f"Connection check failed for chip {i}, attempt {retry + 1}")
-                        shared_storage.reconnect()  # Attempt to reconnect
-                        time.sleep(1)
-                        continue
-
-                    # Load model weights from HTTP storage (no CPU transfer)
-                    success = ai_accelerator.load_model(components['model_id'], components['model_config'], None)
-                    if success:
-                        logging.info(f"Successfully initialized chip {i} with model via HTTP")
-                        break
-                    else:
-                        raise RuntimeError("Model loading failed")
-
-                except Exception as e:
-                    if retry < max_retry - 1:
-                        logging.warning(f"Error initializing chip {i}, attempt {retry + 1}: {e}")
-                        time.sleep(1)
-                        continue
-                    else:
-                        logging.error(f"Failed to initialize chip {i} after {max_retry} attempts: {e}")
-                        raise
-
+            # Initialize tensor cores for Florence model
+            ai_accelerator.initialize_tensor_cores()
+
+            print("\nTest 3: Florence Model Inference with HTTP Storage")
+            try:
+                # Load test image
+                image_path = "test_image.jpg"  # Make sure this image exists
+                if os.path.exists(image_path):
+                    image = Image.open(image_path)
+
+                    # Prepare input for Florence model
+                    inputs = processor(image, return_tensors="pt")
+
+                    # Run inference using HTTP storage
+                    outputs = ai_accelerator.run_inference(
+                        model_id="microsoft/florence-2-large",
+                        inputs=inputs,
+                        use_http=True
+                    )
+
+                    # Process outputs
+                    if outputs is not None:
+                        predicted_caption = processor.decode(outputs[0], skip_special_tokens=True)
+                        print(f"\nFlorence Model Caption: {predicted_caption}")
+                    else:
+                        print("Inference failed to produce output")
+
+                else:
+                    print(f"Test image not found at {image_path}")
+
+            except Exception as e:
+                print(f"Inference test failed: {str(e)}")
+            finally:
+                # Cleanup
+                for ai_accelerator in ai_accelerators:
+                    try:
+                        ai_accelerator.cleanup()
+                    except Exception as e:
+                        print(f"Cleanup error: {str(e)}")
+
+                if shared_storage:
+                    try:
+                        shared_storage.close()
+                    except Exception as e:
+                        print(f"Storage cleanup error: {str(e)}")
+
+                # Clear any remaining GPU memory
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
+
+
            # Track total processing units
            total_sms += chip.num_sms
            total_cores += chip.num_sms * chip.cores_per_sm
 
-            # Store chip configuration in HTTP storage
+            # Store chip configuration in WebSocket storage
            shared_storage.store_state(f"chips/{i}/config", "state", {
                "num_sms": chip.num_sms,
                "cores_per_sm": chip.cores_per_sm,
@@ -317,172 +444,140 @@ def test_ai_integration_http():
                "connected_chips": [c.chip_id for c in chip.connected_chips]
            })
 
-            print(f"Chip {i} initialized with HTTP storage and optical interconnect")
+            print(f"Chip {i} initialized with WebSocket storage and optical interconnect")
+
+        # Get all image files in sample_task folder
+        image_folder = os.path.join(os.path.dirname(__file__), '..', 'sample_task')
+        image_files = [f for f in os.listdir(image_folder) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif'))]
+        image_files.sort()
+        if not image_files:
+            print("No images found in sample_task folder.")
+            return
 
        print(f"\nTotal Processing Units:")
        print(f"- Streaming Multiprocessors: {total_sms:,}")
        print(f"- CUDA Cores: {total_cores:,}")
        print(f"- Electron-speed tensor cores: {total_cores * 8:,}")
 
-        # Test multi-chip parallel inference with HTTP storage
-        print(f"\nRunning HTTP-based inference simulation")
-
-        # Create test input data
-        test_image = np.random.rand(224, 224, 3).astype(np.float32)
-        print(f"Created test image with shape: {test_image.shape}")
-
-        # Store input image in HTTP storage
-        input_tensor_id = "test_input_image"
-        if shared_storage.store_tensor(input_tensor_id, test_image):
-            print(f"Successfully stored test image in HTTP storage")
-        else:
-            raise RuntimeError("Failed to store test image")
-
-        # Synchronize all chips through HTTP storage
-        start_time = time.time()
-
-        # Distribute workload across chips using HTTP storage
-        batch_size = test_image.shape[0] // num_chips if test_image.shape[0] >= num_chips else 1
-        results = []
-
-        for i, accelerator in enumerate(ai_accelerators):
-            try:
-                # Run inference using HTTP-stored weights
-                result = accelerator.inference(components['model_id'], input_tensor_id)
-
-                if result is not None:
-                    # Store result in HTTP storage
-                    result_id = f"results/chip_{i}/test_image"
-                    if shared_storage.store_tensor(result_id, result):
-                        results.append(result)
-                        print(f"Chip {i} completed inference and stored result")
-                    else:
-                        print(f"Chip {i} inference succeeded but result storage failed")
-                else:
-                    print(f"Chip {i} inference failed")
-
-            except Exception as e:
-                print(f"Error in chip {i} inference: {e}")
-
-        elapsed = time.time() - start_time
-
-        # Calculate performance metrics
-        ops_per_inference = total_cores * 1024  # FMA ops per core
-        from electron_speed import drift_velocity, TARGET_SWITCHES_PER_SEC
-        electron_transit_time = 1 / (drift_velocity * TARGET_SWITCHES_PER_SEC)
-        theoretical_time = electron_transit_time * ops_per_inference / total_cores
-
-        print(f"\nHTTP-Based Multi-Chip Inference Results:")
-        print(f"- Chips used: {num_chips}")
-        print(f"- Results collected: {len(results)}")
-        print(f"- Total time: {elapsed:.4f}s")
-        print(f"- Theoretical electron-speed time: {theoretical_time:.6f}s")
-        print(f"- Speed ratio: {theoretical_time/elapsed:.2f}x theoretical")
-        print(f"- Operations per second: {ops_per_inference/elapsed:.2e}")
-
-        # Test 3: HTTP Storage Performance
-        print(f"\nTest 3: HTTP Storage Performance Evaluation")
-
-        # Test tensor storage/retrieval performance
-        test_sizes = [1024, 4096, 16384, 65536]  # Different tensor sizes
-        storage_times = []
-        retrieval_times = []
-
-        for size in test_sizes:
-            test_tensor = np.random.rand(size).astype(np.float32)
-            tensor_id = f"perf_test_{size}"
-
-            # Test storage time
-            start = time.time()
-            success = shared_storage.store_tensor(tensor_id, test_tensor)
-            storage_time = time.time() - start
-
-            if success:
-                storage_times.append(storage_time)
-
-                # Test retrieval time
-                start = time.time()
-                retrieved = shared_storage.load_tensor(tensor_id)
-                retrieval_time = time.time() - start
-
-                if retrieved is not None and np.array_equal(test_tensor, retrieved):
-                    retrieval_times.append(retrieval_time)
-                    print(f"Size {size}: Store {storage_time:.4f}s, Retrieve {retrieval_time:.4f}s")
-                else:
-                    print(f"Size {size}: Retrieval verification failed")
-            else:
-                print(f"Size {size}: Storage failed")
-
-        if storage_times and retrieval_times:
-            avg_storage = sum(storage_times) / len(storage_times)
-            avg_retrieval = sum(retrieval_times) / len(retrieval_times)
-            print(f"Average storage time: {avg_storage:.4f}s")
-            print(f"Average retrieval time: {avg_retrieval:.4f}s")
-
-        # Test 4: Multi-chip coordination via HTTP
-        print(f"\nTest 4: Multi-Chip Coordination via HTTP")
-
-        # Test cross-chip data transfer
-        test_data_id = "cross_chip_test_data"
-        test_data = np.array([1, 2, 3, 4, 5], dtype=np.float32)
-
-        if shared_storage.store_tensor(test_data_id, test_data):
-            print("Stored test data for cross-chip transfer")
-
-            # Transfer data between chips
-            new_data_id = shared_storage.transfer_between_chips(0, 1, test_data_id)
-            if new_data_id:
-                print(f"Successfully transferred data from chip 0 to chip 1: {new_data_id}")
-
-                # Verify transferred data
-                transferred_data = shared_storage.load_tensor(new_data_id)
-                if transferred_data is not None and np.array_equal(test_data, transferred_data):
-                    print("Cross-chip transfer verification successful")
-                else:
-                    print("Cross-chip transfer verification failed")
-            else:
-                print("Cross-chip transfer failed")
-
-        # Test synchronization barriers
-        barrier_id = "test_barrier"
-        num_participants = num_chips
-
-        if shared_storage.create_sync_barrier(barrier_id, num_participants):
-            print(f"Created synchronization barrier for {num_participants} participants")
-
-            # Simulate participants arriving at barrier
-            for i in range(num_participants):
-                result = shared_storage.wait_sync_barrier(barrier_id)
-                if i == num_participants - 1:
-                    if result:
-                        print("All participants reached barrier - synchronization successful")
-                    else:
-                        print("Barrier synchronization failed")
-                else:
-                    print(f"Participant {i+1} reached barrier")
-
-        print(f"\nHTTP-based AI integration test completed successfully!")
-
-        # Final statistics
-        final_stats = {
-            "chips_initialized": len(chips),
-            "ai_accelerators": len(ai_accelerators),
-            "total_cores": total_cores,
-            "model_loaded": components['model_id'] is not None,
-            "storage_type": "HTTP",
-            "connection_status": shared_storage.get_connection_status()
-        }
-
-        print(f"\nFinal System Statistics:")
-        for key, value in final_stats.items():
-            print(f"- {key}: {value}")
-
+        # Test multi-chip parallel inference with WebSocket storage
+        for img_name in image_files[:1]:  # Test with first image
+            img_path = os.path.join(image_folder, img_name)
+            raw_image = Image.open(img_path).convert('RGB')
+            print(f"\nRunning WebSocket-based inference for image: {img_name}")
+
+            # Store input image in WebSocket storage
+            image_array = np.array(raw_image)
+
+            # Use shared VRAM's storage for tensor operations
+            shared_vram.storage.store_tensor(f"input_image/{img_name}", image_array)
+
+            # Free CPU memory immediately
+            raw_image = None
+            image_array_shape = image_array.shape
+            image_array = None
+            gc.collect()
+
+            # Synchronize all chips through WebSocket storage
+            start_time = time.time()
+
+            # Distribute workload across chips using WebSocket storage
+            batch_size = image_array_shape[0] // num_chips
+            results = []
+
+            # Ensure all connections are properly managed
+            for accelerator in ai_accelerators:
+                accelerator.vram.storage = shared_vram.storage
+
+            for i, accelerator in enumerate(ai_accelerators):
+                # Load image section from WebSocket storage
+                tensor_id = f"input_image/{img_name}"
+
+                # Run inference using WebSocket-stored weights
+                result = accelerator.inference(model_id, tensor_id)
+
+                # Store result in WebSocket storage
+                if result is not None:
+                    storage.store_tensor(f"results/chip_{i}/{img_name}", result)
+                    results.append(result)
+
+            elapsed = time.time() - start_time
+
+            # Calculate performance metrics
+            ops_per_inference = total_cores * 1024  # FMA ops per core
+            electron_transit_time = 1 / (drift_velocity * TARGET_SWITCHES_PER_SEC)
+            theoretical_time = electron_transit_time * ops_per_inference / total_cores
+
+            # Combine results from all chips through WebSocket storage
+            final_result = None
+            for i in range(num_chips):
+                chip_result = storage.load_tensor(f"results/chip_{i}/{img_name}")
+                if chip_result is not None:
+                    if final_result is None:
+                        final_result = chip_result
+                    else:
+                        final_result = np.concatenate([final_result, chip_result])
+
+            print(f"\nWebSocket-Based Performance Metrics:")
+            print(f"- Final result shape: {final_result.shape if final_result is not None else 'None'}")
+            print(f"- Wall clock time: {elapsed*1000:.3f} ms")
+            print(f"- Theoretical electron transit time: {theoretical_time*1e12:.3f} ps")
+            print(f"- Effective TFLOPS: {(ops_per_inference / elapsed) / 1e12:.2f}")
+            print(f"- Number of chips used: {num_chips}")
+
+            assert final_result is not None, "WebSocket-based inference returned None"
+            assert isinstance(result, str), "Inference result is not a string"
+            print("Multi-chip inference test on all images (virtual GPU stack) successful.")
+
+    except Exception as e:
+        print(f"Multi-chip inference test failed: {e}")
+        return
+    return
+
+
+    # Test 3: Electron-Speed Matrix Operations
+    print("\nTest 3: Electron-Speed Matrix Operations")
+    try:
+        # Create large matrices to demonstrate parallel processing
+        size = 1024  # Large enough to show parallelization benefits
+        matrix_a = [[float(i+j) for j in range(size)] for i in range(size)]
+        matrix_b = [[float(i*j+1) for j in range(size)] for i in range(size)]
+
+        print("\nLoading matrices into virtual VRAM...")
+        matrix_a_id = ai_accelerator_for_loading.load_matrix(matrix_a, "matrix_A")
+        matrix_b_id = ai_accelerator_for_loading.load_matrix(matrix_b, "matrix_B")
+
+        print("\nPerforming electron-speed matrix multiplication...")
+        start_time = time.time()
+        result_matrix_id = ai_accelerator_for_loading.matrix_multiply(matrix_a_id, matrix_b_id, "result_C")
+        result_matrix = ai_accelerator_for_loading.get_matrix(result_matrix_id)
+
+        elapsed = time.time() - start_time
+
+        # Calculate electron-speed performance metrics
+        ops = size * size * size * 2  # Total multiply-add operations
+        electron_transit_time = 1 / (drift_velocity * TARGET_SWITCHES_PER_SEC)
+        theoretical_time = electron_transit_time * ops / (total_cores * 8)  # 8 tensor cores per CUDA core
+
+        print("\nElectron-Speed Matrix Operation Metrics:")
+        print(f"Matrix size: {size}x{size}")
+        print(f"Total operations: {ops:,}")
+        print(f"Wall clock time: {elapsed*1000:.3f} ms")
+        print(f"Theoretical electron transit time: {theoretical_time*1e12:.3f} ps")
+        print(f"Effective TFLOPS: {(ops / elapsed) / 1e12:.2f}")
+
+        # Verify first few elements for correctness
+        print("\nValidating results (first 2x2 corner):")
+        print(f"Result[0:2,0:2] = ")
+        for i in range(min(2, len(result_matrix))):
+            print(result_matrix[i][:2])
+
+        # Validate dimensions
+        assert len(result_matrix) == size, "Result matrix has incorrect dimensions"
+        assert len(result_matrix[0]) == size, "Result matrix has incorrect dimensions"
+        print("\nMatrix operations at electron speed successful.")
+
    except Exception as e:
-        print(f"Multi-chip processing test failed: {e}")
-        import traceback
-        traceback.print_exc()
+        print(f"Matrix operations test failed: {e}")
        return
 
-if __name__ == "__main__":
-    test_ai_integration_http()
-
+    print("\n--- All AI Integration Tests Completed ---")
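
Two runtime hazards in the new Test 2/Test 3 path: on the successful load path, processor is set to None at the end of Test 1, so the later call processor(image, return_tensors="pt") would raise TypeError, and batch_size is computed but never used, so every chip runs on the same tensor rather than a shard. A sketch of row-wise sharding that actually splits the input across workers (np.array_split tolerates sizes that do not divide evenly; the doubling step stands in for per-chip inference):

    import numpy as np

    def shard_rows(array, num_workers):
        return np.array_split(array, num_workers, axis=0)

    image_array = np.random.rand(224, 224, 3).astype(np.float32)
    shards = shard_rows(image_array, 4)
    results = [s * 2.0 for s in shards]  # stand-in for per-chip inference
    final = np.concatenate(results, axis=0)
    assert final.shape == image_array.shape

Separately, the function returns before the matrix test, so that block is unreachable as committed. When it does run, the matrices chosen have a closed-form product that makes the 2x2 corner check much stronger: with a[i][j] = i + j and b[i][j] = i*j + 1, C[i][j] = i*j*S1 + i*n + j*S2 + S1, where S1 = n(n-1)/2 and S2 = (n-1)n(2n-1)/6. A sketch validating a NumPy product against that formula:

    import numpy as np

    n = 1024
    k = np.arange(n)
    a = k[:, None] + k[None, :]          # a[i][j] = i + j
    b = k[:, None] * k[None, :] + 1      # b[i][j] = i*j + 1
    c = a.astype(np.float64) @ b.astype(np.float64)

    s1 = n * (n - 1) // 2
    s2 = (n - 1) * n * (2 * n - 1) // 6
    for i in range(2):
        for j in range(2):
            expected = i * j * s1 + i * n + j * s2 + s1
            assert c[i, j] == expected
    print("2x2 corner matches the closed form")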
 