Factor Studios committed on
Commit
2d21971
·
verified ·
1 Parent(s): a31170e

Upload 2 files

Browse files
Files changed (2) hide show
  1. ai_http.py +77 -35
  2. test_ai_integration_http.py +12 -6
ai_http.py CHANGED
@@ -25,21 +25,31 @@ class AIAccelerator:
25
  """
26
 
27
  def __init__(self, vram=None, num_sms: int = 800, cores_per_sm: int = 222, storage=None):
28
- """Initialize AI Accelerator with electron-speed awareness and shared HTTP storage."""
29
  from electron_speed import TARGET_SWITCHES_PER_SEC, TRANSISTORS_ON_CHIP, drift_velocity
30
 
31
  self.storage = storage # Use the shared storage instance
32
  if self.storage is None:
33
- from http_storage import HTTPGPUStorage
34
- self.storage = HTTPGPUStorage() # Create HTTP storage instead of WebSocket
35
  if not self.storage.wait_for_connection():
36
- raise RuntimeError("Could not connect to GPU storage server")
37
 
38
  self.vram = vram
39
  self.num_sms = num_sms
40
  self.cores_per_sm = cores_per_sm
41
  self.total_cores = num_sms * cores_per_sm
42
 
 
 
 
 
 
 
 
 
 
 
43
  # Configure for maximum parallel processing at electron speed
44
  total_tensor_cores = num_sms * cores_per_sm # Use ALL cores for tensor operations
45
  self.tensor_core_array = TensorCoreArray(
@@ -50,6 +60,15 @@ class AIAccelerator:
50
  self.tensor_cores_initialized = False
51
  self._vram_allocated = 0
52
 
 
 
 
 
 
 
 
 
 
53
  def pre_allocate_vram(self, size_bytes: int) -> bool:
54
  """Pre-allocate VRAM for model loading"""
55
  if not self.vram:
@@ -73,31 +92,39 @@ class AIAccelerator:
73
 
74
  def has_model(self, model_id: str) -> bool:
75
  """Check if a model is loaded"""
76
- if not self.storage:
77
  return False
78
- return self.storage.is_model_loaded(model_id)
79
 
80
- # Initialize model, tensor, and tokenizer tracking
81
- self.model_registry: Dict[str, Dict[str, Any]] = {} # Track loaded models
82
- self.tensor_registry: Dict[str, Dict[str, Any]] = {} # Track tensor metadata
83
- self.tokenizer_registry: Dict[str, Any] = {} # Track tokenizers
84
- self.resource_monitor = {
85
- 'vram_used': 0,
86
- 'active_tensors': 0,
87
- 'loaded_models': set()
88
- }
89
-
90
- # AI operation statistics
91
- self.operations_performed = 0
92
- self.total_compute_time = 0.0
93
- self.flops_performed = 0
94
-
95
- # HTTP-based memory management
96
- self.model_registry = {} # Track loaded models
97
- self.matrix_registry = {} # Track loaded matrices
98
- self.matrix_counter = 0
99
- self.activation_cache: Dict[str, str] = {} # Cache activation outputs
100
- self.weight_cache: Dict[str, Any] = {} # Cache preprocessed weights
 
 
 
 
 
 
 
 
101
 
102
  # Model registries
103
  self.model_registry: Dict[str, Any] = {}
@@ -434,32 +461,47 @@ class AIAccelerator:
434
  return None
435
 
436
  def has_model(self, model_id: str) -> bool:
437
- """Check if model is loaded via HTTP storage"""
438
- return self.storage.is_model_loaded(model_id)
 
 
439
 
440
  def load_model(self, model_id: str, model=None, processor=None) -> bool:
441
- """Load model via HTTP storage"""
442
  try:
 
 
 
443
  # Prepare model data for storage
444
- model_data = None
445
- if model is not None:
446
- # In a real implementation, this would serialize the model
 
 
447
  model_data = {
448
  "model_type": type(model).__name__,
449
  "config": self._serialize_model_config(getattr(model, 'config', None)),
450
  "loaded_at": time.time()
451
  }
452
-
453
- # Use HTTP storage to load model
454
  success = self.storage.load_model(model_id, model_data=model_data)
455
 
456
  if success:
 
457
  self.model_registry[model_id] = {
458
  "model_data": model_data,
459
  "processor": processor,
460
  "loaded_at": time.time()
461
  }
 
 
462
  self.resource_monitor['loaded_models'].add(model_id)
 
 
 
 
 
463
  return True
464
 
465
  return False
 
25
  """
26
 
27
  def __init__(self, vram=None, num_sms: int = 800, cores_per_sm: int = 222, storage=None):
28
+ """Initialize AI Accelerator with electron-speed awareness and shared storage."""
29
  from electron_speed import TARGET_SWITCHES_PER_SEC, TRANSISTORS_ON_CHIP, drift_velocity
30
 
31
  self.storage = storage # Use the shared storage instance
32
  if self.storage is None:
33
+ from http_storage import LocalStorage
34
+ self.storage = LocalStorage()
35
  if not self.storage.wait_for_connection():
36
+ raise RuntimeError("Could not connect to storage")
37
 
38
  self.vram = vram
39
  self.num_sms = num_sms
40
  self.cores_per_sm = cores_per_sm
41
  self.total_cores = num_sms * cores_per_sm
42
 
43
+ # Initialize registries and monitors
44
+ self.model_registry: Dict[str, Dict[str, Any]] = {} # Track loaded models
45
+ self.tensor_registry: Dict[str, Dict[str, Any]] = {} # Track tensor metadata
46
+ self.tokenizer_registry: Dict[str, Any] = {} # Track tokenizers
47
+ self.resource_monitor = {
48
+ 'vram_used': 0,
49
+ 'active_tensors': 0,
50
+ 'loaded_models': set()
51
+ }
52
+
53
  # Configure for maximum parallel processing at electron speed
54
  total_tensor_cores = num_sms * cores_per_sm # Use ALL cores for tensor operations
55
  self.tensor_core_array = TensorCoreArray(
 
60
  self.tensor_cores_initialized = False
61
  self._vram_allocated = 0
62
 
63
+ # Initialize operation tracking
64
+ self.operations_performed = 0
65
+ self.total_compute_time = 0.0
66
+ self.flops_performed = 0
67
+
68
+ # Initialize caches
69
+ self.activation_cache: Dict[str, str] = {} # Cache activation outputs
70
+ self.weight_cache: Dict[str, Any] = {} # Cache preprocessed weights
71
+
72
  def pre_allocate_vram(self, size_bytes: int) -> bool:
73
  """Pre-allocate VRAM for model loading"""
74
  if not self.vram:
 
92
 
93
  def has_model(self, model_id: str) -> bool:
94
  """Check if a model is loaded"""
95
+ if not model_id:
96
  return False
97
+ return model_id in self.model_registry and self.storage.is_model_loaded(model_id)
98
 
99
+ def load_model(self, model_id: str, model: Dict[str, Any], processor: Any = None) -> bool:
100
+ """Load a model into the accelerator"""
101
+ try:
102
+ if not self.storage:
103
+ raise RuntimeError("No storage available")
104
+
105
+ # Store model in local storage
106
+ if not self.storage.load_model(model_id, model_data=model):
107
+ raise RuntimeError("Failed to store model in local storage")
108
+
109
+ # Update model registry and resource monitor
110
+ self.model_registry[model_id] = {
111
+ 'config': model,
112
+ 'loaded_at': time.time(),
113
+ 'processor': processor
114
+ }
115
+
116
+ # Update resource monitoring
117
+ self.resource_monitor['loaded_models'].add(model_id)
118
+
119
+ # Update storage monitoring if available
120
+ if hasattr(self.storage, 'resource_monitor'):
121
+ self.storage.resource_monitor['loaded_models'].add(model_id)
122
+
123
+ return True
124
+
125
+ except Exception as e:
126
+ print(f"Error loading model {model_id}: {str(e)}")
127
+ return False
128
 
129
  # Model registries
130
  self.model_registry: Dict[str, Any] = {}
 
461
  return None
462
 
463
  def has_model(self, model_id: str) -> bool:
464
+ """Check if model is loaded"""
465
+ if not model_id:
466
+ return False
467
+ return model_id in self.model_registry and self.storage.is_model_loaded(model_id)
468
 
469
  def load_model(self, model_id: str, model=None, processor=None) -> bool:
470
+ """Load model into local storage and register it with the accelerator"""
471
  try:
472
+ if not self.storage:
473
+ raise RuntimeError("No storage available")
474
+
475
  # Prepare model data for storage
476
+ model_data = model
477
+ if isinstance(model, dict):
478
+ model_data = model # Use as is if it's already a dict
479
+ elif model is not None:
480
+ # Serialize model object
481
  model_data = {
482
  "model_type": type(model).__name__,
483
  "config": self._serialize_model_config(getattr(model, 'config', None)),
484
  "loaded_at": time.time()
485
  }
486
+
487
+ # Store in local storage
488
  success = self.storage.load_model(model_id, model_data=model_data)
489
 
490
  if success:
491
+ # Update local registry
492
  self.model_registry[model_id] = {
493
  "model_data": model_data,
494
  "processor": processor,
495
  "loaded_at": time.time()
496
  }
497
+
498
+ # Update monitoring
499
  self.resource_monitor['loaded_models'].add(model_id)
500
+
501
+ # Update storage monitoring if supported
502
+ if hasattr(self.storage, 'resource_monitor'):
503
+ self.storage.resource_monitor['loaded_models'].add(model_id)
504
+
505
  return True
506
 
507
  return False
test_ai_integration_http.py CHANGED
@@ -167,17 +167,23 @@ def test_ai_integration_http():
167
  # Load model with local storage
168
  success = ai_accelerator_for_loading.load_model(
169
  model_id=model_id,
170
- model=model_data,
171
- processor=None,
172
- verify_load=True
173
  )
 
 
 
 
 
 
 
 
 
174
  except Exception as e:
175
  print(f"Exception during model loading: {str(e)}")
176
  success = False
177
-
178
  if success:
179
- print(f"Model '{model_id}' loaded successfully to HTTP storage.")
180
- assert ai_accelerator_for_loading.has_model(model_id), "Model not found in HTTP storage after loading."
181
 
182
  # Store model parameters in components dict
183
  components['model_id'] = model_id
 
167
  # Load model with local storage
168
  success = ai_accelerator_for_loading.load_model(
169
  model_id=model_id,
170
+ model=model_data
 
 
171
  )
172
+
173
+ if success:
174
+ print(f"Model '{model_id}' loaded successfully to local storage")
175
+ # Verify model is loaded in both accelerator and storage
176
+ if not ai_accelerator_for_loading.has_model(model_id):
177
+ raise RuntimeError(f"Model {model_id} not found in accelerator registry after loading")
178
+ else:
179
+ raise RuntimeError("Failed to load model in local storage")
180
+
181
  except Exception as e:
182
  print(f"Exception during model loading: {str(e)}")
183
  success = False
184
+
185
  if success:
186
+ print(f"Model '{model_id}' successfully loaded and verified")
 
187
 
188
  # Store model parameters in components dict
189
  components['model_id'] = model_id