Spaces:
Runtime error
Runtime error
Factor Studios
committed on
Upload 2 files
Browse files
- ai_http.py +77 -35
- test_ai_integration_http.py +12 -6
ai_http.py
CHANGED
|
@@ -25,21 +25,31 @@ class AIAccelerator:
|
|
| 25 |
"""
|
| 26 |
|
| 27 |
def __init__(self, vram=None, num_sms: int = 800, cores_per_sm: int = 222, storage=None):
|
| 28 |
-
"""Initialize AI Accelerator with electron-speed awareness and shared
|
| 29 |
from electron_speed import TARGET_SWITCHES_PER_SEC, TRANSISTORS_ON_CHIP, drift_velocity
|
| 30 |
|
| 31 |
self.storage = storage # Use the shared storage instance
|
| 32 |
if self.storage is None:
|
| 33 |
-
from http_storage import
|
| 34 |
-
self.storage =
|
| 35 |
if not self.storage.wait_for_connection():
|
| 36 |
-
raise RuntimeError("Could not connect to
|
| 37 |
|
| 38 |
self.vram = vram
|
| 39 |
self.num_sms = num_sms
|
| 40 |
self.cores_per_sm = cores_per_sm
|
| 41 |
self.total_cores = num_sms * cores_per_sm
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
# Configure for maximum parallel processing at electron speed
|
| 44 |
total_tensor_cores = num_sms * cores_per_sm # Use ALL cores for tensor operations
|
| 45 |
self.tensor_core_array = TensorCoreArray(
|
|
@@ -50,6 +60,15 @@ class AIAccelerator:
|
|
| 50 |
self.tensor_cores_initialized = False
|
| 51 |
self._vram_allocated = 0
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
def pre_allocate_vram(self, size_bytes: int) -> bool:
|
| 54 |
"""Pre-allocate VRAM for model loading"""
|
| 55 |
if not self.vram:
|
|
@@ -73,31 +92,39 @@ class AIAccelerator:
|
|
| 73 |
|
| 74 |
def has_model(self, model_id: str) -> bool:
|
| 75 |
"""Check if a model is loaded"""
|
| 76 |
-
if not
|
| 77 |
return False
|
| 78 |
-
return self.storage.is_model_loaded(model_id)
|
| 79 |
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
# Model registries
|
| 103 |
self.model_registry: Dict[str, Any] = {}
|
|
@@ -434,32 +461,47 @@ class AIAccelerator:
|
|
| 434 |
return None
|
| 435 |
|
| 436 |
def has_model(self, model_id: str) -> bool:
|
| 437 |
-
"""Check if model is loaded
|
| 438 |
-
|
|
|
|
|
|
|
| 439 |
|
| 440 |
def load_model(self, model_id: str, model=None, processor=None) -> bool:
|
| 441 |
-
"""Load model
|
| 442 |
try:
|
|
|
|
|
|
|
|
|
|
| 443 |
# Prepare model data for storage
|
| 444 |
-
model_data =
|
| 445 |
-
if model
|
| 446 |
-
#
|
|
|
|
|
|
|
| 447 |
model_data = {
|
| 448 |
"model_type": type(model).__name__,
|
| 449 |
"config": self._serialize_model_config(getattr(model, 'config', None)),
|
| 450 |
"loaded_at": time.time()
|
| 451 |
}
|
| 452 |
-
|
| 453 |
-
#
|
| 454 |
success = self.storage.load_model(model_id, model_data=model_data)
|
| 455 |
|
| 456 |
if success:
|
|
|
|
| 457 |
self.model_registry[model_id] = {
|
| 458 |
"model_data": model_data,
|
| 459 |
"processor": processor,
|
| 460 |
"loaded_at": time.time()
|
| 461 |
}
|
|
|
|
|
|
|
| 462 |
self.resource_monitor['loaded_models'].add(model_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 463 |
return True
|
| 464 |
|
| 465 |
return False
|
|
|
|
| 25 |
"""
|
| 26 |
|
| 27 |
def __init__(self, vram=None, num_sms: int = 800, cores_per_sm: int = 222, storage=None):
|
| 28 |
+
"""Initialize AI Accelerator with electron-speed awareness and shared storage."""
|
| 29 |
from electron_speed import TARGET_SWITCHES_PER_SEC, TRANSISTORS_ON_CHIP, drift_velocity
|
| 30 |
|
| 31 |
self.storage = storage # Use the shared storage instance
|
| 32 |
if self.storage is None:
|
| 33 |
+
from http_storage import LocalStorage
|
| 34 |
+
self.storage = LocalStorage()
|
| 35 |
if not self.storage.wait_for_connection():
|
| 36 |
+
raise RuntimeError("Could not connect to storage")
|
| 37 |
|
| 38 |
self.vram = vram
|
| 39 |
self.num_sms = num_sms
|
| 40 |
self.cores_per_sm = cores_per_sm
|
| 41 |
self.total_cores = num_sms * cores_per_sm
|
| 42 |
|
| 43 |
+
# Initialize registries and monitors
|
| 44 |
+
self.model_registry: Dict[str, Dict[str, Any]] = {} # Track loaded models
|
| 45 |
+
self.tensor_registry: Dict[str, Dict[str, Any]] = {} # Track tensor metadata
|
| 46 |
+
self.tokenizer_registry: Dict[str, Any] = {} # Track tokenizers
|
| 47 |
+
self.resource_monitor = {
|
| 48 |
+
'vram_used': 0,
|
| 49 |
+
'active_tensors': 0,
|
| 50 |
+
'loaded_models': set()
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
# Configure for maximum parallel processing at electron speed
|
| 54 |
total_tensor_cores = num_sms * cores_per_sm # Use ALL cores for tensor operations
|
| 55 |
self.tensor_core_array = TensorCoreArray(
|
|
|
|
| 60 |
self.tensor_cores_initialized = False
|
| 61 |
self._vram_allocated = 0
|
| 62 |
|
| 63 |
+
# Initialize operation tracking
|
| 64 |
+
self.operations_performed = 0
|
| 65 |
+
self.total_compute_time = 0.0
|
| 66 |
+
self.flops_performed = 0
|
| 67 |
+
|
| 68 |
+
# Initialize caches
|
| 69 |
+
self.activation_cache: Dict[str, str] = {} # Cache activation outputs
|
| 70 |
+
self.weight_cache: Dict[str, Any] = {} # Cache preprocessed weights
|
| 71 |
+
|
| 72 |
def pre_allocate_vram(self, size_bytes: int) -> bool:
|
| 73 |
"""Pre-allocate VRAM for model loading"""
|
| 74 |
if not self.vram:
|
|
|
|
| 92 |
|
| 93 |
def has_model(self, model_id: str) -> bool:
|
| 94 |
"""Check if a model is loaded"""
|
| 95 |
+
if not model_id:
|
| 96 |
return False
|
| 97 |
+
return model_id in self.model_registry and self.storage.is_model_loaded(model_id)
|
| 98 |
|
| 99 |
+
def load_model(self, model_id: str, model: Dict[str, Any], processor: Any = None) -> bool:
|
| 100 |
+
"""Load a model into the accelerator"""
|
| 101 |
+
try:
|
| 102 |
+
if not self.storage:
|
| 103 |
+
raise RuntimeError("No storage available")
|
| 104 |
+
|
| 105 |
+
# Store model in local storage
|
| 106 |
+
if not self.storage.load_model(model_id, model_data=model):
|
| 107 |
+
raise RuntimeError("Failed to store model in local storage")
|
| 108 |
+
|
| 109 |
+
# Update model registry and resource monitor
|
| 110 |
+
self.model_registry[model_id] = {
|
| 111 |
+
'config': model,
|
| 112 |
+
'loaded_at': time.time(),
|
| 113 |
+
'processor': processor
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
# Update resource monitoring
|
| 117 |
+
self.resource_monitor['loaded_models'].add(model_id)
|
| 118 |
+
|
| 119 |
+
# Update storage monitoring if available
|
| 120 |
+
if hasattr(self.storage, 'resource_monitor'):
|
| 121 |
+
self.storage.resource_monitor['loaded_models'].add(model_id)
|
| 122 |
+
|
| 123 |
+
return True
|
| 124 |
+
|
| 125 |
+
except Exception as e:
|
| 126 |
+
print(f"Error loading model {model_id}: {str(e)}")
|
| 127 |
+
return False
|
| 128 |
|
| 129 |
# Model registries
|
| 130 |
self.model_registry: Dict[str, Any] = {}
|
|
|
|
| 461 |
return None
|
| 462 |
|
| 463 |
def has_model(self, model_id: str) -> bool:
|
| 464 |
+
"""Check if model is loaded"""
|
| 465 |
+
if not model_id:
|
| 466 |
+
return False
|
| 467 |
+
return model_id in self.model_registry and self.storage.is_model_loaded(model_id)
|
| 468 |
|
| 469 |
def load_model(self, model_id: str, model=None, processor=None) -> bool:
|
| 470 |
+
"""Load model into local storage and register it with the accelerator"""
|
| 471 |
try:
|
| 472 |
+
if not self.storage:
|
| 473 |
+
raise RuntimeError("No storage available")
|
| 474 |
+
|
| 475 |
# Prepare model data for storage
|
| 476 |
+
model_data = model
|
| 477 |
+
if isinstance(model, dict):
|
| 478 |
+
model_data = model # Use as is if it's already a dict
|
| 479 |
+
elif model is not None:
|
| 480 |
+
# Serialize model object
|
| 481 |
model_data = {
|
| 482 |
"model_type": type(model).__name__,
|
| 483 |
"config": self._serialize_model_config(getattr(model, 'config', None)),
|
| 484 |
"loaded_at": time.time()
|
| 485 |
}
|
| 486 |
+
|
| 487 |
+
# Store in local storage
|
| 488 |
success = self.storage.load_model(model_id, model_data=model_data)
|
| 489 |
|
| 490 |
if success:
|
| 491 |
+
# Update local registry
|
| 492 |
self.model_registry[model_id] = {
|
| 493 |
"model_data": model_data,
|
| 494 |
"processor": processor,
|
| 495 |
"loaded_at": time.time()
|
| 496 |
}
|
| 497 |
+
|
| 498 |
+
# Update monitoring
|
| 499 |
self.resource_monitor['loaded_models'].add(model_id)
|
| 500 |
+
|
| 501 |
+
# Update storage monitoring if supported
|
| 502 |
+
if hasattr(self.storage, 'resource_monitor'):
|
| 503 |
+
self.storage.resource_monitor['loaded_models'].add(model_id)
|
| 504 |
+
|
| 505 |
return True
|
| 506 |
|
| 507 |
return False
|
test_ai_integration_http.py
CHANGED
|
@@ -167,17 +167,23 @@ def test_ai_integration_http():
|
|
| 167 |
# Load model with local storage
|
| 168 |
success = ai_accelerator_for_loading.load_model(
|
| 169 |
model_id=model_id,
|
| 170 |
-
model=model_data,
|
| 171 |
-
processor=None,
|
| 172 |
-
verify_load=True
|
| 173 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
except Exception as e:
|
| 175 |
print(f"Exception during model loading: {str(e)}")
|
| 176 |
success = False
|
| 177 |
-
|
| 178 |
if success:
|
| 179 |
-
print(f"Model '{model_id}'
|
| 180 |
-
assert ai_accelerator_for_loading.has_model(model_id), "Model not found in HTTP storage after loading."
|
| 181 |
|
| 182 |
# Store model parameters in components dict
|
| 183 |
components['model_id'] = model_id
|
|
|
|
| 167 |
# Load model with local storage
|
| 168 |
success = ai_accelerator_for_loading.load_model(
|
| 169 |
model_id=model_id,
|
| 170 |
+
model=model_data
|
|
|
|
|
|
|
| 171 |
)
|
| 172 |
+
|
| 173 |
+
if success:
|
| 174 |
+
print(f"Model '{model_id}' loaded successfully to local storage")
|
| 175 |
+
# Verify model is loaded in both accelerator and storage
|
| 176 |
+
if not ai_accelerator_for_loading.has_model(model_id):
|
| 177 |
+
raise RuntimeError(f"Model {model_id} not found in accelerator registry after loading")
|
| 178 |
+
else:
|
| 179 |
+
raise RuntimeError("Failed to load model in local storage")
|
| 180 |
+
|
| 181 |
except Exception as e:
|
| 182 |
print(f"Exception during model loading: {str(e)}")
|
| 183 |
success = False
|
| 184 |
+
|
| 185 |
if success:
|
| 186 |
+
print(f"Model '{model_id}' successfully loaded and verified")
|
|
|
|
| 187 |
|
| 188 |
# Store model parameters in components dict
|
| 189 |
components['model_id'] = model_id
|