Spaces:

factorstudios
/

NMFL

Runtime error

App Files Community

Factor Studios committed on Aug 14, 2025

Commit

2d21971

verified ·

1 Parent(s): a31170e

Upload 2 files

Browse files

Files changed (2) hide show

ai_http.py +77 -35
test_ai_integration_http.py +12 -6

ai_http.py CHANGED Viewed

@@ -25,21 +25,31 @@ class AIAccelerator:
     """
     def __init__(self, vram=None, num_sms: int = 800, cores_per_sm: int = 222, storage=None):
-        """Initialize AI Accelerator with electron-speed awareness and shared HTTP storage."""
         from electron_speed import TARGET_SWITCHES_PER_SEC, TRANSISTORS_ON_CHIP, drift_velocity
         self.storage = storage  # Use the shared storage instance
         if self.storage is None:
-            from http_storage import HTTPGPUStorage
-            self.storage = HTTPGPUStorage()  # Create HTTP storage instead of WebSocket
             if not self.storage.wait_for_connection():
-                raise RuntimeError("Could not connect to GPU storage server")
         self.vram = vram
         self.num_sms = num_sms
         self.cores_per_sm = cores_per_sm
         self.total_cores = num_sms * cores_per_sm
         # Configure for maximum parallel processing at electron speed
         total_tensor_cores = num_sms * cores_per_sm  # Use ALL cores for tensor operations
         self.tensor_core_array = TensorCoreArray(
@@ -50,6 +60,15 @@ class AIAccelerator:
         self.tensor_cores_initialized = False
         self._vram_allocated = 0
     def pre_allocate_vram(self, size_bytes: int) -> bool:
         """Pre-allocate VRAM for model loading"""
         if not self.vram:
@@ -73,31 +92,39 @@ class AIAccelerator:
     def has_model(self, model_id: str) -> bool:
         """Check if a model is loaded"""
-        if not self.storage:
             return False
-        return self.storage.is_model_loaded(model_id)
-        # Initialize model, tensor, and tokenizer tracking
-        self.model_registry: Dict[str, Dict[str, Any]] = {}  # Track loaded models
-        self.tensor_registry: Dict[str, Dict[str, Any]] = {}  # Track tensor metadata
-        self.tokenizer_registry: Dict[str, Any] = {}  # Track tokenizers
-        self.resource_monitor = {
-            'vram_used': 0,
-            'active_tensors': 0,
-            'loaded_models': set()
-        }
-        # AI operation statistics
-        self.operations_performed = 0
-        self.total_compute_time = 0.0
-        self.flops_performed = 0
-        # HTTP-based memory management
-        self.model_registry = {}  # Track loaded models
-        self.matrix_registry = {}  # Track loaded matrices
-        self.matrix_counter = 0
-        self.activation_cache: Dict[str, str] = {}  # Cache activation outputs
-        self.weight_cache: Dict[str, Any] = {}  # Cache preprocessed weights
         # Model registries
         self.model_registry: Dict[str, Any] = {}
@@ -434,32 +461,47 @@ class AIAccelerator:
             return None
     def has_model(self, model_id: str) -> bool:
-        """Check if model is loaded via HTTP storage"""
-        return self.storage.is_model_loaded(model_id)
     def load_model(self, model_id: str, model=None, processor=None) -> bool:
-        """Load model via HTTP storage"""
         try:
             # Prepare model data for storage
-            model_data = None
-            if model is not None:
-                # In a real implementation, this would serialize the model
                 model_data = {
                     "model_type": type(model).__name__,
                     "config": self._serialize_model_config(getattr(model, 'config', None)),
                     "loaded_at": time.time()
                 }
-            # Use HTTP storage to load model
             success = self.storage.load_model(model_id, model_data=model_data)
             if success:
                 self.model_registry[model_id] = {
                     "model_data": model_data,
                     "processor": processor,
                     "loaded_at": time.time()
                 }
                 self.resource_monitor['loaded_models'].add(model_id)
                 return True
             return False

     """
     def __init__(self, vram=None, num_sms: int = 800, cores_per_sm: int = 222, storage=None):
+        """Initialize AI Accelerator with electron-speed awareness and shared storage."""
         from electron_speed import TARGET_SWITCHES_PER_SEC, TRANSISTORS_ON_CHIP, drift_velocity
         self.storage = storage  # Use the shared storage instance
         if self.storage is None:
+            from http_storage import LocalStorage
+            self.storage = LocalStorage()
             if not self.storage.wait_for_connection():
+                raise RuntimeError("Could not connect to storage")
         self.vram = vram
         self.num_sms = num_sms
         self.cores_per_sm = cores_per_sm
         self.total_cores = num_sms * cores_per_sm
+        # Initialize registries and monitors
+        self.model_registry: Dict[str, Dict[str, Any]] = {}  # Track loaded models
+        self.tensor_registry: Dict[str, Dict[str, Any]] = {}  # Track tensor metadata
+        self.tokenizer_registry: Dict[str, Any] = {}  # Track tokenizers
+        self.resource_monitor = {
+            'vram_used': 0,
+            'active_tensors': 0,
+            'loaded_models': set()
+        }
         # Configure for maximum parallel processing at electron speed
         total_tensor_cores = num_sms * cores_per_sm  # Use ALL cores for tensor operations
         self.tensor_core_array = TensorCoreArray(
         self.tensor_cores_initialized = False
         self._vram_allocated = 0
+        # Initialize operation tracking
+        self.operations_performed = 0
+        self.total_compute_time = 0.0
+        self.flops_performed = 0
+        # Initialize caches
+        self.activation_cache: Dict[str, str] = {}  # Cache activation outputs
+        self.weight_cache: Dict[str, Any] = {}  # Cache preprocessed weights
     def pre_allocate_vram(self, size_bytes: int) -> bool:
         """Pre-allocate VRAM for model loading"""
         if not self.vram:
     def has_model(self, model_id: str) -> bool:
         """Check if a model is loaded"""
+        if not model_id:
             return False
+        return model_id in self.model_registry and self.storage.is_model_loaded(model_id)
+    def load_model(self, model_id: str, model: Dict[str, Any], processor: Any = None) -> bool:
+        """Load a model into the accelerator"""
+        try:
+            if not self.storage:
+                raise RuntimeError("No storage available")
+            # Store model in local storage
+            if not self.storage.load_model(model_id, model_data=model):
+                raise RuntimeError("Failed to store model in local storage")
+            # Update model registry and resource monitor
+            self.model_registry[model_id] = {
+                'config': model,
+                'loaded_at': time.time(),
+                'processor': processor
+            }
+            # Update resource monitoring
+            self.resource_monitor['loaded_models'].add(model_id)
+            # Update storage monitoring if available
+            if hasattr(self.storage, 'resource_monitor'):
+                self.storage.resource_monitor['loaded_models'].add(model_id)
+            return True
+        except Exception as e:
+            print(f"Error loading model {model_id}: {str(e)}")
+            return False
         # Model registries
         self.model_registry: Dict[str, Any] = {}
             return None
     def has_model(self, model_id: str) -> bool:
+        """Check if model is loaded"""
+        if not model_id:
+            return False
+        return model_id in self.model_registry and self.storage.is_model_loaded(model_id)
     def load_model(self, model_id: str, model=None, processor=None) -> bool:
+        """Load model into local storage and register it with the accelerator"""
         try:
+            if not self.storage:
+                raise RuntimeError("No storage available")
             # Prepare model data for storage
+            model_data = model
+            if isinstance(model, dict):
+                model_data = model  # Use as is if it's already a dict
+            elif model is not None:
+                # Serialize model object
                 model_data = {
                     "model_type": type(model).__name__,
                     "config": self._serialize_model_config(getattr(model, 'config', None)),
                     "loaded_at": time.time()
                 }
+            # Store in local storage
             success = self.storage.load_model(model_id, model_data=model_data)
             if success:
+                # Update local registry
                 self.model_registry[model_id] = {
                     "model_data": model_data,
                     "processor": processor,
                     "loaded_at": time.time()
                 }
+                # Update monitoring
                 self.resource_monitor['loaded_models'].add(model_id)
+                # Update storage monitoring if supported
+                if hasattr(self.storage, 'resource_monitor'):
+                    self.storage.resource_monitor['loaded_models'].add(model_id)
                 return True
             return False

test_ai_integration_http.py CHANGED Viewed

@@ -167,17 +167,23 @@ def test_ai_integration_http():
                 # Load model with local storage
                 success = ai_accelerator_for_loading.load_model(
                     model_id=model_id,
-                    model=model_data,
-                    processor=None,
-                    verify_load=True
                 )
             except Exception as e:
                 print(f"Exception during model loading: {str(e)}")
                 success = False
             if success:
-                print(f"Model '{model_id}' loaded successfully to HTTP storage.")
-                assert ai_accelerator_for_loading.has_model(model_id), "Model not found in HTTP storage after loading."
                 # Store model parameters in components dict
                 components['model_id'] = model_id

                 # Load model with local storage
                 success = ai_accelerator_for_loading.load_model(
                     model_id=model_id,
+                    model=model_data
                 )
+                if success:
+                    print(f"Model '{model_id}' loaded successfully to local storage")
+                    # Verify model is loaded in both accelerator and storage
+                    if not ai_accelerator_for_loading.has_model(model_id):
+                        raise RuntimeError(f"Model {model_id} not found in accelerator registry after loading")
+                else:
+                    raise RuntimeError("Failed to load model in local storage")
             except Exception as e:
                 print(f"Exception during model loading: {str(e)}")
                 success = False
             if success:
+                print(f"Model '{model_id}' successfully loaded and verified")
                 # Store model parameters in components dict
                 components['model_id'] = model_id