Factor Studios committed on
Commit 02ddb96 · verified · 1 Parent(s): 6195e16

Upload 2 files

Files changed (2)
  1. http_storage.py +335 -516
  2. test_ai_integration_http.py +71 -117
http_storage.py CHANGED
@@ -1,516 +1,335 @@
- """
- Test AI integration with local storage and zero CPU memory usage.
- All operations are performed through local storage with direct tensor core access.
- """
- import asyncio
- from gpu_arch import Chip
- from ai_http import AIAccelerator
- from virtual_vram import VirtualVRAM
- from PIL import Image
- import numpy as np
- from http_storage import HTTPGPUStorage as LocalGPUStorage
- import time
- import os
- import platform
- import contextlib
- import atexit
- import logging
-
- # Configure logging
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(asctime)s - %(levelname)s - %(message)s'
- )
-
- # Local storage manager
- @contextlib.contextmanager
- def storage_manager():
-     storage = None
-
-     try:
-         # Create new storage instance with local path
-         storage = LocalGPUStorage(storage_path="local_storage")
-
-         # Verify storage is accessible
-         if storage.ping():
-             logging.info("Successfully initialized local storage")
-         else:
-             raise RuntimeError("Local storage is not accessible")
-
-         yield storage
-
-     except Exception as e:
-         logging.error(f"Storage initialization error: {e}")
-         raise
-
-     try:
-         yield storage
-     except Exception as e:
-         logging.error(f"HTTP operation failed: {e}")
-         # Try to reconnect once if operation fails
-         if try_connect():
-             logging.info("Successfully reconnected to GPU storage server via HTTP")
-             yield storage
-         else:
-             raise
-     finally:
-         if storage:
-             try:
-                 storage.close()
-             except:
-                 pass
-
- # Cleanup handler
- def cleanup_resources():
-     try:
-         # Get the current storage instance if it exists
-         current_storage = LocalGPUStorage._instance
-         if current_storage is not None:
-             try:
-                 # Clear any cached data
-                 current_storage.resource_monitor['vram_used'] = 0
-                 current_storage.resource_monitor['active_tensors'] = 0
-                 current_storage.resource_monitor['loaded_models'].clear()
-             except Exception as e:
-                 logging.error(f"Error cleaning up storage resources: {e}")
-     except Exception as e:
-         logging.error(f"Error in storage cleanup: {e}")
-
-     # Clear VRAM and other resources
-     import gc
-     gc.collect()
-
- # Register enhanced cleanup handler
- atexit.register(cleanup_resources)
-
- def test_ai_integration():
-     print("\n--- Testing Local Storage-Based AI Integration with Zero CPU Usage ---")
-     from electron_speed import TARGET_SWITCHES_PER_SEC, TRANSISTORS_ON_CHIP, drift_velocity, speed_of_light_silicon
-
-     # Initialize components dictionary to store GPU resources
-     components = {
-         'chips': [],
-         'ai_accelerators': [],
-         'model_id': None,
-         'vram': None,
-         'storage': None,
-         'model_config': None,
-         'tensor_registry': {},
-         'initialized': False
-     }
-
-     # Initialize global tensor registry
-     global_tensor_registry = {
-         'model_tensors': {},
-         'runtime_tensors': {},
-         'placeholder_tensors': {},
-         'stats': {
-             'total_vram_used': 0,
-             'active_tensors': 0
-         }
-     }
-
-     print(f"\nElectron-Speed Architecture Parameters:")
-     print(f"Target switches/sec: {TARGET_SWITCHES_PER_SEC:.2e}")
-     print(f"Transistors on chip: {TRANSISTORS_ON_CHIP:,}")
-     print(f"Electron drift velocity: {drift_velocity:.2e} m/s")
-     print(f"Percentage of light speed: {(drift_velocity/speed_of_light_silicon)*100:.2f}%")
-
-     # Test 1: Local Model Loading
-     print("\nTest 1: Model Loading with Local Storage")
-     try:
-         # Use storage manager for proper resource handling
-         with storage_manager() as storage:
-             components['storage'] = storage  # Save storage reference
-
-             # Initialize virtual GPU stack with unlimited local storage
-             chip_for_loading = Chip(chip_id=0, vram_size_gb=None, storage=storage)  # Unlimited VRAM
-             components['chips'].append(chip_for_loading)
-
-             # Initialize VRAM with local storage (unlimited)
-             vram = VirtualVRAM(storage=storage)
-             components['vram'] = vram
-
-             # Set up AI accelerator
-             ai_accelerator_for_loading = AIAccelerator(vram=vram, storage=storage)
-             ai_accelerator_for_loading.initialize_tensor_cores()
-             components['ai_accelerators'].append(ai_accelerator_for_loading)
-
-             # Initialize model registry in local storage (unlimited)
-             storage.store_state("model_registry", "state", {
-                 "initialized": True,
-                 "max_vram": None,  # Unlimited VRAM
-                 "active_models": {}
-             })
-
-             # Load BLIP-2 Large model directly to HTTP storage
-             model_id = "microsoft/florence-2-large"
-             print(f"Loading model {model_id} directly to HTTP storage...")
-
-             try:
-                 # Simulate model loading (in real scenario, would load actual model)
-                 model_data = {
-                     "model_name": model_id,
-                     "model_type": "florence-2-large",
-                     "parameters": 771000000,
-                     "architecture": "vision-language",
-                     "loaded_at": time.time()
-                 }
-
-                 # Load model with local storage verification
-                 try:
-                     # Verify storage is accessible
-                     if not ai_accelerator_for_loading.storage.ping():
-                         raise RuntimeError("Local storage not accessible")
-
-                     # Calculate model size for proper VRAM allocation
-                     model_size = model_data["parameters"] * 4  # 4 bytes per parameter (float32)
-                     print(f"Model size: {model_size / (1024**3):.2f} GB")
-
-                     # Pre-allocate VRAM for model
-                     ai_accelerator_for_loading.pre_allocate_vram(model_size)
-
-                     # Load model with local storage
-                     success = ai_accelerator_for_loading.load_model(
-                         model_id=model_id,
-                         model=model_data,
-                         processor=None,
-                         verify_load=True
-                     )
-                 except Exception as e:
-                     print(f"Exception during model loading: {str(e)}")
-                     success = False
-
-                 if success:
-                     print(f"Model '{model_id}' loaded successfully to HTTP storage.")
-                     assert ai_accelerator_for_loading.has_model(model_id), "Model not found in HTTP storage after loading."
-
-                     # Store model parameters in components dict
-                     components['model_id'] = model_id
-                     components['model_size'] = model_size
-                     components['model_config'] = model_data
-                 else:
-                     raise RuntimeError("Failed to load model via HTTP storage")
-
-             except Exception as e:
-                 print(f"Detailed model loading error: {str(e)}")
-                 print("Falling back to placeholder model mode...")
-                 # Try loading with placeholder model
-                 try:
-                     # Match server-side model configuration
-                     placeholder_model = {
-                         "model_name": model_id,
-                         "model_type": "placeholder",
-                         "parameters": 1000000,  # Small placeholder
-                         "architecture": {
-                             "type": "nvidia_ampere",
-                             "features": ["tensor_cores", "ray_tracing", "dynamic_scheduling"]
-                         },
-                         "loaded_at": time.time(),
-                         # Server-validated GPU architecture configuration
-                         "num_sms": 108,  # A100 config
-                         "tensor_cores_per_sm": 4,
-                         "cuda_cores_per_sm": 64,
-                         "compute_capability": "8.0",
-                         "vram_config": {
-                             "size_gb": 40,
-                             "bandwidth_gbps": 1555,
-                             "cache_size_mb": 40,
-                             "allocation": "dynamic"
-                         }
-                     }
-
-                     # Validate required fields before loading
-                     required_fields = ["num_sms", "tensor_cores_per_sm", "cuda_cores_per_sm"]
-                     if not all(field in placeholder_model for field in required_fields):
-                         raise ValueError(f"Missing required GPU architecture fields: {[f for f in required_fields if f not in placeholder_model]}")
-
-                     success = ai_accelerator_for_loading.load_model(
-                         model_id=model_id,
-                         model=placeholder_model,
-                         processor=None
-                     )
-
-                     if success:
-                         components['model_id'] = model_id
-                         components['model_config'] = placeholder_model
-                         print("Successfully loaded placeholder model via HTTP")
-                     else:
-                         raise RuntimeError("Placeholder model loading also failed")
-
-                 except Exception as e2:
-                     print(f"Placeholder fallback also failed: {str(e2)}")
-                     raise
-
-     except Exception as e:
-         print(f"Model loading test failed: {e}")
-         return
-
-     # Test 2: Multi-Chip Parallel Processing
-     print("\nTest 2: Parallel Processing across Multiple Chips")
-     num_chips = 4  # Using multiple chips for maximum parallelization
-     chips = []
-     ai_accelerators = []
-
-     try:
-         # Try to reuse existing connection with verification
-         shared_storage = None
-         max_connection_attempts = 3
-
-         for attempt in range(max_connection_attempts):
-             try:
-                 if (components['storage'] and
-                         components['storage'].is_connected()):
-                     shared_storage = components['storage']
-                     logging.info("Successfully reused existing HTTP connection")
-                     break
-                 else:
-                     logging.warning("Existing connection unavailable, creating new HTTP connection...")
-                     with http_storage_manager() as new_storage:
-                         if new_storage and new_storage.is_connected():
-                             components['storage'] = new_storage
-                             shared_storage = new_storage
-                             logging.info("Successfully established new HTTP connection")
-                             break
-             except Exception as e:
-                 logging.error(f"HTTP connection attempt {attempt + 1} failed: {e}")
-                 if attempt < max_connection_attempts - 1:
-                     time.sleep(2)
-                     continue
-                 raise RuntimeError(f"Failed to establish HTTP connection after {max_connection_attempts} attempts")
-
-         # Initialize high-performance chip array with HTTP storage
-         total_sms = 0
-         total_cores = 0
-
-         # Create optical interconnect for chip communication
-         from gpu_arch import OpticalInterconnect
-         optical_link = OpticalInterconnect(bandwidth_tbps=800, latency_ns=1)
-
-         # Reuse existing VRAM instance with shared storage
-         shared_vram = components['vram']
-         if shared_vram is None:
-             shared_vram = VirtualVRAM(storage=shared_storage)
-         shared_vram.storage = shared_storage
-
-         for i in range(num_chips):
-             # Configure each chip with shared HTTP storage
-             chip = Chip(chip_id=i, vram_size_gb=None, storage=shared_storage)
-             chips.append(chip)
-
-             # Connect chips in a ring topology
-             if i > 0:
-                 chip.connect_chip(chips[i-1], optical_link)
-
-             # Initialize AI accelerator with shared resources
-             ai_accelerator = AIAccelerator(vram=shared_vram, storage=shared_storage)
-             ai_accelerators.append(ai_accelerator)
-
-             # Verify and potentially repair HTTP connection
-             max_retry = 3
-             for retry in range(max_retry):
-                 try:
-                     if not shared_storage.is_connected():
-                         logging.warning(f"Connection check failed for chip {i}, attempt {retry + 1}")
-                         shared_storage._create_session()  # Attempt to reconnect
-                         time.sleep(1)
-                         continue
-
-                     # Load model weights from HTTP storage (no CPU transfer)
-                     success = ai_accelerator.load_model(components['model_id'], components['model_config'], None)
-                     if success:
-                         logging.info(f"Successfully initialized chip {i} with model via HTTP")
-                         break
-                     else:
-                         raise RuntimeError("Model loading failed")
-
-                 except Exception as e:
-                     if retry < max_retry - 1:
-                         logging.warning(f"Error initializing chip {i}, attempt {retry + 1}: {e}")
-                         time.sleep(1)
-                         continue
-                     else:
-                         logging.error(f"Failed to initialize chip {i} after {max_retry} attempts: {e}")
-                         raise
-
-             # Track total processing units
-             total_sms += chip.num_sms
-             total_cores += chip.num_sms * chip.cores_per_sm
-
-             # Store chip configuration in HTTP storage
-             shared_storage.store_state(f"chips/{i}/config", "state", {
-                 "num_sms": chip.num_sms,
-                 "cores_per_sm": chip.cores_per_sm,
-                 "total_cores": chip.num_sms * chip.cores_per_sm,
-                 "connected_chips": [c.chip_id for c in chip.connected_chips]
-             })
-
-             print(f"Chip {i} initialized with HTTP storage and optical interconnect")
-
-         print(f"\nTotal Processing Units:")
-         print(f"- Streaming Multiprocessors: {total_sms:,}")
-         print(f"- CUDA Cores: {total_cores:,}")
-         print(f"- Electron-speed tensor cores: {total_cores * 8:,}")
-
-         # Test multi-chip parallel inference with local storage
-         print(f"\nRunning parallel inference simulation")
-
-         # Create test input data
-         test_image = np.random.rand(224, 224, 3).astype(np.float32)
-         print(f"Created test image with shape: {test_image.shape}")
-
-         # Store input image in local storage
-         input_tensor_id = "test_input_image"
-         if shared_storage.store_tensor(input_tensor_id, test_image):
-             print(f"Successfully stored test image in local storage")
-         else:
-             raise RuntimeError("Failed to store test image")
-
-         # Synchronize all chips through HTTP storage
-         start_time = time.time()
-
-         # Distribute workload across chips using HTTP storage
-         batch_size = test_image.shape[0] // num_chips if test_image.shape[0] >= num_chips else 1
-         results = []
-
-         for i, accelerator in enumerate(ai_accelerators):
-             try:
-                 # Run inference using locally stored weights
-                 result = accelerator.inference(components['model_id'], input_tensor_id)
-
-                 if result is not None:
-                     # Store result in local storage
-                     result_id = f"results/chip_{i}/test_image"
-                     if shared_storage.store_tensor(result_id, result):
-                         results.append(result)
-                         print(f"Chip {i} completed inference and stored result")
-                     else:
-                         print(f"Chip {i} inference succeeded but result storage failed")
-                 else:
-                     print(f"Chip {i} inference failed")
-
-             except Exception as e:
-                 print(f"Error in chip {i} inference: {e}")
-
-         elapsed = time.time() - start_time
-
-         # Calculate performance metrics
-         ops_per_inference = total_cores * 1024  # FMA ops per core
-         from electron_speed import drift_velocity, TARGET_SWITCHES_PER_SEC
-         electron_transit_time = 1 / (drift_velocity * TARGET_SWITCHES_PER_SEC)
-         theoretical_time = electron_transit_time * ops_per_inference / total_cores
-
-         print(f"\nHTTP-Based Multi-Chip Inference Results:")
-         print(f"- Chips used: {num_chips}")
-         print(f"- Results collected: {len(results)}")
-         print(f"- Total time: {elapsed:.4f}s")
-         print(f"- Theoretical electron-speed time: {theoretical_time:.6f}s")
-         print(f"- Speed ratio: {theoretical_time/elapsed:.2f}x theoretical")
-         print(f"- Operations per second: {ops_per_inference/elapsed:.2e}")
-
-         # Test 3: HTTP Storage Performance
-         print(f"\nTest 3: HTTP Storage Performance Evaluation")
-
-         # Test tensor storage/retrieval performance
-         test_sizes = [1024, 4096, 16384, 65536]  # Different tensor sizes
-         storage_times = []
-         retrieval_times = []
-
-         for size in test_sizes:
-             test_tensor = np.random.rand(size).astype(np.float32)
-             tensor_id = f"perf_test_{size}"
-
-             # Test storage time
-             start = time.time()
-             success = shared_storage.store_tensor(tensor_id, test_tensor)
-             storage_time = time.time() - start
-
-             if success:
-                 storage_times.append(storage_time)
-
-                 # Test retrieval time
-                 start = time.time()
-                 retrieved = shared_storage.load_tensor(tensor_id)
-                 retrieval_time = time.time() - start
-
-                 if retrieved is not None and np.array_equal(test_tensor, retrieved):
-                     retrieval_times.append(retrieval_time)
-                     print(f"Size {size}: Store {storage_time:.4f}s, Retrieve {retrieval_time:.4f}s")
-                 else:
-                     print(f"Size {size}: Retrieval verification failed")
-             else:
-                 print(f"Size {size}: Storage failed")
-
-         if storage_times and retrieval_times:
-             avg_storage = sum(storage_times) / len(storage_times)
-             avg_retrieval = sum(retrieval_times) / len(retrieval_times)
-             print(f"Average storage time: {avg_storage:.4f}s")
-             print(f"Average retrieval time: {avg_retrieval:.4f}s")
-
-         # Test 4: Multi-chip coordination via HTTP
-         print(f"\nTest 4: Multi-Chip Coordination via HTTP")
-
-         # Test cross-chip data transfer
-         test_data_id = "cross_chip_test_data"
-         test_data = np.array([1, 2, 3, 4, 5], dtype=np.float32)
-
-         if shared_storage.store_tensor(test_data_id, test_data):
-             print("Stored test data for cross-chip transfer")
-
-             # Transfer data between chips
-             new_data_id = shared_storage.transfer_between_chips(0, 1, test_data_id)
-             if new_data_id:
-                 print(f"Successfully transferred data from chip 0 to chip 1: {new_data_id}")
-
-                 # Verify transferred data
-                 transferred_data = shared_storage.load_tensor(new_data_id)
-                 if transferred_data is not None and np.array_equal(test_data, transferred_data):
-                     print("Cross-chip transfer verification successful")
-                 else:
-                     print("Cross-chip transfer verification failed")
-             else:
-                 print("Cross-chip transfer failed")
-
-         # Test synchronization barriers
-         barrier_id = "test_barrier"
-         num_participants = num_chips
-
-         if shared_storage.create_sync_barrier(barrier_id, num_participants):
-             print(f"Created synchronization barrier for {num_participants} participants")
-
-             # Simulate participants arriving at barrier
-             for i in range(num_participants):
-                 result = shared_storage.wait_sync_barrier(barrier_id)
-                 if i == num_participants - 1:
-                     if result:
-                         print("All participants reached barrier - synchronization successful")
-                     else:
-                         print("Barrier synchronization failed")
-                 else:
-                     print(f"Participant {i+1} reached barrier")
-
-         print(f"\nHTTP-based AI integration test completed successfully!")
-
-         # Final statistics
-         final_stats = {
-             "chips_initialized": len(chips),
-             "ai_accelerators": len(ai_accelerators),
-             "total_cores": total_cores,
-             "model_loaded": components['model_id'] is not None,
-             "storage_type": "HTTP",
-             "connection_status": shared_storage.get_connection_status()
-         }
-
-         print(f"\nFinal System Statistics:")
-         for key, value in final_stats.items():
-             print(f"- {key}: {value}")
-
-     except Exception as e:
-         print(f"Multi-chip processing test failed: {e}")
-         import traceback
-         traceback.print_exc()
-         return
-
- if __name__ == "__main__":
-     test_ai_integration_http()
-
+ import json
+ import numpy as np
+ from typing import Dict, Any, Optional, Union
+ import threading
+ import time
+ import hashlib
+ import logging
+ import os
+ import shutil
+ import uuid
+ from pathlib import Path
+
+ class LocalStorage:
+     """
+     Local storage implementation for GPU memory management.
+     Maintains backward compatibility with previous storage interfaces.
+     """
+
+     # Singleton instance
+     _instance = None
+     _lock = threading.Lock()
+
+     def __new__(cls, storage_path: str = "storage"):
+         with cls._lock:
+             if cls._instance is None:
+                 cls._instance = super().__new__(cls)
+                 # Convert to absolute path if relative
+                 if not os.path.isabs(storage_path):
+                     storage_path = os.path.abspath(storage_path)
+                 cls._instance._init_singleton(storage_path)
+             return cls._instance
+
+     def _init_singleton(self, storage_path: str):
+         """Initialize the singleton instance with local storage"""
+         if hasattr(self, 'initialized'):
+             return
+
+         # Setup storage paths
+         self.base_path = Path(storage_path)
+         self.vram_path = self.base_path / "vram_blocks"
+         self.models_path = self.base_path / "models"
+         self.cache_path = self.base_path / "cache"
+         self.state_path = self.base_path / "states"
+
+         # Create directories
+         for path in [self.vram_path, self.models_path, self.cache_path, self.state_path]:
+             path.mkdir(parents=True, exist_ok=True)
+
+         self.lock = threading.Lock()
+         self._closing = False
+         self.error_count = 0
+         self.last_error_time = 0
+         self.session_id = str(uuid.uuid4())
+
+         # Tensor and model registries (maintained for compatibility)
+         self.tensor_registry: Dict[str, Dict[str, Any]] = {}
+         self.model_registry: Dict[str, Dict[str, Any]] = {}
+         self.resource_monitor = {
+             'vram_used': 0,
+             'active_tensors': 0,
+             'loaded_models': set()
+         }
+
+         # Initialize local storage monitoring
+         self.storage_monitor = {
+             'total_size': 0,
+             'last_access': time.time(),
+             'disk_usage': os.path.getsize(str(self.base_path)) if os.path.exists(str(self.base_path)) else 0
+         }
+
+         # Initialize session
+         self._create_session()
+         self.initialized = True
+
+     def __init__(self, storage_path: str = "storage"):
+         """No-op: construction returns the singleton instance.
+         The actual initialization happens in __new__ and _init_singleton."""
+         pass
+
+     def _create_session(self):
+         """Initialize local storage session"""
+         try:
+             # Create status file to track session
+             status_path = self.base_path / "session_status.json"
+             status_data = {
+                 "session_id": self.session_id,
+                 "created_at": time.time(),
+                 "resource_limits": {
+                     "max_vram_gb": 40,  # A100 size
+                     "max_models": 5,
+                     "max_batch_size": 32
+                 }
+             }
+
+             with open(status_path, 'w') as f:
+                 json.dump(status_data, f, indent=2)
+
+             logging.info(f"Local storage session created: {self.session_id}")
+             return True
+
+         except Exception as e:
+             logging.error(f"Failed to create local storage session: {e}")
+             self.error_count += 1
+             self.last_error_time = time.time()
+             return False
+
+     def _check_storage(self) -> Dict[str, Any]:
+         """Check local storage status and usage"""
+         try:
+             # Update storage monitoring
+             self.storage_monitor.update({
+                 'total_size': sum(f.stat().st_size for f in self.base_path.rglob('*') if f.is_file()),
+                 'last_access': time.time(),
+                 'disk_usage': os.path.getsize(str(self.base_path)) if os.path.exists(str(self.base_path)) else 0
+             })
+             return {"status": "ok", "monitor": self.storage_monitor}
+         except Exception as e:
+             logging.error(f"Error checking storage: {e}")
+             return {"status": "error", "message": str(e)}
+
+     def store_tensor(self, tensor_id: str, data: np.ndarray, model_size: Optional[int] = None) -> bool:
+         """Store tensor data in local storage"""
+         try:
+             if data is None:
+                 raise ValueError("Cannot store None tensor")
+
+             # Calculate tensor metadata
+             tensor_shape = data.shape
+             tensor_dtype = str(data.dtype)
+             tensor_size = data.nbytes
+
+             # Save tensor data; tensor_id may contain '/' (e.g. "results/chip_0/..."),
+             # so ensure the target subdirectory exists first
+             tensor_path = self.vram_path / f"{tensor_id}.npy"
+             tensor_path.parent.mkdir(parents=True, exist_ok=True)
+             np.save(str(tensor_path), data)
+
+             # Save metadata
+             metadata = {
+                 'shape': tensor_shape,
+                 'dtype': tensor_dtype,
+                 'size': tensor_size,
+                 'timestamp': time.time(),
+                 'model_size': model_size if model_size is not None else -1
+             }
+
+             metadata_path = self.vram_path / f"{tensor_id}_meta.json"
+             with open(metadata_path, 'w') as f:
+                 json.dump(metadata, f)
+
+             # Update tensor registry
+             with self.lock:
+                 self.tensor_registry[tensor_id] = metadata
+                 self.resource_monitor['vram_used'] += tensor_size
+                 self.resource_monitor['active_tensors'] += 1
+             return True
+
+         except Exception as e:
+             logging.error(f"Error storing tensor {tensor_id}: {str(e)}")
+             return False
+
+     def load_tensor(self, tensor_id: str) -> Optional[np.ndarray]:
+         """Load tensor data from local storage"""
+         try:
+             tensor_path = self.vram_path / f"{tensor_id}.npy"
+             metadata_path = self.vram_path / f"{tensor_id}_meta.json"
+
+             # Check if tensor files exist
+             if not tensor_path.exists() or not metadata_path.exists():
+                 logging.warning(f"Tensor {tensor_id} not found in local storage")
+                 return None
+
+             # Load metadata
+             with open(metadata_path, 'r') as f:
+                 metadata = json.load(f)
+
+             # Load tensor data
+             arr = np.load(str(tensor_path))
+
+             # Update registry if not present
+             if tensor_id not in self.tensor_registry:
+                 with self.lock:
+                     self.tensor_registry[tensor_id] = metadata
+
+             return arr
+
+         except Exception as e:
+             logging.error(f"Error loading tensor {tensor_id}: {str(e)}")
+             return None
+
+     def store_state(self, component: str, state_id: str, state_data: Dict[str, Any]) -> bool:
+         """Store component state in local storage"""
+         try:
+             # Create component directory if needed
+             component_dir = self.state_path / component
+             component_dir.mkdir(parents=True, exist_ok=True)
+
+             # Save state data with timestamp
+             state_file = component_dir / f"{state_id}.json"
+             data_to_save = {
+                 "data": state_data,
+                 "timestamp": time.time()
+             }
+
+             with open(state_file, 'w') as f:
+                 json.dump(data_to_save, f, indent=2)
+
+             return True
+
+         except Exception as e:
+             logging.error(f"Error storing state for {component}/{state_id}: {str(e)}")
+             return False
+
+     def load_state(self, component: str, state_id: str) -> Optional[Dict[str, Any]]:
+         """Load component state from local storage"""
+         try:
+             state_file = self.state_path / component / f"{state_id}.json"
+
+             if not state_file.exists():
+                 logging.warning(f"State file not found for {component}/{state_id}")
+                 return None
+
+             with open(state_file, 'r') as f:
+                 saved_data = json.load(f)
+
+             return saved_data.get('data')
+
+         except Exception as e:
+             logging.error(f"Error loading state for {component}/{state_id}: {str(e)}")
+             return None
+
+     def cache_data(self, key: str, data: Any) -> bool:
+         """Cache JSON-serializable data in local storage"""
+         try:
+             # Cache entries are plain JSON files under cache_path
+             cache_file = self.cache_path / f"{key}.json"
+             with open(cache_file, 'w') as f:
+                 json.dump({"data": data, "timestamp": time.time()}, f)
+             return True
+
+         except Exception as e:
+             logging.error(f"Error caching data for key {key}: {str(e)}")
+             return False
+
+     def get_cached_data(self, key: str) -> Optional[Any]:
+         """Get cached data from local storage"""
+         try:
+             cache_file = self.cache_path / f"{key}.json"
+             if not cache_file.exists():
+                 return None
+
+             with open(cache_file, 'r') as f:
+                 return json.load(f).get('data')
+
+         except Exception as e:
+             logging.error(f"Error getting cached data for key {key}: {str(e)}")
+             return None
+
+     def load_model(self, model_name: str, model_path: Optional[str] = None, model_data: Optional[Dict] = None) -> bool:
+         """Load a model from local storage"""
+         try:
+             # Check if model is already loaded
+             if model_name in self.model_registry:
+                 logging.info(f"Model {model_name} already loaded")
+                 return True
+
+             # Generate model directory path
+             model_dir = self.models_path / model_name.replace('/', '_')
+             model_dir.mkdir(parents=True, exist_ok=True)
+
+             # Clean up any existing files
+             for existing_file in model_dir.glob('*'):
+                 try:
+                     if existing_file.is_file():
+                         existing_file.unlink()
+                 except Exception as e:
+                     logging.warning(f"Could not remove existing file {existing_file}: {e}")
+
+             # Save model data if provided
+             if model_data:
+                 model_config_path = model_dir / "config.json"
+                 with open(model_config_path, 'w') as f:
+                     json.dump(model_data, f, indent=2)
+
+             # Update model registry
+             with self.lock:
+                 self.model_registry[model_name] = {
+                     'path': str(model_dir),
+                     'config': model_data,
+                     'loaded_at': time.time(),
+                     'hash': self._calculate_model_hash(model_path) if model_path else None
+                 }
+                 self.resource_monitor['loaded_models'].add(model_name)
+
+             # Copy model files if path provided
+             if model_path and os.path.exists(model_path):
+                 model_file_path = model_dir / "model.bin"
+                 shutil.copy2(model_path, model_file_path)
+
+             logging.info(f"Successfully loaded model {model_name} to local storage")
+             return True
+
+         except Exception as e:
+             logging.error(f"Error loading model {model_name}: {str(e)}")
+             return False
+
+     def _calculate_model_hash(self, model_path: str) -> str:
+         """Calculate SHA256 hash of model file"""
+         try:
+             sha256_hash = hashlib.sha256()
+             with open(model_path, "rb") as f:
+                 for byte_block in iter(lambda: f.read(4096), b""):
+                     sha256_hash.update(byte_block)
+             return sha256_hash.hexdigest()
+         except Exception as e:
+             logging.error(f"Error calculating model hash: {str(e)}")
+             return ""
+
+     def ping(self) -> bool:
+         """Check if local storage is accessible"""
+         try:
+             # Check if all storage directories exist and are accessible
+             for path in [self.vram_path, self.models_path, self.cache_path, self.state_path]:
+                 if not path.exists() or not os.access(str(path), os.R_OK | os.W_OK):
+                     return False
+             return True
+         except Exception as e:
+             logging.error(f"Storage check failed: {e}")
+             return False
+
+ # Compatibility aliases for existing code
+ HTTPGPUStorage = LocalStorage
+ WebSocketGPUStorage = LocalStorage
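
A quick usage sketch for the new LocalStorage singleton (a hypothetical demo.py, not part of the commit; it assumes only the methods defined above and that http_storage.py is on the import path):

    import numpy as np
    from http_storage import LocalStorage

    # Every construction returns the same singleton instance
    storage = LocalStorage(storage_path="local_storage")
    assert storage is LocalStorage()

    # Tensor round-trip: saved as <id>.npy plus an <id>_meta.json sidecar under vram_blocks/
    data = np.arange(12, dtype=np.float32).reshape(3, 4)
    assert storage.store_tensor("demo_tensor", data)
    assert np.array_equal(storage.load_tensor("demo_tensor"), data)

    # Component state round-trip under states/<component>/<state_id>.json
    storage.store_state("model_registry", "state", {"initialized": True})
    print(storage.load_state("model_registry", "state"))

Note that the test scripts also call close(), is_connected(), transfer_between_chips(), create_sync_barrier(), and wait_sync_barrier(), none of which appear in the 335 lines committed here. For the first two, a local-only shim could be as small as the following (hypothetical helpers; subclassing would fight the singleton __new__, so plain attribute assignment is used instead):

    def _is_connected(self) -> bool:
        # Local storage counts as "connected" whenever its directories are usable
        return self.ping()

    def _close(self) -> None:
        # Nothing to tear down locally; just mark the instance as closing
        self._closing = True

    LocalStorage.is_connected = _is_connected
    LocalStorage.close = _close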
test_ai_integration_http.py CHANGED
@@ -1,6 +1,6 @@
  """
- Test AI integration with HTTP-based storage and zero CPU memory usage.
- All operations are performed through HTTP storage with direct tensor core access.
+ Test AI integration with local storage and zero CPU memory usage.
+ All operations are performed through local storage with direct tensor core access.
  """
  import asyncio
  from gpu_arch import Chip
@@ -8,7 +8,7 @@ from ai_http import AIAccelerator
  from virtual_vram import VirtualVRAM
  from PIL import Image
  import numpy as np
- from http_storage import HTTPGPUStorage
+ from http_storage import LocalStorage
  import time
  import os
  import platform
@@ -22,60 +22,26 @@ logging.basicConfig(
      format='%(asctime)s - %(levelname)s - %(message)s'
  )
 
- # HTTP connection manager with retry handling
+ # Local storage manager
  @contextlib.contextmanager
- def http_storage_manager(max_retries=5, retry_delay=2, timeout=30.0):
+ def storage_manager():
      storage = None
-     last_error = None
 
-     def try_connect():
-         nonlocal storage
-         try:
-             if storage:
-                 if storage.is_connected():
-                     # Verify session is active
-                     if storage.session_token is not None:
-                         return True
-                 storage.close()
-
-             # Create new storage instance
-             storage = HTTPGPUStorage()
-
-             # Initialize session
-             if storage._create_session():
-                 # Verify session was created
-                 if storage.session_token is not None and not storage._closing:
-                     return True
-             return False
-         except Exception as e:
-             logging.error(f"Connection error: {e}")
-             return False
-
-     # Initial connection with improved error handling
-     for attempt in range(max_retries):
-         try:
-             if try_connect():
-                 logging.info("Successfully connected to GPU storage server via HTTP")
-                 # Verify the connection is active
-                 if storage.is_connected():
-                     # Test the connection with a basic operation
-                     test_key = "_connection_test"
-                     if storage.cache_data(test_key, {"test": True}):
-                         break
-                     logging.warning("Connection established but not responsive")
-             else:
-                 logging.warning(f"HTTP connection attempt {attempt + 1} failed, retrying in {retry_delay}s...")
-                 time.sleep(retry_delay * (1.5 ** attempt))  # Exponential backoff
-         except Exception as e:
-             last_error = str(e)
-             logging.error(f"HTTP connection attempt {attempt + 1} failed with error: {e}")
-             time.sleep(retry_delay * (1.5 ** attempt))
-
-         if attempt == max_retries - 1:
-             error_msg = f"Could not connect to GPU storage server via HTTP after {max_retries} attempts"
-             if last_error:
-                 error_msg += f". Last error: {last_error}"
-             raise RuntimeError(error_msg)
+     try:
+         # Create new storage instance with local path
+         storage = LocalStorage(storage_path="local_storage")
+
+         # Verify storage is accessible
+         if storage.ping():
+             logging.info("Successfully initialized local storage")
+         else:
+             raise RuntimeError("Local storage is not accessible")
+
+         yield storage
+
+     except Exception as e:
+         logging.error(f"Storage initialization error: {e}")
+         raise
 
      try:
          yield storage
@@ -94,21 +60,19 @@ def http_storage_manager(max_retries=5, retry_delay=2, timeout=30.0):
          except:
              pass
 
- # Enhanced cleanup handler with connection management
+ # Cleanup handler
  def cleanup_resources():
      try:
          # Get the current storage instance if it exists
-         from http_storage import HTTPGPUStorage
-         current_storage = HTTPGPUStorage.get_current_instance()
+         current_storage = LocalStorage._instance
          if current_storage is not None:
              try:
-                 # Ensure all pending operations are completed
-                 if hasattr(current_storage, 'sync'):
-                     current_storage.sync()
-                 # Close the connection
-                 current_storage.close()
+                 # Clear any cached data
+                 current_storage.resource_monitor['vram_used'] = 0
+                 current_storage.resource_monitor['active_tensors'] = 0
+                 current_storage.resource_monitor['loaded_models'].clear()
              except Exception as e:
-                 logging.error(f"Error closing HTTP storage: {e}")
+                 logging.error(f"Error cleaning up storage resources: {e}")
      except Exception as e:
          logging.error(f"Error in storage cleanup: {e}")
 
@@ -119,8 +83,8 @@ def cleanup_resources():
 
  # Register enhanced cleanup handler
  atexit.register(cleanup_resources)
 
- def test_ai_integration_http():
-     print("\n--- Testing HTTP-Based AI Integration with Zero CPU Usage ---")
+ def test_ai_integration():
+     print("\n--- Testing Local Storage-Based AI Integration with Zero CPU Usage ---")
      from electron_speed import TARGET_SWITCHES_PER_SEC, TRANSISTORS_ON_CHIP, drift_velocity, speed_of_light_silicon
 
      # Initialize components dictionary to store GPU resources
@@ -152,30 +116,30 @@ def test_ai_integration_http():
      print(f"Electron drift velocity: {drift_velocity:.2e} m/s")
      print(f"Percentage of light speed: {(drift_velocity/speed_of_light_silicon)*100:.2f}%")
 
-     # Test 1: HTTP-Based Model Loading
-     print("\nTest 1: Model Loading with HTTP Storage")
+     # Test 1: Local Model Loading
+     print("\nTest 1: Model Loading with Local Storage")
      try:
-         # Use HTTP connection manager for proper resource handling
-         with http_storage_manager() as storage:
+         # Use storage manager for proper resource handling
+         with storage_manager() as storage:
              components['storage'] = storage  # Save storage reference
 
-             # Initialize virtual GPU stack with unlimited HTTP storage and shared connection
-             chip_for_loading = Chip(chip_id=0, vram_size_gb=None, storage=storage)  # Pass shared storage
+             # Initialize virtual GPU stack with unlimited local storage
+             chip_for_loading = Chip(chip_id=0, vram_size_gb=None, storage=storage)  # Unlimited VRAM
              components['chips'].append(chip_for_loading)
 
-             # Initialize VRAM with shared HTTP storage
-             vram = VirtualVRAM(storage=storage)  # Pass shared storage instance
+             # Initialize VRAM with local storage (unlimited)
+             vram = VirtualVRAM(storage=storage)
              components['vram'] = vram
 
-             # Set up AI accelerator with HTTP storage
+             # Set up AI accelerator
              ai_accelerator_for_loading = AIAccelerator(vram=vram, storage=storage)
-             ai_accelerator_for_loading.initialize_tensor_cores()  # Ensure tensor cores are ready
+             ai_accelerator_for_loading.initialize_tensor_cores()
             components['ai_accelerators'].append(ai_accelerator_for_loading)
 
-             # Initialize model registry in HTTP storage
+             # Initialize model registry in local storage (unlimited)
             storage.store_state("model_registry", "state", {
                 "initialized": True,
-                 "max_vram": None,  # Unlimited
+                 "max_vram": None,  # Unlimited VRAM
                 "active_models": {}
             })
 
@@ -193,40 +157,30 @@ def test_ai_integration_http():
                      "loaded_at": time.time()
                  }
 
-                 # Enhanced connection verification and model loading
-                 max_load_retries = 3
-                 for load_attempt in range(max_load_retries):
-                     try:
-                         # Verify HTTP connection with ping
-                         if not ai_accelerator_for_loading.storage.ping():
-                             raise RuntimeError("HTTP connection unresponsive")
-
-                         # Calculate model size for proper VRAM allocation
-                         model_size = model_data["parameters"] * 4  # 4 bytes per parameter (float32)
-                         print(f"Model size: {model_size / (1024**3):.2f} GB")
-
-                         # Pre-allocate VRAM for model
-                         ai_accelerator_for_loading.pre_allocate_vram(model_size)
-
-                         # Load model with HTTP transfer mode
-                         success = ai_accelerator_for_loading.load_model(
-                             model_id=model_id,
-                             model=model_data,
-                             processor=None,
-                             transfer_mode="http",
-                             verify_load=True
-                         )
-
-                         if success:
-                             break
-
-                     except Exception as load_err:
-                         logging.error(f"Load attempt {load_attempt + 1} failed: {str(load_err)}")
-                         if load_attempt < max_load_retries - 1:
-                             time.sleep(2 ** load_attempt)  # Exponential backoff
-                             continue
-                         raise
-
+                 # Load model with local storage verification
+                 try:
+                     # Verify storage is accessible
+                     if not ai_accelerator_for_loading.storage.ping():
+                         raise RuntimeError("Local storage not accessible")
+
+                     # Calculate model size for proper VRAM allocation
+                     model_size = model_data["parameters"] * 4  # 4 bytes per parameter (float32)
+                     print(f"Model size: {model_size / (1024**3):.2f} GB")
+
+                     # Pre-allocate VRAM for model
+                     ai_accelerator_for_loading.pre_allocate_vram(model_size)
+
+                     # Load model with local storage
+                     success = ai_accelerator_for_loading.load_model(
+                         model_id=model_id,
+                         model=model_data,
+                         processor=None,
+                         verify_load=True
+                     )
+                 except Exception as e:
+                     print(f"Exception during model loading: {str(e)}")
+                     success = False
+
                  if success:
                      print(f"Model '{model_id}' loaded successfully to HTTP storage.")
                      assert ai_accelerator_for_loading.has_model(model_id), "Model not found in HTTP storage after loading."
@@ -292,8 +246,8 @@ def test_ai_integration_http():
          print(f"Model loading test failed: {e}")
          return
 
-     # Test 2: HTTP-Based Multi-Chip Processing
-     print("\nTest 2: HTTP-Based Parallel Processing across Multiple Chips")
+     # Test 2: Multi-Chip Parallel Processing
+     print("\nTest 2: Parallel Processing across Multiple Chips")
      num_chips = 4  # Using multiple chips for maximum parallelization
      chips = []
      ai_accelerators = []
@@ -398,17 +352,17 @@ def test_ai_integration_http():
          print(f"- CUDA Cores: {total_cores:,}")
          print(f"- Electron-speed tensor cores: {total_cores * 8:,}")
 
-         # Test multi-chip parallel inference with HTTP storage
-         print(f"\nRunning HTTP-based inference simulation")
+         # Test multi-chip parallel inference with local storage
+         print(f"\nRunning parallel inference simulation")
 
          # Create test input data
          test_image = np.random.rand(224, 224, 3).astype(np.float32)
          print(f"Created test image with shape: {test_image.shape}")
 
-         # Store input image in HTTP storage
+         # Store input image in local storage
          input_tensor_id = "test_input_image"
          if shared_storage.store_tensor(input_tensor_id, test_image):
-             print(f"Successfully stored test image in HTTP storage")
+             print(f"Successfully stored test image in local storage")
          else:
              raise RuntimeError("Failed to store test image")
 
@@ -421,11 +375,11 @@ def test_ai_integration_http():
 
          for i, accelerator in enumerate(ai_accelerators):
              try:
-                 # Run inference using HTTP-stored weights
+                 # Run inference using locally stored weights
                  result = accelerator.inference(components['model_id'], input_tensor_id)
 
                  if result is not None:
-                     # Store result in HTTP storage
+                     # Store result in local storage
                      result_id = f"results/chip_{i}/test_image"
                      if shared_storage.store_tensor(result_id, result):
                          results.append(result)
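
For completeness, a minimal sketch of how the reworked storage_manager() context is driven (hypothetical snippet; it assumes the sibling modules imported by the test file are available, and the close() shim sketched earlier if teardown should actually mark the singleton closed, since the manager's bare except otherwise swallows the missing method):

    from test_ai_integration_http import storage_manager

    with storage_manager() as storage:
        assert storage.ping()
        storage.store_state("demo", "smoke_test", {"ok": True})
    # On exit, the manager logs any error and attempts storage.close()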