Factor Studios commited on
Commit
f5fb3d1
·
verified ·
1 Parent(s): 1b59e81

Upload 3 files

Browse files
Files changed (3) hide show
  1. ai_http.py +12 -0
  2. http_storage.py +18 -0
  3. test_ai_integration_http.py +519 -510
ai_http.py CHANGED
@@ -48,6 +48,18 @@ class AIAccelerator:
48
  bandwidth_tbps=drift_velocity / 1e-12 # Bandwidth scaled to electron drift speed
49
  )
50
  self.tensor_cores_initialized = False
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  # Initialize model, tensor, and tokenizer tracking
53
  self.model_registry: Dict[str, Dict[str, Any]] = {} # Track loaded models
 
48
  bandwidth_tbps=drift_velocity / 1e-12 # Bandwidth scaled to electron drift speed
49
  )
50
  self.tensor_cores_initialized = False
51
+ self._vram_allocated = 0
52
+
53
+ def pre_allocate_vram(self, size_bytes: int) -> bool:
54
+ """Pre-allocate VRAM for model loading"""
55
+ if not self.vram:
56
+ return True # No VRAM restrictions
57
+
58
+ if self._vram_allocated + size_bytes > self.vram.total_size:
59
+ return False
60
+
61
+ self._vram_allocated += size_bytes
62
+ return True
63
 
64
  # Initialize model, tensor, and tokenizer tracking
65
  self.model_registry: Dict[str, Dict[str, Any]] = {} # Track loaded models
http_storage.py CHANGED
@@ -48,6 +48,24 @@ class LocalStorage:
48
 
49
  self.lock = threading.Lock()
50
  self._closing = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  self._connected = True # Local storage is always "connected"
52
 
53
  def wait_for_connection(self, timeout: float = 30.0) -> bool:
 
48
 
49
  self.lock = threading.Lock()
50
  self._closing = False
51
+ self.model_registry = {}
52
+ self._connected = True
53
+
54
+ def is_model_loaded(self, model_id: str) -> bool:
55
+ """Check if a model is loaded in local storage"""
56
+ if not model_id:
57
+ return False
58
+
59
+ # Check if model directory exists
60
+ model_dir = self.models_path / model_id.replace('/', '_')
61
+ if not model_dir.exists():
62
+ return False
63
+
64
+ # Check for model files
65
+ model_file = model_dir / "model.bin"
66
+ config_file = model_dir / "config.json"
67
+
68
+ return model_file.exists() and config_file.exists()
69
  self._connected = True # Local storage is always "connected"
70
 
71
  def wait_for_connection(self, timeout: float = 30.0) -> bool:
test_ai_integration_http.py CHANGED
@@ -1,510 +1,519 @@
1
- """
2
- Test AI integration with local storage and zero CPU memory usage.
3
- All operations are performed through local storage with direct tensor core access.
4
- """
5
- import asyncio
6
- from gpu_arch import Chip
7
- from ai_http import AIAccelerator
8
- from virtual_vram import VirtualVRAM
9
- from PIL import Image
10
- import numpy as np
11
- from http_storage import LocalStorage
12
- import time
13
- import os
14
- import platform
15
- import contextlib
16
- import atexit
17
- import logging
18
-
19
- # Configure logging
20
- logging.basicConfig(
21
- level=logging.INFO,
22
- format='%(asctime)s - %(levelname)s - %(message)s'
23
- )
24
-
25
- # Local storage manager
26
- @contextlib.contextmanager
27
- def storage_manager():
28
- storage = None
29
-
30
- try:
31
- # Create new storage instance with local path
32
- storage = LocalStorage(storage_path="local_storage")
33
-
34
- # Verify storage is accessible
35
- if storage.ping():
36
- logging.info("Successfully initialized local storage")
37
- else:
38
- raise RuntimeError("Local storage is not accessible")
39
-
40
- yield storage
41
-
42
- except Exception as e:
43
- logging.error(f"Storage initialization error: {e}")
44
- raise
45
- if try_connect():
46
- logging.info("Successfully reconnected to GPU storage server via HTTP")
47
- yield storage
48
- else:
49
- raise
50
- finally:
51
- if storage:
52
- try:
53
- storage.close()
54
- except:
55
- pass
56
-
57
- # Cleanup handler
58
- def cleanup_resources():
59
- try:
60
- # Get the current storage instance if it exists
61
- current_storage = LocalStorage._instance
62
- if current_storage is not None:
63
- try:
64
- # Clear any cached data
65
- current_storage.resource_monitor['vram_used'] = 0
66
- current_storage.resource_monitor['active_tensors'] = 0
67
- current_storage.resource_monitor['loaded_models'].clear()
68
- except Exception as e:
69
- logging.error(f"Error cleaning up storage resources: {e}")
70
- except Exception as e:
71
- logging.error(f"Error in storage cleanup: {e}")
72
-
73
- # Clear VRAM and other resources
74
- import gc
75
- gc.collect()
76
-
77
- # Register enhanced cleanup handler
78
- atexit.register(cleanup_resources)
79
-
80
- def test_ai_integration_http():
81
- print("\n--- Testing Local Storage-Based AI Integration with Zero CPU Usage ---")
82
- from electron_speed import TARGET_SWITCHES_PER_SEC, TRANSISTORS_ON_CHIP, drift_velocity, speed_of_light_silicon
83
-
84
- # Initialize components dictionary to store GPU resources
85
- components = {
86
- 'chips': [],
87
- 'ai_accelerators': [],
88
- 'model_id': None,
89
- 'vram': None,
90
- 'storage': None,
91
- 'model_config': None,
92
- 'tensor_registry': {},
93
- 'initialized': False
94
- }
95
-
96
- # Initialize global tensor registry
97
- global_tensor_registry = {
98
- 'model_tensors': {},
99
- 'runtime_tensors': {},
100
- 'placeholder_tensors': {},
101
- 'stats': {
102
- 'total_vram_used': 0,
103
- 'active_tensors': 0
104
- }
105
- }
106
-
107
- print(f"\nElectron-Speed Architecture Parameters:")
108
- print(f"Target switches/sec: {TARGET_SWITCHES_PER_SEC:.2e}")
109
- print(f"Transistors on chip: {TRANSISTORS_ON_CHIP:,}")
110
- print(f"Electron drift velocity: {drift_velocity:.2e} m/s")
111
- print(f"Percentage of light speed: {(drift_velocity/speed_of_light_silicon)*100:.2f}%")
112
-
113
- # Test 1: Local Model Loading
114
- print("\nTest 1: Model Loading with Local Storage")
115
- try:
116
- # Use storage manager for proper resource handling
117
- with storage_manager() as storage:
118
- components['storage'] = storage # Save storage reference
119
-
120
- # Initialize virtual GPU stack with unlimited local storage
121
- chip_for_loading = Chip(chip_id=0, vram_size_gb=None, storage=storage) # Unlimited VRAM
122
- components['chips'].append(chip_for_loading)
123
-
124
- # Initialize VRAM with local storage (unlimited)
125
- vram = VirtualVRAM(storage=storage)
126
- components['vram'] = vram
127
-
128
- # Set up AI accelerator
129
- ai_accelerator_for_loading = AIAccelerator(vram=vram, storage=storage)
130
- ai_accelerator_for_loading.initialize_tensor_cores()
131
- components['ai_accelerators'].append(ai_accelerator_for_loading)
132
-
133
- # Initialize model registry in local storage (unlimited)
134
- storage.store_state("model_registry", "state", {
135
- "initialized": True,
136
- "max_vram": None, # Unlimited VRAM
137
- "active_models": {}
138
- })
139
-
140
- # Load BLIP-2 Large model directly to HTTP storage
141
- model_id = "microsoft/florence-2-large"
142
- print(f"Loading model {model_id} directly to HTTP storage...")
143
-
144
- try:
145
- # Simulate model loading (in real scenario, would load actual model)
146
- model_data = {
147
- "model_name": model_id,
148
- "model_type": "florence-2-large",
149
- "parameters": 771000000,
150
- "architecture": "vision-language",
151
- "loaded_at": time.time()
152
- }
153
-
154
- # Load model with local storage verification
155
- try:
156
- # Verify storage is accessible
157
- if not ai_accelerator_for_loading.storage.ping():
158
- raise RuntimeError("Local storage not accessible")
159
-
160
- # Calculate model size for proper VRAM allocation
161
- model_size = model_data["parameters"] * 4 # 4 bytes per parameter (float32)
162
- print(f"Model size: {model_size / (1024**3):.2f} GB")
163
-
164
- # Pre-allocate VRAM for model
165
- ai_accelerator_for_loading.pre_allocate_vram(model_size)
166
-
167
- # Load model with local storage
168
- success = ai_accelerator_for_loading.load_model(
169
- model_id=model_id,
170
- model=model_data,
171
- processor=None,
172
- verify_load=True
173
- )
174
- except Exception as e:
175
- print(f"Exception during model loading: {str(e)}")
176
- success = False
177
-
178
- if success:
179
- print(f"Model '{model_id}' loaded successfully to HTTP storage.")
180
- assert ai_accelerator_for_loading.has_model(model_id), "Model not found in HTTP storage after loading."
181
-
182
- # Store model parameters in components dict
183
- components['model_id'] = model_id
184
- components['model_size'] = model_size
185
- components['model_config'] = model_data
186
- else:
187
- raise RuntimeError("Failed to load model via HTTP storage")
188
-
189
- except Exception as e:
190
- print(f"Detailed model loading error: {str(e)}")
191
- print("Falling back to placeholder model mode...")
192
- # Try loading with placeholder model
193
- try:
194
- # Match server-side model configuration
195
- placeholder_model = {
196
- "model_name": model_id,
197
- "model_type": "placeholder",
198
- "parameters": 1000000, # Small placeholder
199
- "architecture": {
200
- "type": "nvidia_ampere",
201
- "features": ["tensor_cores", "ray_tracing", "dynamic_scheduling"]
202
- },
203
- "loaded_at": time.time(),
204
- # Server-validated GPU architecture configuration
205
- "num_sms": 108, # A100 config
206
- "tensor_cores_per_sm": 4,
207
- "cuda_cores_per_sm": 64,
208
- "compute_capability": "8.0",
209
- "vram_config": {
210
- "size_gb": 40,
211
- "bandwidth_gbps": 1555,
212
- "cache_size_mb": 40,
213
- "allocation": "dynamic"
214
- }
215
- }
216
-
217
- # Validate required fields before loading
218
- required_fields = ["num_sms", "tensor_cores_per_sm", "cuda_cores_per_sm"]
219
- if not all(field in placeholder_model for field in required_fields):
220
- raise ValueError(f"Missing required GPU architecture fields: {[f for f in required_fields if f not in placeholder_model]}")
221
-
222
- success = ai_accelerator_for_loading.load_model(
223
- model_id=model_id,
224
- model=placeholder_model,
225
- processor=None
226
- )
227
-
228
- if success:
229
- components['model_id'] = model_id
230
- components['model_config'] = placeholder_model
231
- print("Successfully loaded placeholder model via HTTP")
232
- else:
233
- raise RuntimeError("Placeholder model loading also failed")
234
-
235
- except Exception as e2:
236
- print(f"Placeholder fallback also failed: {str(e2)}")
237
- raise
238
-
239
- except Exception as e:
240
- print(f"Model loading test failed: {e}")
241
- return
242
-
243
- # Test 2: Multi-Chip Parallel Processing
244
- print("\nTest 2: Parallel Processing across Multiple Chips")
245
- num_chips = 4 # Using multiple chips for maximum parallelization
246
- chips = []
247
- ai_accelerators = []
248
-
249
- try:
250
- # Try to reuse existing connection with verification
251
- shared_storage = None
252
- max_connection_attempts = 3
253
-
254
- for attempt in range(max_connection_attempts):
255
- try:
256
- if (components['storage'] and
257
- components['storage'].is_connected()):
258
- shared_storage = components['storage']
259
- logging.info("Successfully reused existing HTTP connection")
260
- break
261
- else:
262
- logging.warning("Existing connection unavailable, creating new HTTP connection...")
263
- with storage_manager() as new_storage:
264
- if new_storage and new_storage.is_connected():
265
- components['storage'] = new_storage
266
- shared_storage = new_storage
267
- logging.info("Successfully established new HTTP connection")
268
- break
269
- except Exception as e:
270
- logging.error(f"HTTP connection attempt {attempt + 1} failed: {e}")
271
- if attempt < max_connection_attempts - 1:
272
- time.sleep(2)
273
- continue
274
- raise RuntimeError(f"Failed to establish HTTP connection after {max_connection_attempts} attempts")
275
-
276
- # Initialize high-performance chip array with HTTP storage
277
- total_sms = 0
278
- total_cores = 0
279
-
280
- # Create optical interconnect for chip communication
281
- from gpu_arch import OpticalInterconnect
282
- optical_link = OpticalInterconnect(bandwidth_tbps=800, latency_ns=1)
283
-
284
- # Reuse existing VRAM instance with shared storage
285
- shared_vram = components['vram']
286
- if shared_vram is None:
287
- shared_vram = VirtualVRAM(storage=shared_storage)
288
- shared_vram.storage = shared_storage
289
-
290
- for i in range(num_chips):
291
- # Configure each chip with shared HTTP storage
292
- chip = Chip(chip_id=i, vram_size_gb=None, storage=shared_storage)
293
- chips.append(chip)
294
-
295
- # Connect chips in a ring topology
296
- if i > 0:
297
- chip.connect_chip(chips[i-1], optical_link)
298
-
299
- # Initialize AI accelerator with shared resources
300
- ai_accelerator = AIAccelerator(vram=shared_vram, storage=shared_storage)
301
- ai_accelerators.append(ai_accelerator)
302
-
303
- # Verify and potentially repair HTTP connection
304
- max_retry = 3
305
- for retry in range(max_retry):
306
- try:
307
- if not shared_storage.is_connected():
308
- logging.warning(f"Connection check failed for chip {i}, attempt {retry + 1}")
309
- shared_storage._create_session() # Attempt to reconnect
310
- time.sleep(1)
311
- continue
312
-
313
- # Load model weights from HTTP storage (no CPU transfer)
314
- success = ai_accelerator.load_model(components['model_id'], components['model_config'], None)
315
- if success:
316
- logging.info(f"Successfully initialized chip {i} with model via HTTP")
317
- break
318
- else:
319
- raise RuntimeError("Model loading failed")
320
-
321
- except Exception as e:
322
- if retry < max_retry - 1:
323
- logging.warning(f"Error initializing chip {i}, attempt {retry + 1}: {e}")
324
- time.sleep(1)
325
- continue
326
- else:
327
- logging.error(f"Failed to initialize chip {i} after {max_retry} attempts: {e}")
328
- raise
329
-
330
- # Track total processing units
331
- total_sms += chip.num_sms
332
- total_cores += chip.num_sms * chip.cores_per_sm
333
-
334
- # Store chip configuration in HTTP storage
335
- shared_storage.store_state(f"chips/{i}/config", "state", {
336
- "num_sms": chip.num_sms,
337
- "cores_per_sm": chip.cores_per_sm,
338
- "total_cores": chip.num_sms * chip.cores_per_sm,
339
- "connected_chips": [c.chip_id for c in chip.connected_chips]
340
- })
341
-
342
- print(f"Chip {i} initialized with HTTP storage and optical interconnect")
343
-
344
- print(f"\nTotal Processing Units:")
345
- print(f"- Streaming Multiprocessors: {total_sms:,}")
346
- print(f"- CUDA Cores: {total_cores:,}")
347
- print(f"- Electron-speed tensor cores: {total_cores * 8:,}")
348
-
349
- # Test multi-chip parallel inference with local storage
350
- print(f"\nRunning parallel inference simulation")
351
-
352
- # Create test input data
353
- test_image = np.random.rand(224, 224, 3).astype(np.float32)
354
- print(f"Created test image with shape: {test_image.shape}")
355
-
356
- # Store input image in local storage
357
- input_tensor_id = "test_input_image"
358
- if shared_storage.store_tensor(input_tensor_id, test_image):
359
- print(f"Successfully stored test image in local storage")
360
- else:
361
- raise RuntimeError("Failed to store test image")
362
-
363
- # Synchronize all chips through HTTP storage
364
- start_time = time.time()
365
-
366
- # Distribute workload across chips using HTTP storage
367
- batch_size = test_image.shape[0] // num_chips if test_image.shape[0] >= num_chips else 1
368
- results = []
369
-
370
- for i, accelerator in enumerate(ai_accelerators):
371
- try:
372
- # Run inference using locally stored weights
373
- result = accelerator.inference(components['model_id'], input_tensor_id)
374
-
375
- if result is not None:
376
- # Store result in local storage
377
- result_id = f"results/chip_{i}/test_image"
378
- if shared_storage.store_tensor(result_id, result):
379
- results.append(result)
380
- print(f"Chip {i} completed inference and stored result")
381
- else:
382
- print(f"Chip {i} inference succeeded but result storage failed")
383
- else:
384
- print(f"Chip {i} inference failed")
385
-
386
- except Exception as e:
387
- print(f"Error in chip {i} inference: {e}")
388
-
389
- elapsed = time.time() - start_time
390
-
391
- # Calculate performance metrics
392
- ops_per_inference = total_cores * 1024 # FMA ops per core
393
- from electron_speed import drift_velocity, TARGET_SWITCHES_PER_SEC
394
- electron_transit_time = 1 / (drift_velocity * TARGET_SWITCHES_PER_SEC)
395
- theoretical_time = electron_transit_time * ops_per_inference / total_cores
396
-
397
- print(f"\nHTTP-Based Multi-Chip Inference Results:")
398
- print(f"- Chips used: {num_chips}")
399
- print(f"- Results collected: {len(results)}")
400
- print(f"- Total time: {elapsed:.4f}s")
401
- print(f"- Theoretical electron-speed time: {theoretical_time:.6f}s")
402
- print(f"- Speed ratio: {theoretical_time/elapsed:.2f}x theoretical")
403
- print(f"- Operations per second: {ops_per_inference/elapsed:.2e}")
404
-
405
- # Test 3: HTTP Storage Performance
406
- print(f"\nTest 3: HTTP Storage Performance Evaluation")
407
-
408
- # Test tensor storage/retrieval performance
409
- test_sizes = [1024, 4096, 16384, 65536] # Different tensor sizes
410
- storage_times = []
411
- retrieval_times = []
412
-
413
- for size in test_sizes:
414
- test_tensor = np.random.rand(size).astype(np.float32)
415
- tensor_id = f"perf_test_{size}"
416
-
417
- # Test storage time
418
- start = time.time()
419
- success = shared_storage.store_tensor(tensor_id, test_tensor)
420
- storage_time = time.time() - start
421
-
422
- if success:
423
- storage_times.append(storage_time)
424
-
425
- # Test retrieval time
426
- start = time.time()
427
- retrieved = shared_storage.load_tensor(tensor_id)
428
- retrieval_time = time.time() - start
429
-
430
- if retrieved is not None and np.array_equal(test_tensor, retrieved):
431
- retrieval_times.append(retrieval_time)
432
- print(f"Size {size}: Store {storage_time:.4f}s, Retrieve {retrieval_time:.4f}s")
433
- else:
434
- print(f"Size {size}: Retrieval verification failed")
435
- else:
436
- print(f"Size {size}: Storage failed")
437
-
438
- if storage_times and retrieval_times:
439
- avg_storage = sum(storage_times) / len(storage_times)
440
- avg_retrieval = sum(retrieval_times) / len(retrieval_times)
441
- print(f"Average storage time: {avg_storage:.4f}s")
442
- print(f"Average retrieval time: {avg_retrieval:.4f}s")
443
-
444
- # Test 4: Multi-chip coordination via HTTP
445
- print(f"\nTest 4: Multi-Chip Coordination via HTTP")
446
-
447
- # Test cross-chip data transfer
448
- test_data_id = "cross_chip_test_data"
449
- test_data = np.array([1, 2, 3, 4, 5], dtype=np.float32)
450
-
451
- if shared_storage.store_tensor(test_data_id, test_data):
452
- print("Stored test data for cross-chip transfer")
453
-
454
- # Transfer data between chips
455
- new_data_id = shared_storage.transfer_between_chips(0, 1, test_data_id)
456
- if new_data_id:
457
- print(f"Successfully transferred data from chip 0 to chip 1: {new_data_id}")
458
-
459
- # Verify transferred data
460
- transferred_data = shared_storage.load_tensor(new_data_id)
461
- if transferred_data is not None and np.array_equal(test_data, transferred_data):
462
- print("Cross-chip transfer verification successful")
463
- else:
464
- print("Cross-chip transfer verification failed")
465
- else:
466
- print("Cross-chip transfer failed")
467
-
468
- # Test synchronization barriers
469
- barrier_id = "test_barrier"
470
- num_participants = num_chips
471
-
472
- if shared_storage.create_sync_barrier(barrier_id, num_participants):
473
- print(f"Created synchronization barrier for {num_participants} participants")
474
-
475
- # Simulate participants arriving at barrier
476
- for i in range(num_participants):
477
- result = shared_storage.wait_sync_barrier(barrier_id)
478
- if i == num_participants - 1:
479
- if result:
480
- print("All participants reached barrier - synchronization successful")
481
- else:
482
- print("Barrier synchronization failed")
483
- else:
484
- print(f"Participant {i+1} reached barrier")
485
-
486
- print(f"\nHTTP-based AI integration test completed successfully!")
487
-
488
- # Final statistics
489
- final_stats = {
490
- "chips_initialized": len(chips),
491
- "ai_accelerators": len(ai_accelerators),
492
- "total_cores": total_cores,
493
- "model_loaded": components['model_id'] is not None,
494
- "storage_type": "HTTP",
495
- "connection_status": shared_storage.get_connection_status()
496
- }
497
-
498
- print(f"\nFinal System Statistics:")
499
- for key, value in final_stats.items():
500
- print(f"- {key}: {value}")
501
-
502
- except Exception as e:
503
- print(f"Multi-chip processing test failed: {e}")
504
- import traceback
505
- traceback.print_exc()
506
- return
507
-
508
- if __name__ == "__main__":
509
- test_ai_integration_http()
510
-
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Test AI integration with local storage and zero CPU memory usage.
3
+ All operations are performed through local storage with direct tensor core access.
4
+ """
5
+ import asyncio
6
+ from gpu_arch import Chip
7
+ from ai_http import AIAccelerator
8
+ from virtual_vram import VirtualVRAM
9
+ from PIL import Image
10
+ import numpy as np
11
+ from http_storage import LocalStorage
12
+ import time
13
+ import os
14
+ import platform
15
+ import contextlib
16
+ import atexit
17
+ import logging
18
+
19
+ # Configure logging
20
+ logging.basicConfig(
21
+ level=logging.INFO,
22
+ format='%(asctime)s - %(levelname)s - %(message)s'
23
+ )
24
+
25
+ # Local storage manager
26
+ @contextlib.contextmanager
27
+ def storage_manager():
28
+ storage = None
29
+
30
+ try:
31
+ # Create new storage instance with local path
32
+ storage = LocalStorage(storage_path="local_storage")
33
+
34
+ # Verify storage is accessible
35
+ if storage.ping():
36
+ logging.info("Successfully initialized local storage")
37
+ else:
38
+ raise RuntimeError("Local storage is not accessible")
39
+
40
+ yield storage
41
+
42
+ except Exception as e:
43
+ logging.error(f"Storage initialization error: {e}")
44
+ raise
45
+ if try_connect():
46
+ logging.info("Successfully reconnected to GPU storage server via HTTP")
47
+ yield storage
48
+ else:
49
+ raise
50
+ finally:
51
+ if storage:
52
+ try:
53
+ storage.close()
54
+ except:
55
+ pass
56
+
57
+ # Cleanup handler
58
+ def cleanup_resources():
59
+ try:
60
+ # Get the current storage instance if it exists
61
+ current_storage = LocalStorage._instance
62
+ if current_storage is not None:
63
+ try:
64
+ # Clear any cached data
65
+ current_storage.resource_monitor['vram_used'] = 0
66
+ current_storage.resource_monitor['active_tensors'] = 0
67
+ current_storage.resource_monitor['loaded_models'].clear()
68
+ except Exception as e:
69
+ logging.error(f"Error cleaning up storage resources: {e}")
70
+ except Exception as e:
71
+ logging.error(f"Error in storage cleanup: {e}")
72
+
73
+ # Clear VRAM and other resources
74
+ import gc
75
+ gc.collect()
76
+
77
+ # Register enhanced cleanup handler
78
+ atexit.register(cleanup_resources)
79
+
80
+ def test_ai_integration_http():
81
+ print("\n--- Testing Local Storage-Based AI Integration with Zero CPU Usage ---")
82
+ from electron_speed import TARGET_SWITCHES_PER_SEC, TRANSISTORS_ON_CHIP, drift_velocity, speed_of_light_silicon
83
+
84
+ # Initialize components dictionary to store GPU resources
85
+ components = {
86
+ 'chips': [],
87
+ 'ai_accelerators': [],
88
+ 'model_id': None,
89
+ 'vram': None,
90
+ 'storage': None,
91
+ 'model_config': None,
92
+ 'tensor_registry': {},
93
+ 'initialized': False
94
+ }
95
+
96
+ # Initialize global tensor registry
97
+ global_tensor_registry = {
98
+ 'model_tensors': {},
99
+ 'runtime_tensors': {},
100
+ 'placeholder_tensors': {},
101
+ 'stats': {
102
+ 'total_vram_used': 0,
103
+ 'active_tensors': 0
104
+ }
105
+ }
106
+
107
+ print(f"\nElectron-Speed Architecture Parameters:")
108
+ print(f"Target switches/sec: {TARGET_SWITCHES_PER_SEC:.2e}")
109
+ print(f"Transistors on chip: {TRANSISTORS_ON_CHIP:,}")
110
+ print(f"Electron drift velocity: {drift_velocity:.2e} m/s")
111
+ print(f"Percentage of light speed: {(drift_velocity/speed_of_light_silicon)*100:.2f}%")
112
+
113
+ # Test 1: Local Model Loading
114
+ print("\nTest 1: Model Loading with Local Storage")
115
+ try:
116
+ # Use storage manager for proper resource handling
117
+ with storage_manager() as storage:
118
+ components['storage'] = storage # Save storage reference
119
+
120
+ # Initialize virtual GPU stack with unlimited local storage
121
+ chip_for_loading = Chip(chip_id=0, vram_size_gb=None, storage=storage) # Unlimited VRAM
122
+ components['chips'].append(chip_for_loading)
123
+
124
+ # Initialize VRAM with local storage (unlimited)
125
+ vram = VirtualVRAM(storage=storage)
126
+ components['vram'] = vram
127
+
128
+ # Set up AI accelerator
129
+ ai_accelerator_for_loading = AIAccelerator(vram=vram, storage=storage)
130
+ ai_accelerator_for_loading.initialize_tensor_cores()
131
+ components['ai_accelerators'].append(ai_accelerator_for_loading)
132
+
133
+ # Initialize model registry in local storage (unlimited)
134
+ storage.store_state("model_registry", "state", {
135
+ "initialized": True,
136
+ "max_vram": None, # Unlimited VRAM
137
+ "active_models": {}
138
+ })
139
+
140
+ # Load BLIP-2 Large model directly to HTTP storage
141
+ model_id = "microsoft/florence-2-large"
142
+ print(f"Loading model {model_id} directly to HTTP storage...")
143
+
144
+ try:
145
+ # Simulate model loading (in real scenario, would load actual model)
146
+ model_data = {
147
+ "model_name": model_id,
148
+ "model_type": "florence-2-large",
149
+ "parameters": 771000000,
150
+ "architecture": "vision-language",
151
+ "loaded_at": time.time()
152
+ }
153
+
154
+ # Load model with local storage verification
155
+ try:
156
+ # Verify storage is accessible
157
+ if not ai_accelerator_for_loading.storage.ping():
158
+ raise RuntimeError("Local storage not accessible")
159
+
160
+ # Calculate model size for proper VRAM allocation
161
+ model_size = model_data["parameters"] * 4 # 4 bytes per parameter (float32)
162
+ print(f"Model size: {model_size / (1024**3):.2f} GB")
163
+
164
+ # Pre-allocate VRAM for model
165
+ ai_accelerator_for_loading.pre_allocate_vram(model_size)
166
+
167
+ # Load model with local storage
168
+ success = ai_accelerator_for_loading.load_model(
169
+ model_id=model_id,
170
+ model=model_data,
171
+ processor=None,
172
+ verify_load=True
173
+ )
174
+ except Exception as e:
175
+ print(f"Exception during model loading: {str(e)}")
176
+ success = False
177
+
178
+ if success:
179
+ print(f"Model '{model_id}' loaded successfully to HTTP storage.")
180
+ assert ai_accelerator_for_loading.has_model(model_id), "Model not found in HTTP storage after loading."
181
+
182
+ # Store model parameters in components dict
183
+ components['model_id'] = model_id
184
+ components['model_size'] = model_size
185
+ components['model_config'] = model_data
186
+ else:
187
+ raise RuntimeError("Failed to load model via HTTP storage")
188
+
189
+ except Exception as e:
190
+ print(f"Detailed model loading error: {str(e)}")
191
+ print("Falling back to placeholder model mode...")
192
+ # Try loading with placeholder model
193
+ try:
194
+ # Match server-side model configuration
195
+ placeholder_model = {
196
+ "model_name": model_id,
197
+ "model_type": "placeholder",
198
+ "parameters": 1000000, # Small placeholder
199
+ "architecture": {
200
+ "type": "nvidia_ampere",
201
+ "features": ["tensor_cores", "ray_tracing", "dynamic_scheduling"]
202
+ },
203
+ "loaded_at": time.time(),
204
+ # Server-validated GPU architecture configuration
205
+ "num_sms": 108, # A100 config
206
+ "tensor_cores_per_sm": 4,
207
+ "cuda_cores_per_sm": 64,
208
+ "compute_capability": "8.0",
209
+ "vram_config": {
210
+ "size_gb": 40,
211
+ "bandwidth_gbps": 1555,
212
+ "cache_size_mb": 40,
213
+ "allocation": "dynamic"
214
+ }
215
+ }
216
+
217
+ # Validate required fields before loading
218
+ required_fields = ["num_sms", "tensor_cores_per_sm", "cuda_cores_per_sm"]
219
+ if not all(field in placeholder_model for field in required_fields):
220
+ raise ValueError(f"Missing required GPU architecture fields: {[f for f in required_fields if f not in placeholder_model]}")
221
+
222
+ # Pre-allocate VRAM for the model
223
+ model_size = placeholder_model["parameters"] * 4 # 4 bytes per parameter
224
+ if not ai_accelerator_for_loading.pre_allocate_vram(model_size):
225
+ raise RuntimeError("Failed to pre-allocate VRAM for model")
226
+
227
+ # Load the model with storage verification
228
+ if not ai_accelerator_for_loading.storage.ping():
229
+ raise RuntimeError("Storage not accessible")
230
+
231
+ success = ai_accelerator_for_loading.load_model(
232
+ model_id=model_id,
233
+ model=placeholder_model,
234
+ processor=None
235
+ )
236
+
237
+ if success:
238
+ components['model_id'] = model_id
239
+ components['model_config'] = placeholder_model
240
+ print("Successfully loaded placeholder model via HTTP")
241
+ else:
242
+ raise RuntimeError("Placeholder model loading also failed")
243
+
244
+ except Exception as e2:
245
+ print(f"Placeholder fallback also failed: {str(e2)}")
246
+ raise
247
+
248
+ except Exception as e:
249
+ print(f"Model loading test failed: {e}")
250
+ return
251
+
252
+ # Test 2: Multi-Chip Parallel Processing
253
+ print("\nTest 2: Parallel Processing across Multiple Chips")
254
+ num_chips = 4 # Using multiple chips for maximum parallelization
255
+ chips = []
256
+ ai_accelerators = []
257
+
258
+ try:
259
+ # Try to reuse existing connection with verification
260
+ shared_storage = None
261
+ max_connection_attempts = 3
262
+
263
+ for attempt in range(max_connection_attempts):
264
+ try:
265
+ if (components['storage'] and
266
+ components['storage'].is_connected()):
267
+ shared_storage = components['storage']
268
+ logging.info("Successfully reused existing HTTP connection")
269
+ break
270
+ else:
271
+ logging.warning("Existing connection unavailable, creating new HTTP connection...")
272
+ with storage_manager() as new_storage:
273
+ if new_storage and new_storage.is_connected():
274
+ components['storage'] = new_storage
275
+ shared_storage = new_storage
276
+ logging.info("Successfully established new HTTP connection")
277
+ break
278
+ except Exception as e:
279
+ logging.error(f"HTTP connection attempt {attempt + 1} failed: {e}")
280
+ if attempt < max_connection_attempts - 1:
281
+ time.sleep(2)
282
+ continue
283
+ raise RuntimeError(f"Failed to establish HTTP connection after {max_connection_attempts} attempts")
284
+
285
+ # Initialize high-performance chip array with HTTP storage
286
+ total_sms = 0
287
+ total_cores = 0
288
+
289
+ # Create optical interconnect for chip communication
290
+ from gpu_arch import OpticalInterconnect
291
+ optical_link = OpticalInterconnect(bandwidth_tbps=800, latency_ns=1)
292
+
293
+ # Reuse existing VRAM instance with shared storage
294
+ shared_vram = components['vram']
295
+ if shared_vram is None:
296
+ shared_vram = VirtualVRAM(storage=shared_storage)
297
+ shared_vram.storage = shared_storage
298
+
299
+ for i in range(num_chips):
300
+ # Configure each chip with shared HTTP storage
301
+ chip = Chip(chip_id=i, vram_size_gb=None, storage=shared_storage)
302
+ chips.append(chip)
303
+
304
+ # Connect chips in a ring topology
305
+ if i > 0:
306
+ chip.connect_chip(chips[i-1], optical_link)
307
+
308
+ # Initialize AI accelerator with shared resources
309
+ ai_accelerator = AIAccelerator(vram=shared_vram, storage=shared_storage)
310
+ ai_accelerators.append(ai_accelerator)
311
+
312
+ # Verify and potentially repair HTTP connection
313
+ max_retry = 3
314
+ for retry in range(max_retry):
315
+ try:
316
+ if not shared_storage.is_connected():
317
+ logging.warning(f"Connection check failed for chip {i}, attempt {retry + 1}")
318
+ shared_storage._create_session() # Attempt to reconnect
319
+ time.sleep(1)
320
+ continue
321
+
322
+ # Load model weights from HTTP storage (no CPU transfer)
323
+ success = ai_accelerator.load_model(components['model_id'], components['model_config'], None)
324
+ if success:
325
+ logging.info(f"Successfully initialized chip {i} with model via HTTP")
326
+ break
327
+ else:
328
+ raise RuntimeError("Model loading failed")
329
+
330
+ except Exception as e:
331
+ if retry < max_retry - 1:
332
+ logging.warning(f"Error initializing chip {i}, attempt {retry + 1}: {e}")
333
+ time.sleep(1)
334
+ continue
335
+ else:
336
+ logging.error(f"Failed to initialize chip {i} after {max_retry} attempts: {e}")
337
+ raise
338
+
339
+ # Track total processing units
340
+ total_sms += chip.num_sms
341
+ total_cores += chip.num_sms * chip.cores_per_sm
342
+
343
+ # Store chip configuration in HTTP storage
344
+ shared_storage.store_state(f"chips/{i}/config", "state", {
345
+ "num_sms": chip.num_sms,
346
+ "cores_per_sm": chip.cores_per_sm,
347
+ "total_cores": chip.num_sms * chip.cores_per_sm,
348
+ "connected_chips": [c.chip_id for c in chip.connected_chips]
349
+ })
350
+
351
+ print(f"Chip {i} initialized with HTTP storage and optical interconnect")
352
+
353
+ print(f"\nTotal Processing Units:")
354
+ print(f"- Streaming Multiprocessors: {total_sms:,}")
355
+ print(f"- CUDA Cores: {total_cores:,}")
356
+ print(f"- Electron-speed tensor cores: {total_cores * 8:,}")
357
+
358
+ # Test multi-chip parallel inference with local storage
359
+ print(f"\nRunning parallel inference simulation")
360
+
361
+ # Create test input data
362
+ test_image = np.random.rand(224, 224, 3).astype(np.float32)
363
+ print(f"Created test image with shape: {test_image.shape}")
364
+
365
+ # Store input image in local storage
366
+ input_tensor_id = "test_input_image"
367
+ if shared_storage.store_tensor(input_tensor_id, test_image):
368
+ print(f"Successfully stored test image in local storage")
369
+ else:
370
+ raise RuntimeError("Failed to store test image")
371
+
372
+ # Synchronize all chips through HTTP storage
373
+ start_time = time.time()
374
+
375
+ # Distribute workload across chips using HTTP storage
376
+ batch_size = test_image.shape[0] // num_chips if test_image.shape[0] >= num_chips else 1
377
+ results = []
378
+
379
+ for i, accelerator in enumerate(ai_accelerators):
380
+ try:
381
+ # Run inference using locally stored weights
382
+ result = accelerator.inference(components['model_id'], input_tensor_id)
383
+
384
+ if result is not None:
385
+ # Store result in local storage
386
+ result_id = f"results/chip_{i}/test_image"
387
+ if shared_storage.store_tensor(result_id, result):
388
+ results.append(result)
389
+ print(f"Chip {i} completed inference and stored result")
390
+ else:
391
+ print(f"Chip {i} inference succeeded but result storage failed")
392
+ else:
393
+ print(f"Chip {i} inference failed")
394
+
395
+ except Exception as e:
396
+ print(f"Error in chip {i} inference: {e}")
397
+
398
+ elapsed = time.time() - start_time
399
+
400
+ # Calculate performance metrics
401
+ ops_per_inference = total_cores * 1024 # FMA ops per core
402
+ from electron_speed import drift_velocity, TARGET_SWITCHES_PER_SEC
403
+ electron_transit_time = 1 / (drift_velocity * TARGET_SWITCHES_PER_SEC)
404
+ theoretical_time = electron_transit_time * ops_per_inference / total_cores
405
+
406
+ print(f"\nHTTP-Based Multi-Chip Inference Results:")
407
+ print(f"- Chips used: {num_chips}")
408
+ print(f"- Results collected: {len(results)}")
409
+ print(f"- Total time: {elapsed:.4f}s")
410
+ print(f"- Theoretical electron-speed time: {theoretical_time:.6f}s")
411
+ print(f"- Speed ratio: {theoretical_time/elapsed:.2f}x theoretical")
412
+ print(f"- Operations per second: {ops_per_inference/elapsed:.2e}")
413
+
414
+ # Test 3: HTTP Storage Performance
415
+ print(f"\nTest 3: HTTP Storage Performance Evaluation")
416
+
417
+ # Test tensor storage/retrieval performance
418
+ test_sizes = [1024, 4096, 16384, 65536] # Different tensor sizes
419
+ storage_times = []
420
+ retrieval_times = []
421
+
422
+ for size in test_sizes:
423
+ test_tensor = np.random.rand(size).astype(np.float32)
424
+ tensor_id = f"perf_test_{size}"
425
+
426
+ # Test storage time
427
+ start = time.time()
428
+ success = shared_storage.store_tensor(tensor_id, test_tensor)
429
+ storage_time = time.time() - start
430
+
431
+ if success:
432
+ storage_times.append(storage_time)
433
+
434
+ # Test retrieval time
435
+ start = time.time()
436
+ retrieved = shared_storage.load_tensor(tensor_id)
437
+ retrieval_time = time.time() - start
438
+
439
+ if retrieved is not None and np.array_equal(test_tensor, retrieved):
440
+ retrieval_times.append(retrieval_time)
441
+ print(f"Size {size}: Store {storage_time:.4f}s, Retrieve {retrieval_time:.4f}s")
442
+ else:
443
+ print(f"Size {size}: Retrieval verification failed")
444
+ else:
445
+ print(f"Size {size}: Storage failed")
446
+
447
+ if storage_times and retrieval_times:
448
+ avg_storage = sum(storage_times) / len(storage_times)
449
+ avg_retrieval = sum(retrieval_times) / len(retrieval_times)
450
+ print(f"Average storage time: {avg_storage:.4f}s")
451
+ print(f"Average retrieval time: {avg_retrieval:.4f}s")
452
+
453
+ # Test 4: Multi-chip coordination via HTTP
454
+ print(f"\nTest 4: Multi-Chip Coordination via HTTP")
455
+
456
+ # Test cross-chip data transfer
457
+ test_data_id = "cross_chip_test_data"
458
+ test_data = np.array([1, 2, 3, 4, 5], dtype=np.float32)
459
+
460
+ if shared_storage.store_tensor(test_data_id, test_data):
461
+ print("Stored test data for cross-chip transfer")
462
+
463
+ # Transfer data between chips
464
+ new_data_id = shared_storage.transfer_between_chips(0, 1, test_data_id)
465
+ if new_data_id:
466
+ print(f"Successfully transferred data from chip 0 to chip 1: {new_data_id}")
467
+
468
+ # Verify transferred data
469
+ transferred_data = shared_storage.load_tensor(new_data_id)
470
+ if transferred_data is not None and np.array_equal(test_data, transferred_data):
471
+ print("Cross-chip transfer verification successful")
472
+ else:
473
+ print("Cross-chip transfer verification failed")
474
+ else:
475
+ print("Cross-chip transfer failed")
476
+
477
+ # Test synchronization barriers
478
+ barrier_id = "test_barrier"
479
+ num_participants = num_chips
480
+
481
+ if shared_storage.create_sync_barrier(barrier_id, num_participants):
482
+ print(f"Created synchronization barrier for {num_participants} participants")
483
+
484
+ # Simulate participants arriving at barrier
485
+ for i in range(num_participants):
486
+ result = shared_storage.wait_sync_barrier(barrier_id)
487
+ if i == num_participants - 1:
488
+ if result:
489
+ print("All participants reached barrier - synchronization successful")
490
+ else:
491
+ print("Barrier synchronization failed")
492
+ else:
493
+ print(f"Participant {i+1} reached barrier")
494
+
495
+ print(f"\nHTTP-based AI integration test completed successfully!")
496
+
497
+ # Final statistics
498
+ final_stats = {
499
+ "chips_initialized": len(chips),
500
+ "ai_accelerators": len(ai_accelerators),
501
+ "total_cores": total_cores,
502
+ "model_loaded": components['model_id'] is not None,
503
+ "storage_type": "HTTP",
504
+ "connection_status": shared_storage.get_connection_status()
505
+ }
506
+
507
+ print(f"\nFinal System Statistics:")
508
+ for key, value in final_stats.items():
509
+ print(f"- {key}: {value}")
510
+
511
+ except Exception as e:
512
+ print(f"Multi-chip processing test failed: {e}")
513
+ import traceback
514
+ traceback.print_exc()
515
+ return
516
+
517
# Script entry point: run the full HTTP-based AI integration test suite
# (chip initialization, parallel inference, storage performance, and
# multi-chip coordination) when this file is executed directly.
if __name__ == "__main__":
    test_ai_integration_http()