Factor Studios committed: Upload 36 files
Files changed:
- ai.py +36 -51
- network_tensor_core.py +90 -0
- network_vram_server.py +0 -45
- test_ai.py +34 -0
- websocket_model_storage.py +115 -0
- websocket_storage.py +455 -455
ai.py
CHANGED

@@ -1,8 +1,13 @@
+import json
 import numpy as np
 import time
 from typing import Dict, Any, Optional, Tuple, Union, List
 from enum import Enum
-from
+from electron_speed import TARGET_SWITCHES_PER_SEC, TRANSISTORS_ON_CHIP, drift_velocity
+
+from network_tensor_core import TensorCoreArray
+from websocket_storage import WebSocketGPUStorage
+from websocket_model_storage import WebSocketModelStorage
 
 class VectorOperation(Enum):
     """Enumeration of supported vector operations."""

@@ -17,33 +22,23 @@ class VectorOperation(Enum):
 
 
 class AIAccelerator:
-    """
-
-
-    This class leverages NumPy's optimized operations to simulate the parallel
-    processing capabilities of the vGPU for AI workloads.
-    """
-
+    """AI Accelerator that leverages electron-speed physics for optimized AI inference and virtual GPU operations."""
+
     def __init__(self, vram=None, num_sms: int = 800, cores_per_sm: int = 222, storage=None):
-        ""
-
-
-        self.
-        if self.storage is None:
-            from websocket_storage import WebSocketGPUStorage
-            self.storage = WebSocketGPUStorage()  # Only create new if not provided
-        if not self.storage.wait_for_connection():
-            raise RuntimeError("Could not connect to GPU storage server")
-
-        self.vram = vram
+        self.gpu_storage = WebSocketGPUStorage("ws://localhost:7860/ws")  # For tensor operations and general GPU state
+        self.model_storage = WebSocketModelStorage("ws://localhost:7860/ws/model")  # For model upload/download
+
+        self.vram = self.gpu_storage  # VRAM operations will go through gpu_storage
         self.num_sms = num_sms
         self.cores_per_sm = cores_per_sm
         self.total_cores = num_sms * cores_per_sm
-
-
-
+
+    async def connect_to_storage(self):
+        if not self.gpu_storage.wait_for_connection():
+            raise RuntimeError("Could not connect to GPU storage server")
+        await self.model_storage.connect()
         self.tensor_core_array = TensorCoreArray(
-            num_tensor_cores=
+            num_tensor_cores=self.total_cores,
             bits=32,
             bandwidth_tbps=drift_velocity / 1e-12  # Bandwidth scaled to electron drift speed
         )

@@ -116,7 +111,7 @@ class AIAccelerator:
         except Exception as e:
             return f"<Unserializable object of type {type(config).__name__}: {str(e)}>"
 
-    def store_model_state(self, model_name: str, model_info: Dict[str, Any]) -> bool:
+    async def store_model_state(self, model_name: str, model_info: Dict[str, Any]) -> bool:
         """Store model state in WebSocket storage with proper serialization."""
         try:
             # Convert any non-serializable parts of model_info

@@ -126,25 +121,14 @@ class AIAccelerator:
             self.model_registry[model_name] = serializable_info
 
             # Save to storage
-            if self.
-                #
-
-
-                    f"{model_name}/info",
-                    serializable_info
-                )
+            if self.model_storage:
+                # Convert model_info to JSON string for upload
+                model_data_str = json.dumps(serializable_info)
+                upload_success = await self.model_storage.upload_model(model_name, model_data_str)
 
-
-
-                    "models",
-                    f"{model_name}/state",
-                    {"loaded": True, "timestamp": time.time()}
-                )
-
-                if info_success and state_success:
-                    self.resource_monitor['loaded_models'].add(model_name)
+                if upload_success:
+                    self.resource_monitor["loaded_models"].add(model_name)
                     return True
-
             return False
         except Exception as e:
             print(f"Error storing model state: {str(e)}")

@@ -209,14 +193,11 @@ class AIAccelerator:
         self.min_batch_size = 4
         self.dynamic_batching = True  # Enable automatic batch size adjustment
 
-
-        """Set the VRAM reference."""
-        self.vram = vram
-
+
     def allocate_matrix(self, shape: Tuple[int, ...], dtype=np.float32,
                         name: Optional[str] = None) -> str:
         """Allocate a matrix in VRAM and return its ID."""
-        if not self.
+        if not self.gpu_storage:
             raise RuntimeError("VRAM not available")
 
         if name is None:

@@ -227,14 +208,14 @@ class AIAccelerator:
         matrix_data = np.zeros(shape, dtype=dtype)
 
         # Store in VRAM as a texture (reusing texture storage mechanism)
-        matrix_id = self.
+        matrix_id = self.gpu_storage.load_texture(matrix_data, name)
         self.matrix_registry[name] = matrix_id
 
         return name
 
     def load_matrix(self, matrix_data: np.ndarray, name: Optional[str] = None) -> str:
         """Load matrix data into VRAM and return its ID."""
-        if not self.
+        if not self.gpu_storage:
             raise RuntimeError("VRAM not available")
 
         if name is None:

@@ -242,18 +223,18 @@ class AIAccelerator:
         self.matrix_counter += 1
 
         # Store in VRAM
-        matrix_id = self.
+        matrix_id = self.gpu_storage.load_texture(matrix_data, name)
         self.matrix_registry[name] = matrix_id
 
         return name
 
     def get_matrix(self, matrix_id: str) -> Optional[np.ndarray]:
         """Retrieve matrix data from VRAM."""
-        if not self.
+        if not self.gpu_storage or matrix_id not in self.matrix_registry:
             return None
 
         vram_id = self.matrix_registry[matrix_id]
-        return self.
+        return self.gpu_storage.get_texture(vram_id)
 
     def matrix_multiply(self, matrix_a_id: str, matrix_b_id: str,
                         result_id: Optional[str] = None) -> Optional[str]:

@@ -801,3 +782,7 @@ class AIAccelerator:
         return None
 
 
+
+
+
+
network_tensor_core.py
CHANGED

@@ -0,0 +1,90 @@ (new file; all lines added)

import asyncio
import websockets
import json
import numpy as np
from typing import List, Any, Optional, Dict

class TensorCoreArray:
    def __init__(self, num_tensor_cores: int, bits: int, bandwidth_tbps: float):
        self.num_tensor_cores = num_tensor_cores
        self.bits = bits
        self.bandwidth_tbps = bandwidth_tbps
        self.initialized = False

    def initialize(self):
        print(f"Initializing {self.num_tensor_cores} tensor cores with {self.bits}-bit precision...")
        self.initialized = True

    def matmul(self, matrix_a: List[List[float]], matrix_b: List[List[float]]) -> List[List[float]]:
        if not self.initialized:
            raise RuntimeError("Tensor cores not initialized. Call initialize() first.")

        np_a = np.array(matrix_a)
        np_b = np.array(matrix_b)

        if np_a.shape[1] != np_b.shape[0]:
            raise ValueError("Matrix dimensions incompatible for multiplication")

        result = np.matmul(np_a, np_b)
        return result.tolist()

    async def send_tensor_data(self, uri: str, tensor_id: str, data: np.ndarray):
        async with websockets.connect(uri) as websocket:
            payload = {
                "operation": "tensor_data",
                "type": "send",
                "tensor_id": tensor_id,
                "data": data.tolist()
            }
            await websocket.send(json.dumps(payload))
            response = await websocket.recv()
            return json.loads(response)

    async def receive_tensor_data(self, uri: str, tensor_id: str) -> Optional[np.ndarray]:
        async with websockets.connect(uri) as websocket:
            payload = {
                "operation": "tensor_data",
                "type": "receive",
                "tensor_id": tensor_id
            }
            await websocket.send(json.dumps(payload))
            response = await websocket.recv()
            response_data = json.loads(response)
            if response_data.get("status") == "success":
                return np.array(response_data["data"])
            return None

    def get_status(self) -> Dict[str, Any]:
        return {
            "num_tensor_cores": self.num_tensor_cores,
            "bits": self.bits,
            "bandwidth_tbps": self.bandwidth_tbps,
            "initialized": self.initialized
        }

if __name__ == "__main__":
    async def test_tensor_core_array():
        tca = TensorCoreArray(num_tensor_cores=10, bits=32, bandwidth_tbps=1.0)
        tca.initialize()

        matrix_a = [[1, 2], [3, 4]]
        matrix_b = [[5, 6], [7, 8]]

        result = tca.matmul(matrix_a, matrix_b)
        print(f"Matrix multiplication result: {result}")

        # Example of sending/receiving tensor data (requires a running WebSocket server)
        # uri = "ws://localhost:7860/ws"
        # tensor_id = "test_tensor"
        # dummy_data = np.array([[10, 20], [30, 40]])
        #
        # print(f"Sending tensor data: {dummy_data.tolist()}")
        # send_response = await tca.send_tensor_data(uri, tensor_id, dummy_data)
        # print(f"Send response: {send_response}")
        #
        # received_data = await tca.receive_tensor_data(uri, tensor_id)
        # print(f"Received tensor data: {received_data.tolist() if received_data is not None else None}")

    asyncio.run(test_tensor_core_array())
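As a quick sanity check on how ai.py wires this class up, here is a minimal sketch using its defaults (num_sms=800, cores_per_sm=222, so 177,600 simulated cores). The drift_velocity value below is a placeholder assumption; the real figure comes from the electron_speed module, which is not part of this diff.

from network_tensor_core import TensorCoreArray

# Mirrors the constructor call in ai.py; drift_velocity is a stand-in here,
# since electron_speed's actual export is not shown in this commit.
num_sms, cores_per_sm = 800, 222
total_cores = num_sms * cores_per_sm        # 800 * 222 = 177,600
drift_velocity = 1e-4                       # placeholder drift speed

tca = TensorCoreArray(
    num_tensor_cores=total_cores,
    bits=32,
    bandwidth_tbps=drift_velocity / 1e-12,  # ai.py's scaling rule
)
tca.initialize()
print(tca.get_status())                     # {'num_tensor_cores': 177600, ...}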
network_vram_server.py
CHANGED

@@ -1,45 +0,0 @@ (file removed; all lines deleted)

import asyncio
import websockets
import json

class VRAMServer:
    def __init__(self):
        self.vram_state = {}

    async def handler(self, websocket):
        async for message in websocket:
            try:
                operation = json.loads(message)
                op_type = operation.get("operation")

                if op_type == "vram/state":
                    state_type = operation.get("type")
                    key = operation.get("key")

                    if state_type == "write":
                        data = operation.get("data")
                        self.vram_state[key] = data
                        await websocket.send(json.dumps({"status": "success", "message": "State stored"}))
                    elif state_type == "read":
                        data = self.vram_state.get(key)
                        if data is not None:
                            await websocket.send(json.dumps({"status": "success", "data": data}))
                        else:
                            await websocket.send(json.dumps({"status": "error", "message": "State not found"}))
                    else:
                        await websocket.send(json.dumps({"status": "error", "message": "Unknown state operation type"}))
                else:
                    await websocket.send(json.dumps({"status": "error", "message": "Unknown operation"}))
            except Exception as e:
                await websocket.send(json.dumps({"status": "error", "message": str(e)}))

async def main():
    server = VRAMServer()
    async with websockets.serve(server.handler, "0.0.0.0", 8765):
        await asyncio.Future()

if __name__ == "__main__":
    asyncio.run(main())
test_ai.py
ADDED

@@ -0,0 +1,34 @@ (new file; all lines added)

import asyncio
import numpy as np
from ai import AIAccelerator

async def main():
    print("\n--- Testing AIAccelerator with WebSocket Storage ---")
    try:
        accelerator = AIAccelerator()
        await accelerator.connect_to_storage()
        print("AIAccelerator initialized and connected successfully.")

        # Test model upload
        dummy_model_info = {"layers": 5, "neurons": 100, "type": "CNN"}
        model_name = "test_cnn_model"
        print(f"Attempting to store model: {model_name}")
        if await accelerator.store_model_state(model_name, dummy_model_info):
            print(f"Model '{model_name}' stored successfully.")
        else:
            print(f"Failed to store model '{model_name}'")

        # Test tensor core initialization (requires VRAM connection)
        print("Attempting to initialize tensor cores...")
        if accelerator.initialize_tensor_cores():
            print("Tensor cores initialized successfully.")
        else:
            print("Failed to initialize tensor cores.")

    except Exception as e:
        print(f"An error occurred during AIAccelerator testing: {e}")

if __name__ == "__main__":
    asyncio.run(main())
websocket_model_storage.py
CHANGED

@@ -0,0 +1,115 @@ (new file; all lines added)

import asyncio
import websockets
import json
import numpy as np

class WebSocketModelStorage:
    def __init__(self, uri):
        self.uri = uri
        self.websocket = None

    async def connect(self):
        self.websocket = await websockets.connect(self.uri, max_size=None)

    async def disconnect(self):
        if self.websocket:
            await self.websocket.close()

    async def upload_model_chunk(self, model_id, chunk_id, chunk_data):
        payload = {
            "operation": "vram",
            "type": "write",
            "block_id": f"{model_id}_{chunk_id}",
            "data": chunk_data.tolist() if isinstance(chunk_data, np.ndarray) else chunk_data
        }
        await self.websocket.send(json.dumps(payload))
        response = await self.websocket.recv()
        return json.loads(response)

    async def download_model_chunk(self, model_id, chunk_id):
        payload = {
            "operation": "vram",
            "type": "read",
            "block_id": f"{model_id}_{chunk_id}"
        }
        await self.websocket.send(json.dumps(payload))
        response = await self.websocket.recv()
        return json.loads(response)

    async def upload_model(self, model_id, model_data, chunk_size=1024*1024):  # 1MB chunk size
        if isinstance(model_data, np.ndarray):
            model_data_bytes = model_data.tobytes()
        else:
            model_data_bytes = model_data.encode("utf-8")  # Assuming string data for now

        total_size = len(model_data_bytes)
        num_chunks = (total_size + chunk_size - 1) // chunk_size

        print(f"Uploading model {model_id} in {num_chunks} chunks...")

        for i in range(num_chunks):
            start = i * chunk_size
            end = min((i + 1) * chunk_size, total_size)
            chunk = model_data_bytes[start:end]

            # Convert chunk to a list of integers for JSON serialization
            chunk_list = list(chunk)

            response = await self.upload_model_chunk(model_id, i, chunk_list)
            if response.get("status") != "success":
                print(f"Error uploading chunk {i}: {response.get('message')}")
                return False
            print(f"Uploaded chunk {i+1}/{num_chunks}")
        return True

    async def download_model(self, model_id, num_chunks):
        print(f"Downloading model {model_id} with {num_chunks} chunks...")
        downloaded_chunks = []
        for i in range(num_chunks):
            response = await self.download_model_chunk(model_id, i)
            if response.get("status") == "success":
                downloaded_chunks.append(np.array(response["data"], dtype=np.uint8).tobytes())
                print(f"Downloaded chunk {i+1}/{num_chunks}")
            else:
                print("Error downloading chunk " + str(i) + ": " + str(response.get("message")))
                return None

        # Reconstruct the model from downloaded chunks
        full_model_bytes = b"".join(downloaded_chunks)
        return np.frombuffer(full_model_bytes, dtype=np.float32)  # Assuming original data type was float32

async def main():
    uri = "ws://localhost:7860/ws"
    storage = WebSocketModelStorage(uri)
    await storage.connect()

    # Example usage: Upload a dummy model
    dummy_model_data = np.random.rand(1024 * 1024 * 5).astype(np.float32)  # 5MB dummy model
    model_id = "test_model_123"
    chunk_size = 1024*1024  # Must match the chunk_size in upload_model
    total_size = len(dummy_model_data.tobytes())
    num_chunks = (total_size + chunk_size - 1) // chunk_size
    success = await storage.upload_model(model_id, dummy_model_data)

    if success:
        print(f"Model {model_id} uploaded successfully.")
        # Test download
        downloaded_model = await storage.download_model(model_id, num_chunks)
        if downloaded_model is not None:
            print(f"Model {model_id} downloaded successfully. Shape: {downloaded_model.shape}")
            # Verify integrity (optional, for testing purposes)
            if np.array_equal(dummy_model_data, downloaded_model):
                print("Downloaded model matches original.")
            else:
                print("Downloaded model DOES NOT match original.")
        else:
            print(f"Model {model_id} download failed.")
    else:
        print(f"Model {model_id} upload failed.")

    await storage.disconnect()

if __name__ == "__main__":
    asyncio.run(main())
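Two usage caveats follow directly from the code above: download_model always reinterprets the reassembled bytes as float32, and the caller must track num_chunks externally, since the server only sees opaque {model_id}_{chunk_id} blocks. A small sketch of the ceiling-division chunk arithmetic that the demo in main() relies on:

# Chunk count for the dummy model in main(): ceiling division over bytes.
chunk_size = 1024 * 1024                          # 1 MiB default
total_size = 5 * 1024 * 1024 * 4                  # 5,242,880 float32 values * 4 bytes = 20,971,520
num_chunks = (total_size + chunk_size - 1) // chunk_size
print(num_chunks)                                 # 20 -> download_model("test_model_123", 20)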
websocket_storage.py
CHANGED

@@ -1,455 +1,455 @@ (entire file rewritten in place; the old and new renderings are identical except for the default url in __new__ and __init__, which is truncated in the old version)

import websockets
import json
import numpy as np
from typing import Dict, Any, Optional, Union
import threading
from queue import Queue
import time
import asyncio
import hashlib

class WebSocketGPUStorage:
    # Singleton instance
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, url: str = "ws://localhost:7860/ws"):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                cls._instance._init_singleton(url)
            return cls._instance

    def _init_singleton(self, url: str):
        """Initialize the singleton instance"""
        if hasattr(self, 'initialized'):
            return

        self.url = url
        self.websocket = None
        self.connected = False
        self.message_queue = Queue()
        self.response_queues: Dict[str, Queue] = {}
        self.lock = threading.Lock()
        self._closing = False
        self._loop = None
        self.error_count = 0
        self.last_error_time = 0
        self.max_retries = 5
        self.tensor_registry: Dict[str, Dict[str, Any]] = {}  # Track tensor metadata
        self.model_registry: Dict[str, Dict[str, Any]] = {}  # Track loaded models
        self.resource_monitor = {
            'vram_used': 0,
            'active_tensors': 0,
            'loaded_models': set()
        }

        # Start WebSocket connection in a separate thread
        self.ws_thread = threading.Thread(target=self._run_websocket_loop, daemon=True)
        self.ws_thread.start()
        self.initialized = True

    def __init__(self, url: str = "ws://localhost:7860/ws"):
        """This will actually just return the singleton instance"""
        pass

    def _run_websocket_loop(self):
        self._loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self._loop)
        self._loop.run_until_complete(self._websocket_handler())

    async def _websocket_handler(self):
        while not self._closing:
            try:
                async with websockets.connect(self.url) as websocket:
                    self.websocket = websocket
                    self.connected = True
                    self.error_count = 0  # Reset error count on successful connection
                    print("Connected to GPU storage server")

                    while True:
                        # Handle outgoing messages
                        try:
                            while not self.message_queue.empty():
                                msg_id, operation = self.message_queue.get()
                                await websocket.send(json.dumps(operation))

                                # Wait for response with timeout
                                try:
                                    response = await asyncio.wait_for(websocket.recv(), timeout=30)
                                    response_data = json.loads(response)

                                    # Put response in corresponding queue
                                    if msg_id in self.response_queues:
                                        self.response_queues[msg_id].put(response_data)
                                except asyncio.TimeoutError:
                                    if msg_id in self.response_queues:
                                        self.response_queues[msg_id].put({
                                            "status": "error",
                                            "message": "Operation timed out"
                                        })
                                except Exception as e:
                                    if msg_id in self.response_queues:
                                        self.response_queues[msg_id].put({
                                            "status": "error",
                                            "message": f"Error processing response: {str(e)}"
                                        })

                        except Exception as e:
                            print(f"Error processing message: {str(e)}")

                        # Keep connection alive with heartbeat
                        try:
                            await websocket.ping()
                        except:
                            break  # Break inner loop on ping failure

                        await asyncio.sleep(0.001)  # 1ms sleep for electron-speed response

            except Exception as e:
                print(f"WebSocket connection error: {e}")
                self.connected = False
                await asyncio.sleep(1)  # Wait before reconnecting

    def _send_operation(self, operation: Dict[str, Any]) -> Dict[str, Any]:
        if self._closing:
            return {"status": "error", "message": "WebSocket is closing"}

        if not self.wait_for_connection(timeout=10):
            return {"status": "error", "message": "Not connected to GPU storage server"}

        msg_id = str(time.time())
        response_queue = Queue()

        with self.lock:
            self.response_queues[msg_id] = response_queue
            self.message_queue.put((msg_id, operation))

        try:
            # Wait for response with configurable timeout
            response = response_queue.get(timeout=30)  # Extended timeout for large models
            if response.get("status") == "error" and "model_size" in operation:
                # Retry once for model loading operations
                self.message_queue.put((msg_id, operation))
                response = response_queue.get(timeout=30)
        except Exception as e:
            response = {"status": "error", "message": f"Operation failed: {str(e)}"}
        finally:
            with self.lock:
                if msg_id in self.response_queues:
                    del self.response_queues[msg_id]

        return response

    def store_tensor(self, tensor_id: str, data: np.ndarray, model_size: Optional[int] = None) -> bool:
        try:
            if data is None:
                raise ValueError("Cannot store None tensor")

            # Calculate tensor metadata
            tensor_shape = data.shape
            tensor_dtype = str(data.dtype)
            tensor_size = data.nbytes

            operation = {
                'operation': 'vram',
                'type': 'write',
                'block_id': tensor_id,
                'data': data.tolist(),
                'model_size': model_size if model_size is not None else -1,  # -1 indicates unlimited
                'metadata': {
                    'shape': tensor_shape,
                    'dtype': tensor_dtype,
                    'size': tensor_size,
                    'timestamp': time.time()
                }
            }

            response = self._send_operation(operation)
            if response.get('status') == 'success':
                # Update tensor registry
                with self.lock:
                    self.tensor_registry[tensor_id] = {
                        'shape': tensor_shape,
                        'dtype': tensor_dtype,
                        'size': tensor_size,
                        'timestamp': time.time()
                    }
                    self.resource_monitor['vram_used'] += tensor_size
                    self.resource_monitor['active_tensors'] += 1
                return True
            else:
                print(f"Failed to store tensor {tensor_id}: {response.get('message', 'Unknown error')}")
                return False
        except Exception as e:
            print(f"Error storing tensor {tensor_id}: {str(e)}")
            return False

    def load_tensor(self, tensor_id: str) -> Optional[np.ndarray]:
        try:
            # Check tensor registry first
            if tensor_id not in self.tensor_registry:
                print(f"Tensor {tensor_id} not registered in VRAM")
                return None

            operation = {
                'operation': 'vram',
                'type': 'read',
                'block_id': tensor_id,
                'expected_metadata': self.tensor_registry.get(tensor_id, {})
            }

            response = self._send_operation(operation)
            if response.get('status') == 'success':
                data = response.get('data')
                if data is None:
                    print(f"No data found for tensor {tensor_id}")
                    return None

                # Verify tensor metadata
                metadata = response.get('metadata', {})
                expected_metadata = self.tensor_registry.get(tensor_id, {})
                if metadata.get('shape') != expected_metadata.get('shape'):
                    print(f"Warning: Tensor {tensor_id} shape mismatch")

                try:
                    # Convert to numpy array with correct dtype
                    arr = np.array(data, dtype=np.dtype(expected_metadata.get('dtype', 'float32')))
                    if arr.shape != expected_metadata.get('shape'):
                        arr = arr.reshape(expected_metadata.get('shape'))
                    return arr
                except Exception as e:
                    print(f"Error converting tensor data: {str(e)}")
                    return None
            else:
                print(f"Failed to load tensor {tensor_id}: {response.get('message', 'Unknown error')}")
                return None
        except Exception as e:
            print(f"Error loading tensor {tensor_id}: {str(e)}")
            return None

    def store_state(self, component: str, state_id: str, state_data: Dict[str, Any]) -> bool:
        try:
            operation = {
                'operation': 'state',
                'type': 'save',
                'component': component,
                'state_id': state_id,
                'data': state_data,
                'timestamp': time.time()
            }

            response = self._send_operation(operation)
            if response.get('status') != 'success':
                print(f"Failed to store state for {component}/{state_id}: {response.get('message', 'Unknown error')}")
                return False
            return True
        except Exception as e:
            print(f"Error storing state for {component}/{state_id}: {str(e)}")
            return False

    def load_state(self, component: str, state_id: str) -> Optional[Dict[str, Any]]:
        try:
            operation = {
                'operation': 'state',
                'type': 'load',
                'component': component,
                'state_id': state_id
            }

            response = self._send_operation(operation)
            if response.get('status') == 'success':
                data = response.get('data')
                if data is None:
                    print(f"No state found for {component}/{state_id}")
                    return None
                return data
            else:
                print(f"Failed to load state for {component}/{state_id}: {response.get('message', 'Unknown error')}")
                return None
        except Exception as e:
            print(f"Error loading state for {component}/{state_id}: {str(e)}")
            return None

    def is_model_loaded(self, model_name: str) -> bool:
        """Check if a model is already loaded in VRAM"""
        return model_name in self.resource_monitor['loaded_models']

    def load_model(self, model_name: str, model_path: Optional[str] = None, model_data: Optional[Dict] = None) -> bool:
        """Load a model into VRAM if not already loaded"""
        try:
            # Check if model is already loaded
            if self.is_model_loaded(model_name):
                print(f"Model {model_name} already loaded in VRAM")
                return True

            # Calculate model hash if path provided
            model_hash = None
            if model_path:
                model_hash = self._calculate_model_hash(model_path)

            operation = {
                'operation': 'model',
                'type': 'load',
                'model_name': model_name,
                'model_hash': model_hash,
                'model_data': model_data
            }

            response = self._send_operation(operation)
            if response.get('status') == 'success':
                with self.lock:
                    self.model_registry[model_name] = {
                        'hash': model_hash,
                        'timestamp': time.time(),
                        'tensors': response.get('tensor_ids', [])
                    }
                    self.resource_monitor['loaded_models'].add(model_name)
                print(f"Successfully loaded model {model_name}")
                return True
            else:
                print(f"Failed to load model {model_name}: {response.get('message', 'Unknown error')}")
                return False
        except Exception as e:
            print(f"Error loading model {model_name}: {str(e)}")
            return False

    def _calculate_model_hash(self, model_path: str) -> str:
        """Calculate SHA256 hash of model file"""
        try:
            sha256_hash = hashlib.sha256()
            with open(model_path, "rb") as f:
                for byte_block in iter(lambda: f.read(4096), b""):
                    sha256_hash.update(byte_block)
            return sha256_hash.hexdigest()
        except Exception as e:
            print(f"Error calculating model hash: {str(e)}")
            return ""

    def cache_data(self, key: str, data: Any) -> bool:
        operation = {
            'operation': 'cache',
            'type': 'set',
            'key': key,
            'data': data
        }

        response = self._send_operation(operation)
        return response.get('status') == 'success'

    def get_cached_data(self, key: str) -> Optional[Any]:
        operation = {
            'operation': 'cache',
            'type': 'get',
            'key': key
        }

        response = self._send_operation(operation)
        if response.get('status') == 'success':
            return response['data']
        return None

    def wait_for_connection(self, timeout: float = 30.0) -> bool:
        """Wait for WebSocket connection to be established"""
        start_time = time.time()
        while not self._closing and not self.connected:
            if time.time() - start_time > timeout:
                print("Connection timeout exceeded")
                return False
            time.sleep(0.1)
        return self.connected

    def is_connected(self) -> bool:
        """Check if WebSocket connection is active"""
        return self.connected and not self._closing

    def get_connection_status(self) -> Dict[str, Any]:
        """Get detailed connection status"""
        return {
            "connected": self.connected,
            "closing": self._closing,
            "error_count": self.error_count,
            "url": self.url,
            "last_error_time": self.last_error_time,
            "loaded_models": list(self.resource_monitor['loaded_models'])
        }

    def start_inference(self, model_name: str, input_data: np.ndarray) -> Optional[Dict[str, Any]]:
        """Start inference with a loaded model"""
        try:
            if not self.is_model_loaded(model_name):
                print(f"Model {model_name} not loaded. Please load the model first.")
                return None

            operation = {
                'operation': 'inference',
                'type': 'run',
                'model_name': model_name,
                'input_data': input_data.tolist() if isinstance(input_data, np.ndarray) else input_data
            }

            response = self._send_operation(operation)
            if response.get('status') == 'success':
                return {
                    'output': np.array(response['output']) if 'output' in response else None,
                    'metrics': response.get('metrics', {}),
                    'model_info': self.model_registry.get(model_name, {})
                }
            else:
                print(f"Inference failed: {response.get('message', 'Unknown error')}")
                return None
        except Exception as e:
            print(f"Error during inference: {str(e)}")
            return None

    def close(self):
        """Close WebSocket connection and cleanup resources."""
        if not self._closing:
            self._closing = True
            if self.websocket and self._loop:
                async def cleanup():
                    try:
                        # Clean up registries
                        with self.lock:
                            self.tensor_registry.clear()
                            self.model_registry.clear()
                            self.resource_monitor['vram_used'] = 0
                            self.resource_monitor['active_tensors'] = 0
                            self.resource_monitor['loaded_models'].clear()

                        # Notify server about cleanup
                        if self.connected:
                            try:
                                await self.websocket.send(json.dumps({
                                    'operation': 'cleanup',
                                    'type': 'full'
                                }))
                            except:
                                pass

                        await self.websocket.close()
                    except Exception as e:
                        print(f"Error during cleanup: {str(e)}")
                    finally:
                        self.connected = False

                if self._loop.is_running():
                    self._loop.create_task(cleanup())
                else:
                    asyncio.run(cleanup())

    async def aclose(self):
        """Asynchronously close WebSocket connection."""
        if not self._closing:
            self._closing = True
            if self.websocket:
                try:
                    await self.websocket.close()
                except:
                    pass
                finally:
                    self.connected = False

    def __del__(self):
        """Ensure cleanup on deletion."""
        self.close()
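One design note worth flagging: because __new__ caches a single instance and __init__ is a pass-through, every constructor call after the first returns the original object and silently ignores its url argument. A minimal sketch of that behavior (no server needs to be running for the objects to construct; the background thread simply keeps retrying the connection):

from websocket_storage import WebSocketGPUStorage

# The singleton keeps whatever URL the *first* caller passed.
a = WebSocketGPUStorage("ws://localhost:7860/ws")
b = WebSocketGPUStorage("ws://example.invalid/ws")  # url argument is ignored

assert a is b
print(a.url)  # "ws://localhost:7860/ws", for both references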