Spaces:
Sleeping
Sleeping
Factor Studios
committed on
Upload 21 files
Browse files
- ai.py +1 -1
- network_vram_server.py +45 -0
- test_ai_integration.py +17 -46
- websocket_storage.py +43 -14
ai.py
CHANGED
|
@@ -174,7 +174,7 @@ class AIAccelerator:
|
|
| 174 |
if isinstance(test_input, list):
|
| 175 |
test_input = np.array(test_input, dtype=np.float32)
|
| 176 |
|
| 177 |
-
test_result = self.tensor_core_array.matmul(test_input, test_input)
|
| 178 |
if test_result is None or not isinstance(test_result, (np.ndarray, list)) or len(test_result) == 0:
|
| 179 |
raise RuntimeError("Tensor core test computation failed")
|
| 180 |
|
|
|
|
| 174 |
if isinstance(test_input, list):
|
| 175 |
test_input = np.array(test_input, dtype=np.float32)
|
| 176 |
|
| 177 |
+
test_result = self.tensor_core_array.matmul(test_input.tolist(), test_input.tolist())
|
| 178 |
if test_result is None or not isinstance(test_result, (np.ndarray, list)) or len(test_result) == 0:
|
| 179 |
raise RuntimeError("Tensor core test computation failed")
|
| 180 |
|
network_vram_server.py
CHANGED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import asyncio
|
| 3 |
+
import websockets
|
| 4 |
+
import json
|
| 5 |
+
|
| 6 |
+
class VRAMServer:
    """Minimal in-memory state server exposed over a WebSocket.

    Clients send JSON messages of the form
    ``{"operation": "vram/state", "type": "write"|"read", "key": ..., "data": ...}``
    and receive a JSON reply carrying a ``"status"`` field
    (``"success"`` or ``"error"``). State lives only in this process's
    memory (``self.vram_state``) and is lost when the process exits.
    """

    def __init__(self):
        # Backing store: key -> arbitrary JSON-serializable payload.
        self.vram_state = {}

    async def handler(self, websocket):
        """Handle one client connection, replying to every message.

        Malformed JSON and unknown operations are reported back to the
        client as ``{"status": "error", ...}`` rather than closing the
        socket, so one bad message does not kill the connection.
        """
        async for message in websocket:
            try:
                operation = json.loads(message)
                op_type = operation.get("operation")

                if op_type == "vram/state":
                    state_type = operation.get("type")
                    key = operation.get("key")

                    if state_type == "write":
                        data = operation.get("data")
                        self.vram_state[key] = data
                        await websocket.send(json.dumps({"status": "success", "message": "State stored"}))
                    elif state_type == "read":
                        # Membership test (not a None check) so that a key
                        # explicitly written with data=None still reads back
                        # as success instead of a spurious "State not found".
                        if key in self.vram_state:
                            await websocket.send(json.dumps({"status": "success", "data": self.vram_state[key]}))
                        else:
                            await websocket.send(json.dumps({"status": "error", "message": "State not found"}))
                    else:
                        await websocket.send(json.dumps({"status": "error", "message": "Unknown state operation type"}))
                else:
                    await websocket.send(json.dumps({"status": "error", "message": "Unknown operation"}))
            except Exception as e:
                # Report the failure to the client and keep serving; NOTE(review):
                # if the connection itself is broken this send will raise out of
                # the handler, which ends the connection anyway.
                await websocket.send(json.dumps({"status": "error", "message": str(e)}))
|
| 36 |
+
|
| 37 |
+
async def main():
    """Start the VRAM WebSocket server and serve requests forever."""
    vram_server = VRAMServer()
    # Bind on every interface, port 8765; awaiting a never-completed
    # Future keeps the server alive until the process is terminated.
    async with websockets.serve(vram_server.handler, "0.0.0.0", 8765):
        await asyncio.Future()

if __name__ == "__main__":
    asyncio.run(main())
|
| 44 |
+
|
| 45 |
+
|
test_ai_integration.py
CHANGED
|
@@ -184,28 +184,15 @@ def test_ai_integration():
|
|
| 184 |
model_size = sum(p.numel() * p.element_size() for p in model.parameters())
|
| 185 |
print(f"Model size: {model_size / (1024**3):.2f} GB")
|
| 186 |
|
| 187 |
-
#
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
# Store minimal model info without serializing the config
|
| 195 |
-
storage.store_state(f"models/{model_id}", "info", {
|
| 196 |
-
"name": model_id,
|
| 197 |
-
"size_bytes": model_size,
|
| 198 |
-
"num_parameters": sum(p.numel() for p in model.parameters()),
|
| 199 |
-
"weight_keys": list(model.state_dict().keys())
|
| 200 |
-
})
|
| 201 |
-
|
| 202 |
-
# Set model reference without serializing the full model
|
| 203 |
-
ai_accelerator_for_loading.model_refs[model_id] = {
|
| 204 |
-
"weight_prefix": f"model_weights/{model_id}",
|
| 205 |
-
"size": model_size
|
| 206 |
-
}
|
| 207 |
|
| 208 |
-
print(f"Model
|
| 209 |
assert ai_accelerator_for_loading.has_model(model_id), "Model not found in WebSocket storage after loading."
|
| 210 |
|
| 211 |
# Store model parameters in components dict
|
|
@@ -254,6 +241,7 @@ def test_ai_integration():
|
|
| 254 |
if (components['storage'] and
|
| 255 |
components['storage'].wait_for_connection(timeout=10.0)):
|
| 256 |
shared_storage = components['storage']
|
|
|
|
| 257 |
logging.info("Successfully reused existing WebSocket connection")
|
| 258 |
break
|
| 259 |
else:
|
|
@@ -262,6 +250,7 @@ def test_ai_integration():
|
|
| 262 |
if new_storage and new_storage.wait_for_connection(timeout=10.0):
|
| 263 |
components['storage'] = new_storage
|
| 264 |
shared_storage = new_storage
|
|
|
|
| 265 |
logging.info("Successfully established new WebSocket connection")
|
| 266 |
break
|
| 267 |
except Exception as e:
|
|
@@ -384,31 +373,13 @@ def test_ai_integration():
|
|
| 384 |
# Load image section from WebSocket storage
|
| 385 |
tensor_id = f"input_image/{img_name}"
|
| 386 |
|
| 387 |
-
#
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
input_tensor = accelerator.storage.load_tensor(tensor_id)
|
| 395 |
-
|
| 396 |
-
# Run inference with direct weight access
|
| 397 |
-
result = accelerator.inference_with_ws_weights(
|
| 398 |
-
model_id=model_id,
|
| 399 |
-
input_tensor=input_tensor,
|
| 400 |
-
weight_prefix=weight_prefix
|
| 401 |
-
)
|
| 402 |
-
|
| 403 |
-
# Store result in WebSocket storage
|
| 404 |
-
if result is not None:
|
| 405 |
-
storage.store_tensor(f"results/chip_{i}/{img_name}", result)
|
| 406 |
-
results.append(result)
|
| 407 |
-
else:
|
| 408 |
-
logging.error(f"Inference returned None for chip {i}")
|
| 409 |
-
except Exception as e:
|
| 410 |
-
logging.error(f"Inference failed on chip {i}: {str(e)}")
|
| 411 |
-
raise
|
| 412 |
|
| 413 |
elapsed = time.time() - start_time
|
| 414 |
|
|
|
|
| 184 |
model_size = sum(p.numel() * p.element_size() for p in model.parameters())
|
| 185 |
print(f"Model size: {model_size / (1024**3):.2f} GB")
|
| 186 |
|
| 187 |
+
# Store model in WebSocket storage with size information
|
| 188 |
+
# Load model directly using AIAccelerator's load_model method
|
| 189 |
+
ai_accelerator_for_loading.load_model(
|
| 190 |
+
model_id=model_id,
|
| 191 |
+
model=model,
|
| 192 |
+
processor=processor
|
| 193 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
+
print(f"Model '{model_id}' loaded successfully to WebSocket storage.")
|
| 196 |
assert ai_accelerator_for_loading.has_model(model_id), "Model not found in WebSocket storage after loading."
|
| 197 |
|
| 198 |
# Store model parameters in components dict
|
|
|
|
| 241 |
if (components['storage'] and
|
| 242 |
components['storage'].wait_for_connection(timeout=10.0)):
|
| 243 |
shared_storage = components['storage']
|
| 244 |
+
shared_storage.set_keep_alive(True) # Enable keep-alive
|
| 245 |
logging.info("Successfully reused existing WebSocket connection")
|
| 246 |
break
|
| 247 |
else:
|
|
|
|
| 250 |
if new_storage and new_storage.wait_for_connection(timeout=10.0):
|
| 251 |
components['storage'] = new_storage
|
| 252 |
shared_storage = new_storage
|
| 253 |
+
shared_storage.set_keep_alive(True) # Enable keep-alive
|
| 254 |
logging.info("Successfully established new WebSocket connection")
|
| 255 |
break
|
| 256 |
except Exception as e:
|
|
|
|
| 373 |
# Load image section from WebSocket storage
|
| 374 |
tensor_id = f"input_image/{img_name}"
|
| 375 |
|
| 376 |
+
# Run inference using WebSocket-stored weights
|
| 377 |
+
result = accelerator.inference(model_id, tensor_id)
|
| 378 |
+
|
| 379 |
+
# Store result in WebSocket storage
|
| 380 |
+
if result is not None:
|
| 381 |
+
storage.store_tensor(f"results/chip_{i}/{img_name}", result)
|
| 382 |
+
results.append(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
|
| 384 |
elapsed = time.time() - start_time
|
| 385 |
|
websocket_storage.py
CHANGED
|
@@ -13,7 +13,7 @@ class WebSocketGPUStorage:
|
|
| 13 |
_instance = None
|
| 14 |
_lock = threading.Lock()
|
| 15 |
|
| 16 |
-
def __new__(cls, url: str = "wss://
|
| 17 |
with cls._lock:
|
| 18 |
if cls._instance is None:
|
| 19 |
cls._instance = super().__new__(cls)
|
|
@@ -49,7 +49,7 @@ class WebSocketGPUStorage:
|
|
| 49 |
self.ws_thread.start()
|
| 50 |
self.initialized = True
|
| 51 |
|
| 52 |
-
def __init__(self, url: str = "wss://
|
| 53 |
"""This will actually just return the singleton instance"""
|
| 54 |
pass
|
| 55 |
|
|
@@ -230,18 +230,33 @@ class WebSocketGPUStorage:
|
|
| 230 |
|
| 231 |
def store_state(self, component: str, state_id: str, state_data: Dict[str, Any]) -> bool:
|
| 232 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
operation = {
|
| 234 |
-
'operation': 'state',
|
| 235 |
-
'type': '
|
| 236 |
-
'
|
| 237 |
-
'state_id': state_id,
|
| 238 |
'data': state_data,
|
| 239 |
-
'timestamp': time.time()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
}
|
| 241 |
|
| 242 |
response = self._send_operation(operation)
|
| 243 |
if response.get('status') != 'success':
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
return False
|
| 246 |
return True
|
| 247 |
except Exception as e:
|
|
@@ -250,11 +265,18 @@ class WebSocketGPUStorage:
|
|
| 250 |
|
| 251 |
def load_state(self, component: str, state_id: str) -> Optional[Dict[str, Any]]:
|
| 252 |
try:
|
|
|
|
|
|
|
|
|
|
| 253 |
operation = {
|
| 254 |
-
'operation': 'state',
|
| 255 |
-
'type': '
|
| 256 |
-
'
|
| 257 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
}
|
| 259 |
|
| 260 |
response = self._send_operation(operation)
|
|
@@ -265,7 +287,14 @@ class WebSocketGPUStorage:
|
|
| 265 |
return None
|
| 266 |
return data
|
| 267 |
else:
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
return None
|
| 270 |
except Exception as e:
|
| 271 |
print(f"Error loading state for {component}/{state_id}: {str(e)}")
|
|
@@ -290,7 +319,7 @@ class WebSocketGPUStorage:
|
|
| 290 |
|
| 291 |
operation = {
|
| 292 |
'operation': 'model',
|
| 293 |
-
'type': '
|
| 294 |
'model_name': model_name,
|
| 295 |
'model_hash': model_hash,
|
| 296 |
'model_data': model_data
|
|
|
|
| 13 |
_instance = None
|
| 14 |
_lock = threading.Lock()
|
| 15 |
|
| 16 |
+
def __new__(cls, url: str = "wss://8765-ie635qf2d79t3i1wada8c-fc2963e7.manusvm.computer/ws"):
|
| 17 |
with cls._lock:
|
| 18 |
if cls._instance is None:
|
| 19 |
cls._instance = super().__new__(cls)
|
|
|
|
| 49 |
self.ws_thread.start()
|
| 50 |
self.initialized = True
|
| 51 |
|
| 52 |
+
def __init__(self, url: str = "wss://8765-ie635qf2d79t3i1wada8c-fc2963e7.manusvm.computer/ws"):
|
| 53 |
"""This will actually just return the singleton instance"""
|
| 54 |
pass
|
| 55 |
|
|
|
|
| 230 |
|
| 231 |
def store_state(self, component: str, state_id: str, state_data: Dict[str, Any]) -> bool:
|
| 232 |
try:
|
| 233 |
+
# Use memory-based state storage instead of file-based
|
| 234 |
+
state_key = f"{component}_{state_id}"
|
| 235 |
+
|
| 236 |
+
# Store state in memory
|
| 237 |
operation = {
|
| 238 |
+
'operation': 'vram/state',
|
| 239 |
+
'type': 'write',
|
| 240 |
+
'key': state_key,
|
|
|
|
| 241 |
'data': state_data,
|
| 242 |
+
'timestamp': time.time(),
|
| 243 |
+
'metadata': {
|
| 244 |
+
'component': component,
|
| 245 |
+
'state_id': state_id,
|
| 246 |
+
'storage_type': 'memory'
|
| 247 |
+
}
|
| 248 |
}
|
| 249 |
|
| 250 |
response = self._send_operation(operation)
|
| 251 |
if response.get('status') != 'success':
|
| 252 |
+
error_msg = response.get('message', 'Unknown error')
|
| 253 |
+
if 'Permission denied' in error_msg:
|
| 254 |
+
# Try memory-only fallback
|
| 255 |
+
operation['storage_type'] = 'memory_only'
|
| 256 |
+
response = self._send_operation(operation)
|
| 257 |
+
if response.get('status') == 'success':
|
| 258 |
+
return True
|
| 259 |
+
print(f"Failed to store state for {component}/{state_id}: {error_msg}")
|
| 260 |
return False
|
| 261 |
return True
|
| 262 |
except Exception as e:
|
|
|
|
| 265 |
|
| 266 |
def load_state(self, component: str, state_id: str) -> Optional[Dict[str, Any]]:
|
| 267 |
try:
|
| 268 |
+
state_key = f"{component}_{state_id}"
|
| 269 |
+
|
| 270 |
+
# Try loading from memory first
|
| 271 |
operation = {
|
| 272 |
+
'operation': 'vram/state',
|
| 273 |
+
'type': 'read',
|
| 274 |
+
'key': state_key,
|
| 275 |
+
'metadata': {
|
| 276 |
+
'component': component,
|
| 277 |
+
'state_id': state_id,
|
| 278 |
+
'storage_type': 'memory'
|
| 279 |
+
}
|
| 280 |
}
|
| 281 |
|
| 282 |
response = self._send_operation(operation)
|
|
|
|
| 287 |
return None
|
| 288 |
return data
|
| 289 |
else:
|
| 290 |
+
error_msg = response.get('message', 'Unknown error')
|
| 291 |
+
if 'Permission denied' in error_msg:
|
| 292 |
+
# Try memory-only fallback
|
| 293 |
+
operation['storage_type'] = 'memory_only'
|
| 294 |
+
response = self._send_operation(operation)
|
| 295 |
+
if response.get('status') == 'success':
|
| 296 |
+
return response.get('data')
|
| 297 |
+
print(f"Failed to load state for {component}/{state_id}: {error_msg}")
|
| 298 |
return None
|
| 299 |
except Exception as e:
|
| 300 |
print(f"Error loading state for {component}/{state_id}: {str(e)}")
|
|
|
|
| 319 |
|
| 320 |
operation = {
|
| 321 |
'operation': 'model',
|
| 322 |
+
'type': 'read',
|
| 323 |
'model_name': model_name,
|
| 324 |
'model_hash': model_hash,
|
| 325 |
'model_data': model_data
|