Factor Studios committed on
Commit
aef4f5c
·
verified ·
1 Parent(s): 520d6cf

Update http_storage.py

Browse files
Files changed (1) hide show
  1. http_storage.py +441 -443
http_storage.py CHANGED
@@ -1,443 +1,441 @@
1
- import json
2
- import numpy as np
3
- from typing import Dict, Any, Optional, Union
4
- import threading
5
- import time
6
- import hashlib
7
- import logging
8
- import os
9
- import shutil
10
- import uuid
11
- from pathlib import Path
12
-
13
- class HTTPGPUStorage:
14
- """
15
- HTTP-based GPU storage client that replaces WebSocket functionality.
16
- Maintains the same interface as WebSocketGPUStorage for backward compatibility.
17
- """
18
-
19
- # Singleton instance
20
- _instance = None
21
- _lock = threading.Lock()
22
-
23
- def __new__(cls, storage_path: str = "storage"):
24
- with cls._lock:
25
- if cls._instance is None:
26
- cls._instance = super().__new__(cls)
27
- # Convert to absolute path if relative
28
- if not os.path.isabs(storage_path):
29
- storage_path = os.path.abspath(storage_path)
30
- cls._instance._init_singleton(storage_path)
31
- return cls._instance
32
-
33
- def _init_singleton(self, storage_path: str):
34
- """Initialize the singleton instance with local storage"""
35
- if hasattr(self, 'initialized'):
36
- return
37
-
38
- # Setup storage paths
39
- self.base_path = Path(storage_path)
40
- self.vram_path = self.base_path / "vram_blocks"
41
- self.models_path = self.base_path / "models"
42
- self.cache_path = self.base_path / "cache"
43
- self.state_path = self.base_path / "states"
44
-
45
- # Create directories
46
- for path in [self.vram_path, self.models_path, self.cache_path, self.state_path]:
47
- path.mkdir(parents=True, exist_ok=True)
48
-
49
- self.lock = threading.Lock()
50
- self._closing = False
51
- self.error_count = 0
52
- self.last_error_time = 0
53
- self.session_id = str(uuid.uuid4())
54
-
55
- # Tensor and model registries (maintained for compatibility)
56
- self.tensor_registry: Dict[str, Dict[str, Any]] = {}
57
- self.model_registry: Dict[str, Dict[str, Any]] = {}
58
- self.resource_monitor = {
59
- 'vram_used': 0,
60
- 'active_tensors': 0,
61
- 'loaded_models': set()
62
- }
63
-
64
- # Initialize local storage monitoring
65
- self.storage_monitor = {
66
- 'total_size': 0,
67
- 'last_access': time.time(),
68
- 'disk_usage': os.path.getsize(str(self.base_path)) if os.path.exists(str(self.base_path)) else 0
69
- }
70
-
71
- # Initialize session
72
- self._create_session()
73
- self.initialized = True
74
-
75
- def __init__(self, storage_path: str = "storage"):
76
- """This will actually just return the singleton instance.
77
- The actual initialization happens in __new__ and _init_singleton"""
78
- pass
79
-
80
- def _create_session(self):
81
- """Initialize local storage session"""
82
- try:
83
- # Create status file to track session
84
- status_path = self.base_path / "session_status.json"
85
- status_data = {
86
- "session_id": self.session_id,
87
- "created_at": time.time(),
88
- "resource_limits": {
89
- "max_vram_gb": 40, # A100 size
90
- "max_models": 5,
91
- "max_batch_size": 32
92
- }
93
- }
94
-
95
- with open(status_path, 'w') as f:
96
- json.dump(status_data, f, indent=2)
97
-
98
- logging.info(f"Local storage session created: {self.session_id}")
99
- return True
100
-
101
- except Exception as e:
102
- logging.error(f"Failed to create HTTP session: {e}")
103
- self.error_count += 1
104
- self.last_error_time = time.time()
105
- return False
106
-
107
- def _check_storage(self) -> Dict[str, Any]:
108
- """Check local storage status and usage"""
109
- try:
110
- # Update storage monitoring
111
- self.storage_monitor.update({
112
- 'total_size': sum(f.stat().st_size for f in self.base_path.rglob('*') if f.is_file()),
113
- 'last_access': time.time(),
114
- 'disk_usage': os.path.getsize(str(self.base_path)) if os.path.exists(str(self.base_path)) else 0
115
- })
116
- return {"status": "ok", "monitor": self.storage_monitor}
117
- except Exception as e:
118
- logging.error(f"Error checking storage: {e}")
119
- return {"status": "error", "message": str(e)}
120
-
121
- def store_tensor(self, tensor_id: str, data: np.ndarray, model_size: Optional[int] = None) -> bool:
122
- """Store tensor data in local storage"""
123
- try:
124
- if data is None:
125
- raise ValueError("Cannot store None tensor")
126
-
127
- # Calculate tensor metadata
128
- tensor_shape = data.shape
129
- tensor_dtype = str(data.dtype)
130
- tensor_size = data.nbytes
131
-
132
- # Save tensor data
133
- tensor_path = self.vram_path / f"{tensor_id}.npy"
134
- np.save(str(tensor_path), data)
135
-
136
- # Save metadata
137
- metadata = {
138
- 'shape': tensor_shape,
139
- 'dtype': tensor_dtype,
140
- 'size': tensor_size,
141
- 'timestamp': time.time(),
142
- 'model_size': model_size if model_size is not None else -1
143
- }
144
-
145
- metadata_path = self.vram_path / f"{tensor_id}_meta.json"
146
- with open(metadata_path, 'w') as f:
147
- json.dump(metadata, f)
148
-
149
- # Update tensor registry
150
- with self.lock:
151
- self.tensor_registry[tensor_id] = metadata
152
- self.resource_monitor['vram_used'] += tensor_size
153
- self.resource_monitor['active_tensors'] += 1
154
- return True
155
-
156
- except Exception as e:
157
- logging.error(f"Error storing tensor {tensor_id}: {str(e)}")
158
- return False
159
-
160
- def load_tensor(self, tensor_id: str) -> Optional[np.ndarray]:
161
- """Load tensor data via HTTP API"""
162
- try:
163
- # Check tensor registry first
164
- if tensor_id not in self.tensor_registry:
165
- logging.warning(f"Tensor {tensor_id} not registered in VRAM")
166
- # Still try to load it in case it exists on server
167
-
168
- response = self._make_request('GET', f'/vram/blocks/{tensor_id}')
169
-
170
- if response and response.get('status') == 'success':
171
- data = response.get('data')
172
- metadata = response.get('metadata', {})
173
-
174
- if data is None:
175
- logging.error(f"No data found for tensor {tensor_id}")
176
- return None
177
-
178
- try:
179
- # Convert to numpy array with correct dtype
180
- expected_dtype = metadata.get('dtype', 'float32')
181
- expected_shape = metadata.get('shape')
182
-
183
- arr = np.array(data, dtype=np.dtype(expected_dtype))
184
- if expected_shape and arr.shape != tuple(expected_shape):
185
- arr = arr.reshape(expected_shape)
186
-
187
- # Update registry if not present
188
- if tensor_id not in self.tensor_registry:
189
- with self.lock:
190
- self.tensor_registry[tensor_id] = metadata
191
-
192
- return arr
193
-
194
- except Exception as e:
195
- logging.error(f"Error converting tensor data: {str(e)}")
196
- return None
197
- else:
198
- logging.error(f"Failed to load tensor {tensor_id}: {response.get('message', 'Unknown error')}")
199
- return None
200
-
201
- except Exception as e:
202
- logging.error(f"Error loading tensor {tensor_id}: {str(e)}")
203
- return None
204
-
205
- def store_state(self, component: str, state_id: str, state_data: Dict[str, Any]) -> bool:
206
- """Store component state via HTTP API"""
207
- try:
208
- request_data = {
209
- "data": state_data,
210
- "timestamp": time.time()
211
- }
212
-
213
- response = self._make_request(
214
- 'POST',
215
- f'/state/{component}/{state_id}',
216
- json=request_data
217
- )
218
-
219
- if response and response.get('status') == 'success':
220
- return True
221
- else:
222
- logging.error(f"Failed to store state for {component}/{state_id}: {response.get('message', 'Unknown error')}")
223
- return False
224
-
225
- except Exception as e:
226
- logging.error(f"Error storing state for {component}/{state_id}: {str(e)}")
227
- return False
228
-
229
- def load_state(self, component: str, state_id: str) -> Optional[Dict[str, Any]]:
230
- """Load component state via HTTP API"""
231
- try:
232
- response = self._make_request("GET", f"/api/v1/state/{component}/{state_id}")
233
-
234
- if response and response.get('status') == 'success':
235
- return response.get('data')
236
- else:
237
- logging.error(f"Failed to load state for {component}/{state_id}: {response.get('message', 'Unknown error')}")
238
- return None
239
-
240
- except Exception as e:
241
- logging.error(f"Error loading state for {component}/{state_id}: {str(e)}")
242
- return None
243
-
244
- def cache_data(self, key: str, data: Any) -> bool:
245
- """Cache data via HTTP API"""
246
- try:
247
- request_data = {"data": data}
248
-
249
- response = self._make_request(
250
- 'POST',
251
- f'/cache/{key}',
252
- json=request_data
253
- )
254
-
255
- return response and response.get('status') == 'success'
256
-
257
- except Exception as e:
258
- logging.error(f"Error caching data for key {key}: {str(e)}")
259
- return False
260
-
261
- def get_cached_data(self, key: str) -> Optional[Any]:
262
- """Get cached data via HTTP API"""
263
- try:
264
- response = self._make_request("GET", f"/cache/{key}")
265
-
266
- if response and response.get('status') == 'success':
267
- return response.get('data')
268
- return None
269
-
270
- except Exception as e:
271
- logging.error(f"Error getting cached data for key {key}: {str(e)}")
272
- return None
273
-
274
- def is_model_loaded(self, model_name: str) -> bool:
275
- """Check if a model is loaded via HTTP API"""
276
- try:
277
- response = self._make_request(
278
- "GET",
279
- f"/models/{model_name}/status",
280
- timeout=60
281
- )
282
-
283
- if response and response.get('status') == 'loaded':
284
- return True
285
- return False
286
-
287
- except Exception as e:
288
- logging.error(f"Error checking model status for {model_name}: {str(e)}")
289
- return False
290
-
291
- def load_model(self, model_name: str, model_path: Optional[str] = None, model_data: Optional[Dict] = None) -> bool:
292
- """Load a model from local storage"""
293
- try:
294
- # Check if model is already loaded
295
- if self.is_model_loaded(model_name):
296
- logging.info(f"Model {model_name} already loaded")
297
- return True
298
-
299
- # Generate model directory path
300
- model_dir = self.models_path / model_name.replace('/', '_')
301
- model_dir.mkdir(parents=True, exist_ok=True)
302
-
303
- # Save model data if provided
304
- if model_data:
305
- model_config_path = model_dir / "config.json"
306
- with open(model_config_path, 'w') as f:
307
- json.dump(model_data, f, indent=2)
308
-
309
- # Update model registry
310
- with self.lock:
311
- self.model_registry[model_name] = {
312
- 'path': str(model_dir),
313
- 'config': model_data,
314
- 'loaded_at': time.time()
315
- }
316
-
317
- # Copy model files if path provided
318
- if model_path and os.path.exists(model_path):
319
- model_file_path = model_dir / "model.bin"
320
- shutil.copy2(model_path, model_file_path)
321
-
322
- logging.info(f"Successfully loaded model {model_name} to local storage")
323
- return True
324
-
325
- except Exception as e:
326
- logging.error(f"Error loading model {model_name}: {str(e)}")
327
- return False
328
-
329
- # Clean up any existing model files
330
- for existing_file in model_dir.glob('*'):
331
- try:
332
- existing_file.unlink()
333
- except Exception as e:
334
- logging.warning(f"Could not remove existing file {existing_file}: {e}")
335
-
336
- return True
337
-
338
- except Exception as e:
339
- logging.error(f"Error loading model {model_name}: {e}")
340
- return False
341
-
342
- def _calculate_model_hash(self, model_path: str) -> str:
343
- """Calculate SHA256 hash of model file"""
344
- try:
345
- sha256_hash = hashlib.sha256()
346
- with open(model_path, "rb") as f:
347
- for byte_block in iter(lambda: f.read(4096), b""):
348
- sha256_hash.update(byte_block)
349
- return sha256_hash.hexdigest()
350
- except Exception as e:
351
- logging.error(f"Error calculating model hash: {str(e)}")
352
- return ""
353
-
354
- def start_inference(self, model_name: str, input_data: np.ndarray) -> Optional[Dict[str, Any]]:
355
- """Start inference with a loaded model via HTTP API"""
356
- try:
357
- if not self.is_model_loaded(model_name):
358
- logging.error(f"Model {model_name} not loaded. Please load the model first.")
359
- return None
360
-
361
- request_data = {
362
- "input_data": input_data.tolist() if isinstance(input_data, np.ndarray) else input_data
363
- }
364
-
365
- response = self._make_request(
366
- 'POST',
367
- f'/models/{model_name}/inference',
368
- json=request_data
369
- )
370
-
371
- if response and response.get('status') == 'success':
372
- return {
373
- 'output': np.array(response['output']) if 'output' in response else None,
374
- 'metrics': response.get('metrics', {}),
375
- 'model_info': self.model_registry.get(model_name, {})
376
- }
377
- else:
378
- logging.error(f"Inference failed for model {model_name}: {response.get('message', 'Unknown error')}")
379
- return None
380
-
381
- except Exception as e:
382
- logging.error(f"Error during inference for model {model_name}: {str(e)}")
383
- return None
384
-
385
- def ping(self) -> bool:
386
- """Ping the server to check connection status."""
387
- try:
388
- response = self._make_request('GET', '/status')
389
- return response and response.get('status') == 'ok'
390
- except Exception as e:
391
- logging.error(f"Ping failed: {e}")
392
- return False
393
-
394
- def is_connected(self) -> bool:
395
- """Check if the client is connected to the server."""
396
- return self.ping()
397
-
398
- def get_connection_status(self) -> Dict[str, Any]:
399
- """Get detailed connection status."""
400
- if self.is_connected():
401
- return {"status": "connected", "session_id": self.session_id}
402
- else:
403
- return {"status": "disconnected", "error_count": self.error_count}
404
-
405
- def set_keep_alive(self, interval: int):
406
- """Set keep-alive interval (compatibility method)."""
407
- logging.info(f"Keep-alive interval set to {interval} seconds (HTTP client does not use websockets).")
408
-
409
- def reconnect(self):
410
- """Attempt to reconnect (compatibility method)."""
411
- logging.info("Attempting to reconnect HTTP client...")
412
- self._create_session()
413
-
414
- def wait_for_connection(self, timeout: float = 30.0) -> bool:
415
- """Wait for HTTP connection to be established (compatibility method)"""
416
- start_time = time.time()
417
- while time.time() - start_time < timeout:
418
- if self.is_connected():
419
- logging.info("HTTP connection established.")
420
- return True
421
- time.sleep(1) # Wait for 1 second before retrying
422
- logging.error("HTTP connection not established within timeout.")
423
- return False
424
-
425
- def close(self):
426
- """Close HTTP client"""
427
- self._closing = True
428
- logging.info("HTTP client is closing.")
429
- # Invalidate session on server side if possible
430
- if self.session_token:
431
- try:
432
- self.http_session.post(f"{self.api_base}/sessions/invalidate",
433
- headers={'Authorization': f'Bearer {self.session_token}'},
434
- timeout=5)
435
- except Exception as e:
436
- logging.warning(f"Failed to invalidate session on server: {e}")
437
- self.http_session.close()
438
- HTTPGPUStorage._instance = None # Clear singleton instance
439
-
440
- # Compatibility alias for existing code
441
- WebSocketGPUStorage = HTTPGPUStorage
442
-
443
-
 
1
+ import json
2
+ import numpy as np
3
+ from typing import Dict, Any, Optional, Union
4
+ import threading
5
+ import time
6
+ import hashlib
7
+ import logging
8
+ import os
9
+ import shutil
10
+ import uuid
11
+ from pathlib import Path
12
+
13
+ class HTTPGPUStorage:
14
+ """
15
+ HTTP-based GPU storage client that replaces WebSocket functionality.
16
+ Maintains the same interface as WebSocketGPUStorage for backward compatibility.
17
+ """
18
+
19
+ # Singleton instance
20
+ _instance = None
21
+ _lock = threading.Lock()
22
+
23
    def __new__(cls, storage_path: str = "storage"):
        """Create or return the process-wide singleton instance.

        Thread-safe: instance creation is guarded by the class-level lock.

        NOTE(review): storage_path is only honoured by the very first
        instantiation; later calls return the existing instance and silently
        ignore a different path — confirm callers expect this.
        """
        with cls._lock:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                # Convert to absolute path if relative
                if not os.path.isabs(storage_path):
                    storage_path = os.path.abspath(storage_path)
                cls._instance._init_singleton(storage_path)
        return cls._instance
32
+
33
    def _init_singleton(self, storage_path: str):
        """Initialize the singleton instance with local storage.

        Idempotent: returns immediately once ``self.initialized`` is set, so
        repeated ``__new__`` calls never re-run the setup.

        Args:
            storage_path: Path (made absolute by ``__new__``) under which all
                storage subdirectories are created.
        """
        if hasattr(self, 'initialized'):
            return

        # Setup storage paths
        self.base_path = Path(storage_path)
        self.vram_path = self.base_path / "vram_blocks"   # tensor .npy files + JSON sidecars
        self.models_path = self.base_path / "models"      # per-model directories
        self.cache_path = self.base_path / "cache"        # generic cached values
        self.state_path = self.base_path / "states"       # component state blobs

        # Create directories
        for path in [self.vram_path, self.models_path, self.cache_path, self.state_path]:
            path.mkdir(parents=True, exist_ok=True)

        self.lock = threading.Lock()  # guards registries and resource counters
        self._closing = False
        self.error_count = 0
        self.last_error_time = 0
        self.session_id = str(uuid.uuid4())

        # Tensor and model registries (maintained for compatibility)
        self.tensor_registry: Dict[str, Dict[str, Any]] = {}
        self.model_registry: Dict[str, Dict[str, Any]] = {}
        self.resource_monitor = {
            'vram_used': 0,        # bytes, accumulated by store_tensor()
            'active_tensors': 0,
            'loaded_models': set()
        }

        # Initialize local storage monitoring
        self.storage_monitor = {
            'total_size': 0,
            'last_access': time.time(),
            # NOTE(review): getsize() on a directory returns the size of the
            # directory entry itself, not the tree's disk usage — confirm intent
            # (cf. the recursive sum used in _check_storage()).
            'disk_usage': os.path.getsize(str(self.base_path)) if os.path.exists(str(self.base_path)) else 0
        }

        # Initialize session
        self._create_session()
        self.initialized = True
74
+
75
+ def __init__(self, storage_path: str = "storage"):
76
+ """This will actually just return the singleton instance.
77
+ The actual initialization happens in __new__ and _init_singleton"""
78
+ pass
79
+
80
    def _create_session(self):
        """Initialize local storage session.

        Writes a ``session_status.json`` marker under ``base_path`` recording
        the session id, creation time, and fixed resource limits.

        Returns:
            bool: True when the status file was written; False on error, in
            which case ``error_count`` and ``last_error_time`` are updated.
        """
        try:
            # Create status file to track session
            status_path = self.base_path / "session_status.json"
            status_data = {
                "session_id": self.session_id,
                "created_at": time.time(),
                "resource_limits": {
                    "max_vram_gb": 40,  # A100 size
                    "max_models": 5,
                    "max_batch_size": 32
                }
            }

            with open(status_path, 'w') as f:
                json.dump(status_data, f, indent=2)

            logging.info(f"Local storage session created: {self.session_id}")
            return True

        except Exception as e:
            # NOTE(review): message says "HTTP session" but this method only
            # does local-file setup — likely leftover WebSocket-client wording.
            logging.error(f"Failed to create HTTP session: {e}")
            self.error_count += 1
            self.last_error_time = time.time()
            return False
106
+
107
    def _check_storage(self) -> Dict[str, Any]:
        """Check local storage status and usage.

        Returns:
            ``{"status": "ok", "monitor": <dict>}`` on success, or
            ``{"status": "error", "message": <str>}`` on failure.
        """
        try:
            # Update storage monitoring
            self.storage_monitor.update({
                # Recursive sum of all regular-file sizes under base_path.
                'total_size': sum(f.stat().st_size for f in self.base_path.rglob('*') if f.is_file()),
                'last_access': time.time(),
                # NOTE(review): getsize() on a directory is the directory
                # entry's size, not the tree's disk usage — confirm intent.
                'disk_usage': os.path.getsize(str(self.base_path)) if os.path.exists(str(self.base_path)) else 0
            })
            return {"status": "ok", "monitor": self.storage_monitor}
        except Exception as e:
            logging.error(f"Error checking storage: {e}")
            return {"status": "error", "message": str(e)}
120
+
121
    def store_tensor(self, tensor_id: str, data: np.ndarray, model_size: Optional[int] = None) -> bool:
        """Store tensor data in local storage.

        Writes the array to ``<vram_path>/<tensor_id>.npy`` plus a JSON
        metadata sidecar, then updates the in-memory tensor registry and
        resource counters under the instance lock.

        Args:
            tensor_id: Key used for the on-disk filenames and the registry.
            data: Array to persist; must not be None.
            model_size: Optional size hint recorded in the metadata
                (stored as -1 when not provided).

        Returns:
            bool: True on success, False on any failure (logged).
        """
        try:
            if data is None:
                raise ValueError("Cannot store None tensor")

            # Calculate tensor metadata
            tensor_shape = data.shape
            tensor_dtype = str(data.dtype)
            tensor_size = data.nbytes

            # Save tensor data
            tensor_path = self.vram_path / f"{tensor_id}.npy"
            np.save(str(tensor_path), data)

            # Save metadata
            metadata = {
                'shape': tensor_shape,  # tuple; json.dump serializes it as a list
                'dtype': tensor_dtype,
                'size': tensor_size,
                'timestamp': time.time(),
                'model_size': model_size if model_size is not None else -1
            }

            metadata_path = self.vram_path / f"{tensor_id}_meta.json"
            with open(metadata_path, 'w') as f:
                json.dump(metadata, f)

            # Update tensor registry
            with self.lock:
                self.tensor_registry[tensor_id] = metadata
                self.resource_monitor['vram_used'] += tensor_size
                self.resource_monitor['active_tensors'] += 1
            return True

        except Exception as e:
            logging.error(f"Error storing tensor {tensor_id}: {str(e)}")
            return False
159
+
160
    def load_tensor(self, tensor_id: str) -> Optional[np.ndarray]:
        """Load tensor data via HTTP API.

        NOTE(review): store_tensor() writes tensors to the *local* filesystem,
        but this method fetches them through ``self._make_request``, which is
        not defined anywhere in this file — confirm the intended read path.

        Returns:
            The tensor as an ndarray (cast/reshaped per server metadata), or
            None on any failure (logged).
        """
        try:
            # Check tensor registry first
            if tensor_id not in self.tensor_registry:
                logging.warning(f"Tensor {tensor_id} not registered in VRAM")
                # Still try to load it in case it exists on server

            response = self._make_request('GET', f'/vram/blocks/{tensor_id}')

            if response and response.get('status') == 'success':
                data = response.get('data')
                metadata = response.get('metadata', {})

                if data is None:
                    logging.error(f"No data found for tensor {tensor_id}")
                    return None

                try:
                    # Convert to numpy array with correct dtype
                    expected_dtype = metadata.get('dtype', 'float32')
                    expected_shape = metadata.get('shape')

                    arr = np.array(data, dtype=np.dtype(expected_dtype))
                    if expected_shape and arr.shape != tuple(expected_shape):
                        arr = arr.reshape(expected_shape)

                    # Update registry if not present
                    if tensor_id not in self.tensor_registry:
                        with self.lock:
                            self.tensor_registry[tensor_id] = metadata

                    return arr

                except Exception as e:
                    logging.error(f"Error converting tensor data: {str(e)}")
                    return None
            else:
                # NOTE(review): when response is None, response.get() raises
                # AttributeError here; the outer except returns None as well.
                logging.error(f"Failed to load tensor {tensor_id}: {response.get('message', 'Unknown error')}")
                return None

        except Exception as e:
            logging.error(f"Error loading tensor {tensor_id}: {str(e)}")
            return None
204
+
205
+ def store_state(self, component: str, state_id: str, state_data: Dict[str, Any]) -> bool:
206
+ """Store component state via HTTP API"""
207
+ try:
208
+ request_data = {
209
+ "data": state_data,
210
+ "timestamp": time.time()
211
+ }
212
+
213
+ response = self._make_request(
214
+ 'POST',
215
+ f'/state/{component}/{state_id}',
216
+ json=request_data
217
+ )
218
+
219
+ if response and response.get('status') == 'success':
220
+ return True
221
+ else:
222
+ logging.error(f"Failed to store state for {component}/{state_id}: {response.get('message', 'Unknown error')}")
223
+ return False
224
+
225
+ except Exception as e:
226
+ logging.error(f"Error storing state for {component}/{state_id}: {str(e)}")
227
+ return False
228
+
229
+ def load_state(self, component: str, state_id: str) -> Optional[Dict[str, Any]]:
230
+ """Load component state via HTTP API"""
231
+ try:
232
+ response = self._make_request("GET", f"/api/v1/state/{component}/{state_id}")
233
+
234
+ if response and response.get('status') == 'success':
235
+ return response.get('data')
236
+ else:
237
+ logging.error(f"Failed to load state for {component}/{state_id}: {response.get('message', 'Unknown error')}")
238
+ return None
239
+
240
+ except Exception as e:
241
+ logging.error(f"Error loading state for {component}/{state_id}: {str(e)}")
242
+ return None
243
+
244
+ def cache_data(self, key: str, data: Any) -> bool:
245
+ """Cache data via HTTP API"""
246
+ try:
247
+ request_data = {"data": data}
248
+
249
+ response = self._make_request(
250
+ 'POST',
251
+ f'/cache/{key}',
252
+ json=request_data
253
+ )
254
+
255
+ return response and response.get('status') == 'success'
256
+
257
+ except Exception as e:
258
+ logging.error(f"Error caching data for key {key}: {str(e)}")
259
+ return False
260
+
261
+ def get_cached_data(self, key: str) -> Optional[Any]:
262
+ """Get cached data via HTTP API"""
263
+ try:
264
+ response = self._make_request("GET", f"/cache/{key}")
265
+
266
+ if response and response.get('status') == 'success':
267
+ return response.get('data')
268
+ return None
269
+
270
+ except Exception as e:
271
+ logging.error(f"Error getting cached data for key {key}: {str(e)}")
272
+ return None
273
+
274
+ def is_model_loaded(self, model_name: str) -> bool:
275
+ """Check if a model is loaded via HTTP API"""
276
+ try:
277
+ response = self._make_request(
278
+ "GET",
279
+ f"/models/{model_name}/status",
280
+ timeout=60
281
+ )
282
+
283
+ if response and response.get('status') == 'loaded':
284
+ return True
285
+ return False
286
+
287
+ except Exception as e:
288
+ logging.error(f"Error checking model status for {model_name}: {str(e)}")
289
+ return False
290
+
291
    def load_model(self, model_name: str, model_path: Optional[str] = None, model_data: Optional[Dict] = None) -> bool:
        """Load a model from local storage.

        Creates ``<models_path>/<name>`` (slashes replaced by underscores),
        clears any previous files in it, optionally writes ``config.json``,
        records the model in the registry, and copies the model binary.

        Args:
            model_name: Logical model name; '/' is replaced by '_' on disk.
            model_path: Optional path to a model file copied to ``model.bin``.
            model_data: Optional config dict written to ``config.json``.

        Returns:
            bool: True on success (or when already loaded), False on error.
        """
        try:
            # Check if model is already loaded
            if self.is_model_loaded(model_name):
                logging.info(f"Model {model_name} already loaded")
                return True

            # Generate model directory path
            model_dir = self.models_path / model_name.replace('/', '_')
            model_dir.mkdir(parents=True, exist_ok=True)

            # Clean up any existing files
            for existing_file in model_dir.glob('*'):
                try:
                    if existing_file.is_file():
                        existing_file.unlink()
                except Exception as e:
                    logging.warning(f"Could not remove existing file {existing_file}: {e}")

            # Save model data if provided
            if model_data:
                model_config_path = model_dir / "config.json"
                with open(model_config_path, 'w') as f:
                    json.dump(model_data, f, indent=2)

            # Update model registry
            with self.lock:
                self.model_registry[model_name] = {
                    'path': str(model_dir),
                    'config': model_data,
                    'loaded_at': time.time(),
                    # Hash of the *source* file (computed before the copy below).
                    'hash': self._calculate_model_hash(model_path) if model_path else None
                }
                self.resource_monitor['loaded_models'].add(model_name)

            # Copy model files if path provided
            if model_path and os.path.exists(model_path):
                model_file_path = model_dir / "model.bin"
                shutil.copy2(model_path, model_file_path)

            logging.info(f"Successfully loaded model {model_name} to local storage")
            return True

        except Exception as e:
            logging.error(f"Error loading model {model_name}: {str(e)}")
            return False
338
+
339
+
340
+ def _calculate_model_hash(self, model_path: str) -> str:
341
+ """Calculate SHA256 hash of model file"""
342
+ try:
343
+ sha256_hash = hashlib.sha256()
344
+ with open(model_path, "rb") as f:
345
+ for byte_block in iter(lambda: f.read(4096), b""):
346
+ sha256_hash.update(byte_block)
347
+ return sha256_hash.hexdigest()
348
+ except Exception as e:
349
+ logging.error(f"Error calculating model hash: {str(e)}")
350
+ return ""
351
+
352
    def start_inference(self, model_name: str, input_data: np.ndarray) -> Optional[Dict[str, Any]]:
        """Start inference with a loaded model via HTTP API.

        Args:
            model_name: Name of a model that is_model_loaded() reports loaded.
            input_data: ndarray (serialized with tolist()) or any other
                JSON-serializable payload, passed through as-is.

        Returns:
            Dict with 'output' (ndarray or None), 'metrics', and 'model_info'
            on success; None on failure (logged).
        """
        try:
            if not self.is_model_loaded(model_name):
                logging.error(f"Model {model_name} not loaded. Please load the model first.")
                return None

            request_data = {
                "input_data": input_data.tolist() if isinstance(input_data, np.ndarray) else input_data
            }

            response = self._make_request(
                'POST',
                f'/models/{model_name}/inference',
                json=request_data
            )

            if response and response.get('status') == 'success':
                return {
                    'output': np.array(response['output']) if 'output' in response else None,
                    'metrics': response.get('metrics', {}),
                    'model_info': self.model_registry.get(model_name, {})
                }
            else:
                # NOTE(review): when response is None, response.get() raises
                # here and the outer except returns None as well.
                logging.error(f"Inference failed for model {model_name}: {response.get('message', 'Unknown error')}")
                return None

        except Exception as e:
            logging.error(f"Error during inference for model {model_name}: {str(e)}")
            return None
382
+
383
+ def ping(self) -> bool:
384
+ """Ping the server to check connection status."""
385
+ try:
386
+ response = self._make_request('GET', '/status')
387
+ return response and response.get('status') == 'ok'
388
+ except Exception as e:
389
+ logging.error(f"Ping failed: {e}")
390
+ return False
391
+
392
+ def is_connected(self) -> bool:
393
+ """Check if the client is connected to the server."""
394
+ return self.ping()
395
+
396
+ def get_connection_status(self) -> Dict[str, Any]:
397
+ """Get detailed connection status."""
398
+ if self.is_connected():
399
+ return {"status": "connected", "session_id": self.session_id}
400
+ else:
401
+ return {"status": "disconnected", "error_count": self.error_count}
402
+
403
+ def set_keep_alive(self, interval: int):
404
+ """Set keep-alive interval (compatibility method)."""
405
+ logging.info(f"Keep-alive interval set to {interval} seconds (HTTP client does not use websockets).")
406
+
407
+ def reconnect(self):
408
+ """Attempt to reconnect (compatibility method)."""
409
+ logging.info("Attempting to reconnect HTTP client...")
410
+ self._create_session()
411
+
412
+ def wait_for_connection(self, timeout: float = 30.0) -> bool:
413
+ """Wait for HTTP connection to be established (compatibility method)"""
414
+ start_time = time.time()
415
+ while time.time() - start_time < timeout:
416
+ if self.is_connected():
417
+ logging.info("HTTP connection established.")
418
+ return True
419
+ time.sleep(1) # Wait for 1 second before retrying
420
+ logging.error("HTTP connection not established within timeout.")
421
+ return False
422
+
423
+ def close(self):
424
+ """Close HTTP client"""
425
+ self._closing = True
426
+ logging.info("HTTP client is closing.")
427
+ # Invalidate session on server side if possible
428
+ if self.session_token:
429
+ try:
430
+ self.http_session.post(f"{self.api_base}/sessions/invalidate",
431
+ headers={'Authorization': f'Bearer {self.session_token}'},
432
+ timeout=5)
433
+ except Exception as e:
434
+ logging.warning(f"Failed to invalidate session on server: {e}")
435
+ self.http_session.close()
436
+ HTTPGPUStorage._instance = None # Clear singleton instance
437
+
438
+ # Compatibility alias for existing code
439
+ WebSocketGPUStorage = HTTPGPUStorage
440
+
441
+