Spaces:

factorstudios
/

INTIV

Sleeping

App Files Files Community

Factor Studios committed on Aug 14, 2025

Commit

f3885ff

verified ·

1 Parent(s): e2b3d8e

Update virtual_gpu_server_http.py

Browse files

Files changed (1) hide show

virtual_gpu_server_http.py +68 -65

virtual_gpu_server_http.py CHANGED Viewed

@@ -16,13 +16,14 @@ from datetime import datetime, timedelta
 import hashlib
 import gzip
 import base64
-from pydantic import BaseModel
-import urllib.parse
-import re
 import logging
-# Configure basic logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 # Create FastAPI instance with enhanced configuration
 app = FastAPI(
@@ -126,8 +127,6 @@ class VirtualGPUServer:
         self.state_cache: Dict[str, Any] = {}
         self.memory_cache: Dict[str, Any] = {}
         self.model_cache: Dict[str, Any] = {}
-        # map original model_name -> safe_filename used on disk
-        self.model_name_map: Dict[str, str] = {}
         # Session management for HTTP API
         self.http_sessions: Dict[str, Dict[str, Any]] = {}
@@ -198,39 +197,6 @@ class VirtualGPUServer:
         """Decompress gzip data"""
         return gzip.decompress(data)
-    def sanitize_model_name(self, model_name: str) -> str:
-        """Create a filesystem-safe filename from a provided model_name.
-        This will URL-decode percent-encoded values, then replace unsafe characters with underscores.
-        """
-        if not model_name:
-            return "unnamed_model"
-        # URL-decode first (handles cases where client sent %2F)
-        decoded = urllib.parse.unquote(model_name)
-        # Replace characters that are not alphanumeric, dot, underscore or dash
-        safe = re.sub(r'[^0-9A-Za-z._-]', '_', decoded)
-        # Trim length to avoid overly long filenames
-        return safe[:240]
-    def resolve_model_key(self, model_name: str) -> Optional[str]:
-        """Resolve the canonical model key used in model_cache.
-        Accepts several possible incoming forms (percent-encoded, decoded, sanitized) and returns
-        the key present in model_cache if any, otherwise None.
-        """
-        # direct hit
-        if model_name in self.model_cache:
-            return model_name
-        # try URL-decoded
-        decoded = urllib.parse.unquote(model_name)
-        if decoded in self.model_cache:
-            return decoded
-        # try sanitized form matching stored map
-        safe = self.sanitize_model_name(model_name)
-        # see if we have an original key that maps to safe filename
-        for orig, safe_name in self.model_name_map.items():
-            if safe_name == safe:
-                return orig
-        return None
     async def handle_vram_operation(self, operation: dict) -> dict:
         """Handle VRAM read/write operations (preserved from WebSocket implementation)"""
         try:
@@ -662,6 +628,13 @@ async def get_cache(
             detail=f"Cache get operation failed: {str(e)}"
         )
 @app.post("/api/v1/models/{model_name}/load")
 async def load_model(
     model_name: str,
@@ -670,10 +643,13 @@ async def load_model(
 ):
     """Load AI model"""
     try:
         logging.info(f"Received model load request for: {model_name}")
-        # Create a safe filename and persist model info under the original key
-        safe_name = server.sanitize_model_name(model_name)
         model_info = {
             'model_name': model_name,
             'model_data': request.model_data,
@@ -683,17 +659,16 @@ async def load_model(
             'session_id': session['session_id']
         }
-        # Store mapping and cache
         server.model_cache[model_name] = model_info
-        server.model_name_map[model_name] = safe_name
-        # Store in persistent storage using safe filename
         model_file = server.models_path / f"{safe_name}.json"
         with open(model_file, 'w') as f:
             json.dump(model_info, f)
         server.ops_counter += 1
-        logging.info(f"Model '{model_name}' saved to disk as '{safe_name}.json'")
         return {
             "status": "success",
             "message": f"Model {model_name} loaded successfully",
@@ -704,7 +679,6 @@ async def load_model(
         }
     except Exception as e:
-        logging.exception("Model load operation failed")
         raise HTTPException(
             status_code=500,
             detail=f"Model load operation failed: {str(e)}"
@@ -718,12 +692,25 @@ async def run_inference(
 ):
     """Run model inference"""
     try:
-        logging.info(f"Inference requested for model: {model_name}")
-        resolved_key = server.resolve_model_key(model_name)
-        if not resolved_key:
-            raise HTTPException(status_code=404, detail=f"Model {model_name} not loaded")
-        # Simulate inference processing (echo input for now)
         result = {
             "status": "success",
             "output": request.input_data,  # Echo input for now
@@ -731,16 +718,17 @@ async def run_inference(
                 "inference_time": 0.1,
                 "tokens_processed": len(request.input_data)
             },
-            "model_info": server.model_cache.get(resolved_key)
         }
         server.ops_counter += 1
         return result
     except HTTPException:
         raise
     except Exception as e:
-        logging.exception("Inference operation failed")
         raise HTTPException(
             status_code=500,
             detail=f"Inference operation failed: {str(e)}"
@@ -753,20 +741,38 @@ async def get_model_status(
 ):
     """Get model status"""
     try:
-        resolved_key = server.resolve_model_key(model_name)
-        if resolved_key:
             return {
                 "status": "loaded",
-                "model_info": server.model_cache[resolved_key]
             }
-        else:
             return {
-                "status": "not_loaded",
-                "message": f"Model {model_name} is not loaded"
             }
     except Exception as e:
-        logging.exception("Model status check failed")
         raise HTTPException(
             status_code=500,
             detail=f"Model status check failed: {str(e)}"
@@ -831,7 +837,6 @@ async def transfer_between_chips(
     except HTTPException:
         raise
     except Exception as e:
-        logging.exception("Chip transfer failed")
         raise HTTPException(
             status_code=500,
             detail=f"Chip transfer failed: {str(e)}"
@@ -864,7 +869,6 @@ async def create_sync_barrier(
         }
     except Exception as e:
-        logging.exception("Barrier creation failed")
         raise HTTPException(
             status_code=500,
             detail=f"Barrier creation failed: {str(e)}"
@@ -902,7 +906,6 @@ async def wait_sync_barrier(
     except HTTPException:
         raise
     except Exception as e:
-        logging.exception("Barrier wait failed")
         raise HTTPException(
             status_code=500,
             detail=f"Barrier wait failed: {str(e)}"

 import hashlib
 import gzip
 import base64
 import logging
+from pydantic import BaseModel
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
 # Create FastAPI instance with enhanced configuration
 app = FastAPI(
         self.state_cache: Dict[str, Any] = {}
         self.memory_cache: Dict[str, Any] = {}
         self.model_cache: Dict[str, Any] = {}
         # Session management for HTTP API
         self.http_sessions: Dict[str, Dict[str, Any]] = {}
         """Decompress gzip data"""
         return gzip.decompress(data)
     async def handle_vram_operation(self, operation: dict) -> dict:
         """Handle VRAM read/write operations (preserved from WebSocket implementation)"""
         try:
             detail=f"Cache get operation failed: {str(e)}"
         )
+def sanitize_filename(name: str) -> str:
+    """
+    Sanitize a string for safe file system usage.
+    Replaces slashes with double underscores.
+    """
+    return name.replace('/', '__')
 @app.post("/api/v1/models/{model_name}/load")
 async def load_model(
     model_name: str,
 ):
     """Load AI model"""
     try:
+        # Log the received model name for debugging
         logging.info(f"Received model load request for: {model_name}")
+        # Get safe filename for storage
+        safe_name = sanitize_filename(model_name)
+        # Store model information
         model_info = {
             'model_name': model_name,
             'model_data': request.model_data,
             'session_id': session['session_id']
         }
         server.model_cache[model_name] = model_info
+        # Store in persistent storage
         model_file = server.models_path / f"{safe_name}.json"
+        logging.info(f"Storing model info at: {model_file}")
         with open(model_file, 'w') as f:
             json.dump(model_info, f)
         server.ops_counter += 1
         return {
             "status": "success",
             "message": f"Model {model_name} loaded successfully",
         }
     except Exception as e:
         raise HTTPException(
             status_code=500,
             detail=f"Model load operation failed: {str(e)}"
 ):
     """Run model inference"""
     try:
+        logging.info(f"Running inference - Raw model name: {model_name}")
+        safe_name = sanitize_model_name(model_name)
+        logging.info(f"Running inference - Safe model name: {safe_name}")
+        # Check if model is loaded (try both original and safe names)
+        if model_name not in server.model_cache:
+            # Try loading from file system using safe name
+            model_file = server.models_path / f"{safe_name}.json"
+            if not model_file.exists():
+                logging.error(f"Model {model_name} not found in cache or filesystem")
+                raise HTTPException(status_code=404, detail=f"Model {model_name} not loaded")
+            logging.info(f"Loading model info from file: {model_file}")
+            with open(model_file) as f:
+                model_info = json.load(f)
+            server.model_cache[model_name] = model_info
+        # Simulate inference processing
+        # In a real implementation, this would invoke the actual model
         result = {
             "status": "success",
             "output": request.input_data,  # Echo input for now
                 "inference_time": 0.1,
                 "tokens_processed": len(request.input_data)
             },
+            "model_info": server.model_cache[model_name]
         }
         server.ops_counter += 1
+        logging.info(f"Inference completed successfully for model: {model_name}")
         return result
     except HTTPException:
         raise
     except Exception as e:
+        logging.error(f"Inference operation failed for {model_name}: {str(e)}")
         raise HTTPException(
             status_code=500,
             detail=f"Inference operation failed: {str(e)}"
 ):
     """Get model status"""
     try:
+        logging.info(f"Checking model status for: {model_name}")
+        # Check cache first
+        if model_name in server.model_cache:
+            logging.info(f"Model {model_name} found in cache")
             return {
                 "status": "loaded",
+                "model_info": server.model_cache[model_name]
             }
+        # Check file system using safe name
+        safe_name = sanitize_filename(model_name)
+        model_file = server.models_path / f"{safe_name}.json"
+        if model_file.exists():
+            logging.info(f"Model file found: {model_file}")
+            with open(model_file) as f:
+                model_info = json.load(f)
+            # Update cache
+            server.model_cache[model_name] = model_info
             return {
+                "status": "loaded",
+                "model_info": model_info
             }
+        logging.info(f"Model {model_name} not found in cache or filesystem")
+        return {
+            "status": "not_loaded",
+            "message": f"Model {model_name} is not loaded"
+        }
     except Exception as e:
+        logging.error(f"Model status check failed for {model_name}: {str(e)}")
         raise HTTPException(
             status_code=500,
             detail=f"Model status check failed: {str(e)}"
     except HTTPException:
         raise
     except Exception as e:
         raise HTTPException(
             status_code=500,
             detail=f"Chip transfer failed: {str(e)}"
         }
     except Exception as e:
         raise HTTPException(
             status_code=500,
             detail=f"Barrier creation failed: {str(e)}"
     except HTTPException:
         raise
     except Exception as e:
         raise HTTPException(
             status_code=500,
             detail=f"Barrier wait failed: {str(e)}"