Spaces:
Sleeping
Sleeping
Factor Studios
committed on
Update virtual_gpu_server_http.py
Browse files- virtual_gpu_server_http.py +67 -77
virtual_gpu_server_http.py
CHANGED
|
@@ -16,14 +16,13 @@ from datetime import datetime, timedelta
|
|
| 16 |
import hashlib
|
| 17 |
import gzip
|
| 18 |
import base64
|
| 19 |
-
import logging
|
| 20 |
from pydantic import BaseModel
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
-
# Configure logging
|
| 23 |
-
logging.basicConfig(
|
| 24 |
-
level=logging.INFO,
|
| 25 |
-
format='%(asctime)s - %(levelname)s - %(message)s'
|
| 26 |
-
)
|
| 27 |
|
| 28 |
# Create FastAPI instance with enhanced configuration
|
| 29 |
app = FastAPI(
|
|
@@ -127,6 +126,8 @@ class VirtualGPUServer:
|
|
| 127 |
self.state_cache: Dict[str, Any] = {}
|
| 128 |
self.memory_cache: Dict[str, Any] = {}
|
| 129 |
self.model_cache: Dict[str, Any] = {}
|
|
|
|
|
|
|
| 130 |
|
| 131 |
# Session management for HTTP API
|
| 132 |
self.http_sessions: Dict[str, Dict[str, Any]] = {}
|
|
@@ -197,6 +198,39 @@ class VirtualGPUServer:
|
|
| 197 |
"""Decompress gzip data"""
|
| 198 |
return gzip.decompress(data)
|
| 199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
async def handle_vram_operation(self, operation: dict) -> dict:
|
| 201 |
"""Handle VRAM read/write operations (preserved from WebSocket implementation)"""
|
| 202 |
try:
|
|
@@ -628,15 +662,6 @@ async def get_cache(
|
|
| 628 |
detail=f"Cache get operation failed: {str(e)}"
|
| 629 |
)
|
| 630 |
|
| 631 |
-
def sanitize_model_name(model_name: str) -> str:
|
| 632 |
-
"""
|
| 633 |
-
Sanitize model name for safe file system usage.
|
| 634 |
-
Decodes URL-encoded name and replaces slashes with double underscores.
|
| 635 |
-
"""
|
| 636 |
-
from urllib.parse import unquote
|
| 637 |
-
decoded_name = unquote(model_name)
|
| 638 |
-
return decoded_name.replace('/', '__')
|
| 639 |
-
|
| 640 |
@app.post("/api/v1/models/{model_name}/load")
|
| 641 |
async def load_model(
|
| 642 |
model_name: str,
|
|
@@ -645,17 +670,12 @@ async def load_model(
|
|
| 645 |
):
|
| 646 |
"""Load AI model"""
|
| 647 |
try:
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
safe_name = sanitize_model_name(model_name)
|
| 653 |
-
logging.info(f"Sanitized model name: {safe_name}")
|
| 654 |
-
|
| 655 |
-
# Store model information
|
| 656 |
model_info = {
|
| 657 |
-
'model_name': model_name,
|
| 658 |
-
'safe_name': safe_name, # Store sanitized name
|
| 659 |
'model_data': request.model_data,
|
| 660 |
'model_path': request.model_path,
|
| 661 |
'model_hash': request.model_hash,
|
|
@@ -663,28 +683,28 @@ async def load_model(
|
|
| 663 |
'session_id': session['session_id']
|
| 664 |
}
|
| 665 |
|
| 666 |
-
#
|
| 667 |
server.model_cache[model_name] = model_info
|
|
|
|
| 668 |
|
| 669 |
-
# Store in persistent storage
|
| 670 |
model_file = server.models_path / f"{safe_name}.json"
|
| 671 |
-
logging.info(f"Storing model info at: {model_file}")
|
| 672 |
-
|
| 673 |
with open(model_file, 'w') as f:
|
| 674 |
json.dump(model_info, f)
|
| 675 |
|
| 676 |
server.ops_counter += 1
|
|
|
|
| 677 |
return {
|
| 678 |
"status": "success",
|
| 679 |
"message": f"Model {model_name} loaded successfully",
|
| 680 |
"model_info": {
|
| 681 |
"name": model_name,
|
| 682 |
-
"safe_name": safe_name,
|
| 683 |
"loaded_at": model_info['loaded_at']
|
| 684 |
}
|
| 685 |
}
|
| 686 |
|
| 687 |
except Exception as e:
|
|
|
|
| 688 |
raise HTTPException(
|
| 689 |
status_code=500,
|
| 690 |
detail=f"Model load operation failed: {str(e)}"
|
|
@@ -698,25 +718,12 @@ async def run_inference(
|
|
| 698 |
):
|
| 699 |
"""Run model inference"""
|
| 700 |
try:
|
| 701 |
-
logging.info(f"
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
# Check if model is loaded (try both original and safe names)
|
| 706 |
-
if model_name not in server.model_cache:
|
| 707 |
-
# Try loading from file system using safe name
|
| 708 |
-
model_file = server.models_path / f"{safe_name}.json"
|
| 709 |
-
if not model_file.exists():
|
| 710 |
-
logging.error(f"Model {model_name} not found in cache or filesystem")
|
| 711 |
-
raise HTTPException(status_code=404, detail=f"Model {model_name} not loaded")
|
| 712 |
-
|
| 713 |
-
logging.info(f"Loading model info from file: {model_file}")
|
| 714 |
-
with open(model_file) as f:
|
| 715 |
-
model_info = json.load(f)
|
| 716 |
-
server.model_cache[model_name] = model_info
|
| 717 |
|
| 718 |
-
# Simulate inference processing
|
| 719 |
-
# In a real implementation, this would invoke the actual model
|
| 720 |
result = {
|
| 721 |
"status": "success",
|
| 722 |
"output": request.input_data, # Echo input for now
|
|
@@ -724,17 +731,16 @@ async def run_inference(
|
|
| 724 |
"inference_time": 0.1,
|
| 725 |
"tokens_processed": len(request.input_data)
|
| 726 |
},
|
| 727 |
-
"model_info": server.model_cache
|
| 728 |
}
|
| 729 |
|
| 730 |
server.ops_counter += 1
|
| 731 |
-
logging.info(f"Inference completed successfully for model: {model_name}")
|
| 732 |
return result
|
| 733 |
|
| 734 |
except HTTPException:
|
| 735 |
raise
|
| 736 |
except Exception as e:
|
| 737 |
-
logging.
|
| 738 |
raise HTTPException(
|
| 739 |
status_code=500,
|
| 740 |
detail=f"Inference operation failed: {str(e)}"
|
|
@@ -747,39 +753,20 @@ async def get_model_status(
|
|
| 747 |
):
|
| 748 |
"""Get model status"""
|
| 749 |
try:
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
logging.info(f"Checking model status - Safe name: {safe_name}")
|
| 753 |
-
|
| 754 |
-
# Check cache first
|
| 755 |
-
if model_name in server.model_cache:
|
| 756 |
-
logging.info(f"Model {model_name} found in cache")
|
| 757 |
return {
|
| 758 |
"status": "loaded",
|
| 759 |
-
"model_info": server.model_cache[
|
| 760 |
}
|
| 761 |
-
|
| 762 |
-
# Check file system using safe name
|
| 763 |
-
model_file = server.models_path / f"{safe_name}.json"
|
| 764 |
-
if model_file.exists():
|
| 765 |
-
logging.info(f"Model file found: {model_file}")
|
| 766 |
-
with open(model_file) as f:
|
| 767 |
-
model_info = json.load(f)
|
| 768 |
-
# Update cache
|
| 769 |
-
server.model_cache[model_name] = model_info
|
| 770 |
return {
|
| 771 |
-
"status": "
|
| 772 |
-
"
|
| 773 |
}
|
| 774 |
-
|
| 775 |
-
logging.info(f"Model {model_name} not found in cache or filesystem")
|
| 776 |
-
return {
|
| 777 |
-
"status": "not_loaded",
|
| 778 |
-
"message": f"Model {model_name} is not loaded"
|
| 779 |
-
}
|
| 780 |
|
| 781 |
except Exception as e:
|
| 782 |
-
logging.
|
| 783 |
raise HTTPException(
|
| 784 |
status_code=500,
|
| 785 |
detail=f"Model status check failed: {str(e)}"
|
|
@@ -844,6 +831,7 @@ async def transfer_between_chips(
|
|
| 844 |
except HTTPException:
|
| 845 |
raise
|
| 846 |
except Exception as e:
|
|
|
|
| 847 |
raise HTTPException(
|
| 848 |
status_code=500,
|
| 849 |
detail=f"Chip transfer failed: {str(e)}"
|
|
@@ -876,6 +864,7 @@ async def create_sync_barrier(
|
|
| 876 |
}
|
| 877 |
|
| 878 |
except Exception as e:
|
|
|
|
| 879 |
raise HTTPException(
|
| 880 |
status_code=500,
|
| 881 |
detail=f"Barrier creation failed: {str(e)}"
|
|
@@ -913,6 +902,7 @@ async def wait_sync_barrier(
|
|
| 913 |
except HTTPException:
|
| 914 |
raise
|
| 915 |
except Exception as e:
|
|
|
|
| 916 |
raise HTTPException(
|
| 917 |
status_code=500,
|
| 918 |
detail=f"Barrier wait failed: {str(e)}"
|
|
|
|
| 16 |
import hashlib
|
| 17 |
import gzip
|
| 18 |
import base64
|
|
|
|
| 19 |
from pydantic import BaseModel
|
| 20 |
+
import urllib.parse
|
| 21 |
+
import re
|
| 22 |
+
import logging
|
| 23 |
|
| 24 |
+
# Configure basic logging
|
| 25 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
# Create FastAPI instance with enhanced configuration
|
| 28 |
app = FastAPI(
|
|
|
|
| 126 |
self.state_cache: Dict[str, Any] = {}
|
| 127 |
self.memory_cache: Dict[str, Any] = {}
|
| 128 |
self.model_cache: Dict[str, Any] = {}
|
| 129 |
+
# map original model_name -> safe_filename used on disk
|
| 130 |
+
self.model_name_map: Dict[str, str] = {}
|
| 131 |
|
| 132 |
# Session management for HTTP API
|
| 133 |
self.http_sessions: Dict[str, Dict[str, Any]] = {}
|
|
|
|
| 198 |
"""Decompress gzip data"""
|
| 199 |
return gzip.decompress(data)
|
| 200 |
|
| 201 |
+
def sanitize_model_name(self, model_name: str) -> str:
|
| 202 |
+
"""Create a filesystem-safe filename from a provided model_name.
|
| 203 |
+
This will URL-decode percent-encoded values, then replace unsafe characters with underscores.
|
| 204 |
+
"""
|
| 205 |
+
if not model_name:
|
| 206 |
+
return "unnamed_model"
|
| 207 |
+
# URL-decode first (handles cases where client sent %2F)
|
| 208 |
+
decoded = urllib.parse.unquote(model_name)
|
| 209 |
+
# Replace characters that are not alphanumeric, dot, underscore or dash
|
| 210 |
+
safe = re.sub(r'[^0-9A-Za-z._-]', '_', decoded)
|
| 211 |
+
# Trim length to avoid overly long filenames
|
| 212 |
+
return safe[:240]
|
| 213 |
+
|
| 214 |
+
def resolve_model_key(self, model_name: str) -> Optional[str]:
|
| 215 |
+
"""Resolve the canonical model key used in model_cache.
|
| 216 |
+
Accepts several possible incoming forms (percent-encoded, decoded, sanitized) and returns
|
| 217 |
+
the key present in model_cache if any, otherwise None.
|
| 218 |
+
"""
|
| 219 |
+
# direct hit
|
| 220 |
+
if model_name in self.model_cache:
|
| 221 |
+
return model_name
|
| 222 |
+
# try URL-decoded
|
| 223 |
+
decoded = urllib.parse.unquote(model_name)
|
| 224 |
+
if decoded in self.model_cache:
|
| 225 |
+
return decoded
|
| 226 |
+
# try sanitized form matching stored map
|
| 227 |
+
safe = self.sanitize_model_name(model_name)
|
| 228 |
+
# see if we have an original key that maps to safe filename
|
| 229 |
+
for orig, safe_name in self.model_name_map.items():
|
| 230 |
+
if safe_name == safe:
|
| 231 |
+
return orig
|
| 232 |
+
return None
|
| 233 |
+
|
| 234 |
async def handle_vram_operation(self, operation: dict) -> dict:
|
| 235 |
"""Handle VRAM read/write operations (preserved from WebSocket implementation)"""
|
| 236 |
try:
|
|
|
|
| 662 |
detail=f"Cache get operation failed: {str(e)}"
|
| 663 |
)
|
| 664 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 665 |
@app.post("/api/v1/models/{model_name}/load")
|
| 666 |
async def load_model(
|
| 667 |
model_name: str,
|
|
|
|
| 670 |
):
|
| 671 |
"""Load AI model"""
|
| 672 |
try:
|
| 673 |
+
logging.info(f"Received model load request for: {model_name}")
|
| 674 |
+
# Create a safe filename and persist model info under the original key
|
| 675 |
+
safe_name = server.sanitize_model_name(model_name)
|
| 676 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
model_info = {
|
| 678 |
+
'model_name': model_name,
|
|
|
|
| 679 |
'model_data': request.model_data,
|
| 680 |
'model_path': request.model_path,
|
| 681 |
'model_hash': request.model_hash,
|
|
|
|
| 683 |
'session_id': session['session_id']
|
| 684 |
}
|
| 685 |
|
| 686 |
+
# Store mapping and cache
|
| 687 |
server.model_cache[model_name] = model_info
|
| 688 |
+
server.model_name_map[model_name] = safe_name
|
| 689 |
|
| 690 |
+
# Store in persistent storage using safe filename
|
| 691 |
model_file = server.models_path / f"{safe_name}.json"
|
|
|
|
|
|
|
| 692 |
with open(model_file, 'w') as f:
|
| 693 |
json.dump(model_info, f)
|
| 694 |
|
| 695 |
server.ops_counter += 1
|
| 696 |
+
logging.info(f"Model '{model_name}' saved to disk as '{safe_name}.json'")
|
| 697 |
return {
|
| 698 |
"status": "success",
|
| 699 |
"message": f"Model {model_name} loaded successfully",
|
| 700 |
"model_info": {
|
| 701 |
"name": model_name,
|
|
|
|
| 702 |
"loaded_at": model_info['loaded_at']
|
| 703 |
}
|
| 704 |
}
|
| 705 |
|
| 706 |
except Exception as e:
|
| 707 |
+
logging.exception("Model load operation failed")
|
| 708 |
raise HTTPException(
|
| 709 |
status_code=500,
|
| 710 |
detail=f"Model load operation failed: {str(e)}"
|
|
|
|
| 718 |
):
|
| 719 |
"""Run model inference"""
|
| 720 |
try:
|
| 721 |
+
logging.info(f"Inference requested for model: {model_name}")
|
| 722 |
+
resolved_key = server.resolve_model_key(model_name)
|
| 723 |
+
if not resolved_key:
|
| 724 |
+
raise HTTPException(status_code=404, detail=f"Model {model_name} not loaded")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 725 |
|
| 726 |
+
# Simulate inference processing (echo input for now)
|
|
|
|
| 727 |
result = {
|
| 728 |
"status": "success",
|
| 729 |
"output": request.input_data, # Echo input for now
|
|
|
|
| 731 |
"inference_time": 0.1,
|
| 732 |
"tokens_processed": len(request.input_data)
|
| 733 |
},
|
| 734 |
+
"model_info": server.model_cache.get(resolved_key)
|
| 735 |
}
|
| 736 |
|
| 737 |
server.ops_counter += 1
|
|
|
|
| 738 |
return result
|
| 739 |
|
| 740 |
except HTTPException:
|
| 741 |
raise
|
| 742 |
except Exception as e:
|
| 743 |
+
logging.exception("Inference operation failed")
|
| 744 |
raise HTTPException(
|
| 745 |
status_code=500,
|
| 746 |
detail=f"Inference operation failed: {str(e)}"
|
|
|
|
| 753 |
):
|
| 754 |
"""Get model status"""
|
| 755 |
try:
|
| 756 |
+
resolved_key = server.resolve_model_key(model_name)
|
| 757 |
+
if resolved_key:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 758 |
return {
|
| 759 |
"status": "loaded",
|
| 760 |
+
"model_info": server.model_cache[resolved_key]
|
| 761 |
}
|
| 762 |
+
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 763 |
return {
|
| 764 |
+
"status": "not_loaded",
|
| 765 |
+
"message": f"Model {model_name} is not loaded"
|
| 766 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 767 |
|
| 768 |
except Exception as e:
|
| 769 |
+
logging.exception("Model status check failed")
|
| 770 |
raise HTTPException(
|
| 771 |
status_code=500,
|
| 772 |
detail=f"Model status check failed: {str(e)}"
|
|
|
|
| 831 |
except HTTPException:
|
| 832 |
raise
|
| 833 |
except Exception as e:
|
| 834 |
+
logging.exception("Chip transfer failed")
|
| 835 |
raise HTTPException(
|
| 836 |
status_code=500,
|
| 837 |
detail=f"Chip transfer failed: {str(e)}"
|
|
|
|
| 864 |
}
|
| 865 |
|
| 866 |
except Exception as e:
|
| 867 |
+
logging.exception("Barrier creation failed")
|
| 868 |
raise HTTPException(
|
| 869 |
status_code=500,
|
| 870 |
detail=f"Barrier creation failed: {str(e)}"
|
|
|
|
| 902 |
except HTTPException:
|
| 903 |
raise
|
| 904 |
except Exception as e:
|
| 905 |
+
logging.exception("Barrier wait failed")
|
| 906 |
raise HTTPException(
|
| 907 |
status_code=500,
|
| 908 |
detail=f"Barrier wait failed: {str(e)}"
|