Factor Studios
committed on
Update virtual_gpu_server_http.py
virtual_gpu_server_http.py +72 -10
virtual_gpu_server_http.py
CHANGED
@@ -16,8 +16,15 @@ from datetime import datetime, timedelta
 import hashlib
 import gzip
 import base64
+import logging
 from pydantic import BaseModel
 
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+
 # Create FastAPI instance with enhanced configuration
 app = FastAPI(
     title="Virtual GPU Server",
@@ -621,6 +628,15 @@ async def get_cache(
             detail=f"Cache get operation failed: {str(e)}"
         )
 
+def sanitize_model_name(model_name: str) -> str:
+    """
+    Sanitize model name for safe file system usage.
+    Decodes URL-encoded name and replaces slashes with double underscores.
+    """
+    from urllib.parse import unquote
+    decoded_name = unquote(model_name)
+    return decoded_name.replace('/', '__')
+
 @app.post("/api/v1/models/{model_name}/load")
 async def load_model(
     model_name: str,
@@ -629,9 +645,17 @@ async def load_model(
 ):
     """Load AI model"""
     try:
+        # Log the received model name for debugging
+        logging.info(f"Received model load request - Raw name: {model_name}")
+
+        # Sanitize model name for filesystem operations
+        safe_name = sanitize_model_name(model_name)
+        logging.info(f"Sanitized model name: {safe_name}")
+
         # Store model information
         model_info = {
-            'model_name': model_name,
+            'model_name': model_name,  # Store original name
+            'safe_name': safe_name,    # Store sanitized name
             'model_data': request.model_data,
             'model_path': request.model_path,
             'model_hash': request.model_hash,
@@ -639,10 +663,13 @@ async def load_model(
             'session_id': session['session_id']
         }
 
+        # Use sanitized name for cache and file operations
        server.model_cache[model_name] = model_info
 
-        # Store in persistent storage
-        model_file = server.models_path / f"{
+        # Store in persistent storage with safe name
+        model_file = server.models_path / f"{safe_name}.json"
+        logging.info(f"Storing model info at: {model_file}")
+
         with open(model_file, 'w') as f:
             json.dump(model_info, f)
 
@@ -652,6 +679,7 @@ async def load_model(
             "message": f"Model {model_name} loaded successfully",
             "model_info": {
                 "name": model_name,
+                "safe_name": safe_name,
                 "loaded_at": model_info['loaded_at']
             }
         }
@@ -670,9 +698,22 @@ async def run_inference(
 ):
     """Run model inference"""
     try:
-
+        logging.info(f"Running inference - Raw model name: {model_name}")
+        safe_name = sanitize_model_name(model_name)
+        logging.info(f"Running inference - Safe model name: {safe_name}")
+
+        # Check if model is loaded (try both original and safe names)
         if model_name not in server.model_cache:
-
+            # Try loading from file system using safe name
+            model_file = server.models_path / f"{safe_name}.json"
+            if not model_file.exists():
+                logging.error(f"Model {model_name} not found in cache or filesystem")
+                raise HTTPException(status_code=404, detail=f"Model {model_name} not loaded")
+
+            logging.info(f"Loading model info from file: {model_file}")
+            with open(model_file) as f:
+                model_info = json.load(f)
+            server.model_cache[model_name] = model_info
 
         # Simulate inference processing
         # In a real implementation, this would invoke the actual model
@@ -687,11 +728,13 @@ async def run_inference(
         }
 
         server.ops_counter += 1
+        logging.info(f"Inference completed successfully for model: {model_name}")
         return result
 
     except HTTPException:
         raise
     except Exception as e:
+        logging.error(f"Inference operation failed for {model_name}: {str(e)}")
         raise HTTPException(
             status_code=500,
             detail=f"Inference operation failed: {str(e)}"
@@ -704,18 +747,39 @@ async def get_model_status(
 ):
     """Get model status"""
     try:
+        logging.info(f"Checking model status - Raw name: {model_name}")
+        safe_name = sanitize_model_name(model_name)
+        logging.info(f"Checking model status - Safe name: {safe_name}")
+
+        # Check cache first
         if model_name in server.model_cache:
+            logging.info(f"Model {model_name} found in cache")
             return {
                 "status": "loaded",
                 "model_info": server.model_cache[model_name]
             }
-
+
+        # Check file system using safe name
+        model_file = server.models_path / f"{safe_name}.json"
+        if model_file.exists():
+            logging.info(f"Model file found: {model_file}")
+            with open(model_file) as f:
+                model_info = json.load(f)
+            # Update cache
+            server.model_cache[model_name] = model_info
             return {
-                "status": "
-                "
+                "status": "loaded",
+                "model_info": model_info
            }
+
+        logging.info(f"Model {model_name} not found in cache or filesystem")
+        return {
+            "status": "not_loaded",
+            "message": f"Model {model_name} is not loaded"
+        }
 
     except Exception as e:
+        logging.error(f"Model status check failed for {model_name}: {str(e)}")
         raise HTTPException(
             status_code=500,
             detail=f"Model status check failed: {str(e)}"
@@ -987,5 +1051,3 @@ if __name__ == "__main__":
 async def get_status():
     """Get server status"""
     return {"status": "ok", "message": "Virtual GPU Server is running"}
-
-
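For reference, the new sanitize_model_name helper can be exercised on its own. The snippet below copies its logic from the diff above; the model name used is illustrative only and does not come from this repository.

    from urllib.parse import quote, unquote

    def sanitize_model_name(model_name: str) -> str:
        """Decode a URL-encoded model name and replace '/' with '__' (same logic as in the diff)."""
        decoded_name = unquote(model_name)
        return decoded_name.replace('/', '__')

    # Hypothetical namespaced model id; it may reach the endpoint raw or URL-encoded.
    name = "acme/vision-base"
    print(sanitize_model_name(name))                  # -> acme__vision-base
    print(sanitize_model_name(quote(name, safe="")))  # -> acme__vision-base
    # The sanitized form becomes the on-disk filename ("<safe_name>.json"),
    # while the raw name remains the key in server.model_cache.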