Factor Studios committed
Commit b8076f9 · verified · 1 parent: 3babd5a

Update virtual_gpu_server_http.py

Files changed (1)
  1. virtual_gpu_server_http.py +72 -10
virtual_gpu_server_http.py CHANGED
@@ -16,8 +16,15 @@ from datetime import datetime, timedelta
 import hashlib
 import gzip
 import base64
+import logging
 from pydantic import BaseModel
 
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+
 # Create FastAPI instance with enhanced configuration
 app = FastAPI(
     title="Virtual GPU Server",
@@ -621,6 +628,15 @@ async def get_cache(
             detail=f"Cache get operation failed: {str(e)}"
         )
 
+def sanitize_model_name(model_name: str) -> str:
+    """
+    Sanitize model name for safe file system usage.
+    Decodes URL-encoded name and replaces slashes with double underscores.
+    """
+    from urllib.parse import unquote
+    decoded_name = unquote(model_name)
+    return decoded_name.replace('/', '__')
+
 @app.post("/api/v1/models/{model_name}/load")
 async def load_model(
     model_name: str,
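
Note: a quick illustration of what sanitize_model_name does; the model names here are made up for the example:

from urllib.parse import unquote

def sanitize_model_name(model_name: str) -> str:
    decoded_name = unquote(model_name)
    return decoded_name.replace('/', '__')

# URL-encoded and literal slashes both flatten to filesystem-safe names:
assert sanitize_model_name("meta-llama%2FLlama-3-8B") == "meta-llama__Llama-3-8B"
assert sanitize_model_name("org/model") == "org__model"
assert sanitize_model_name("plain-model") == "plain-model"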
@@ -629,9 +645,17 @@ async def load_model(
 ):
     """Load AI model"""
     try:
+        # Log the received model name for debugging
+        logging.info(f"Received model load request - Raw name: {model_name}")
+
+        # Sanitize model name for filesystem operations
+        safe_name = sanitize_model_name(model_name)
+        logging.info(f"Sanitized model name: {safe_name}")
+
         # Store model information
         model_info = {
-            'model_name': model_name,
+            'model_name': model_name,  # Store original name
+            'safe_name': safe_name,    # Store sanitized name
             'model_data': request.model_data,
             'model_path': request.model_path,
             'model_hash': request.model_hash,
@@ -639,10 +663,13 @@ async def load_model(
             'session_id': session['session_id']
         }
 
+        # Use sanitized name for cache and file operations
         server.model_cache[model_name] = model_info
 
-        # Store in persistent storage
-        model_file = server.models_path / f"{model_name}.json"
+        # Store in persistent storage with safe name
+        model_file = server.models_path / f"{safe_name}.json"
+        logging.info(f"Storing model info at: {model_file}")
+
         with open(model_file, 'w') as f:
             json.dump(model_info, f)
 
@@ -652,6 +679,7 @@ async def load_model(
             "message": f"Model {model_name} loaded successfully",
             "model_info": {
                 "name": model_name,
+                "safe_name": safe_name,
                 "loaded_at": model_info['loaded_at']
             }
         }
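
Note: a hypothetical client call against the updated load endpoint; the host, port, and request-body values are assumptions for illustration (the session/auth dependency is not shown in this diff):

import requests
from urllib.parse import quote

# Percent-encode the slash so the repo-style name stays one path segment.
name = quote("meta-llama/Llama-3-8B", safe="")

resp = requests.post(
    f"http://localhost:8000/api/v1/models/{name}/load",
    json={
        "model_data": None,                  # field names match the request model above
        "model_path": "/models/llama3.bin",  # illustrative
        "model_hash": "deadbeef",            # illustrative
    },
)
print(resp.json()["model_info"]["safe_name"])  # expected: meta-llama__Llama-3-8B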
@@ -670,9 +698,22 @@ async def run_inference(
 ):
     """Run model inference"""
     try:
-        # Check if model is loaded
+        logging.info(f"Running inference - Raw model name: {model_name}")
+        safe_name = sanitize_model_name(model_name)
+        logging.info(f"Running inference - Safe model name: {safe_name}")
+
+        # Check if model is loaded (try both original and safe names)
         if model_name not in server.model_cache:
-            raise HTTPException(status_code=404, detail=f"Model {model_name} not loaded")
+            # Try loading from file system using safe name
+            model_file = server.models_path / f"{safe_name}.json"
+            if not model_file.exists():
+                logging.error(f"Model {model_name} not found in cache or filesystem")
+                raise HTTPException(status_code=404, detail=f"Model {model_name} not loaded")
+
+            logging.info(f"Loading model info from file: {model_file}")
+            with open(model_file) as f:
+                model_info = json.load(f)
+            server.model_cache[model_name] = model_info
 
         # Simulate inference processing
         # In a real implementation, this would invoke the actual model
@@ -687,11 +728,13 @@ async def run_inference(
         }
 
         server.ops_counter += 1
+        logging.info(f"Inference completed successfully for model: {model_name}")
         return result
 
     except HTTPException:
         raise
     except Exception as e:
+        logging.error(f"Inference operation failed for {model_name}: {str(e)}")
         raise HTTPException(
             status_code=500,
             detail=f"Inference operation failed: {str(e)}"
@@ -704,18 +747,39 @@ async def get_model_status(
 ):
     """Get model status"""
     try:
+        logging.info(f"Checking model status - Raw name: {model_name}")
+        safe_name = sanitize_model_name(model_name)
+        logging.info(f"Checking model status - Safe name: {safe_name}")
+
+        # Check cache first
         if model_name in server.model_cache:
+            logging.info(f"Model {model_name} found in cache")
             return {
                 "status": "loaded",
                 "model_info": server.model_cache[model_name]
             }
-        else:
+
+        # Check file system using safe name
+        model_file = server.models_path / f"{safe_name}.json"
+        if model_file.exists():
+            logging.info(f"Model file found: {model_file}")
+            with open(model_file) as f:
+                model_info = json.load(f)
+            # Update cache
+            server.model_cache[model_name] = model_info
             return {
-                "status": "not_loaded",
-                "message": f"Model {model_name} is not loaded"
+                "status": "loaded",
+                "model_info": model_info
             }
+
+        logging.info(f"Model {model_name} not found in cache or filesystem")
+        return {
+            "status": "not_loaded",
+            "message": f"Model {model_name} is not loaded"
+        }
 
     except Exception as e:
+        logging.error(f"Model status check failed for {model_name}: {str(e)}")
         raise HTTPException(
             status_code=500,
             detail=f"Model status check failed: {str(e)}"
@@ -987,5 +1051,3 @@ if __name__ == "__main__":
 async def get_status():
     """Get server status"""
     return {"status": "ok", "message": "Virtual GPU Server is running"}
-
-
 