Spaces:
Runtime error
Runtime error
Factor Studios
committed on
Update test_ai_integration_http.py
Browse files
test_ai_integration_http.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
|
| 2 |
import logging
|
| 3 |
import os
|
| 4 |
import time
|
|
@@ -96,21 +95,22 @@ def test_ai_integration_http():
|
|
| 96 |
transformers_logger.setLevel(logging.ERROR)
|
| 97 |
|
| 98 |
try:
|
| 99 |
-
# Create pipeline
|
| 100 |
-
# Create pipeline with vGPU device
|
| 101 |
pipe = pipeline(
|
| 102 |
"text-generation",
|
| 103 |
model=model_id,
|
| 104 |
-
torch_dtype=torch.float32, # Use full precision
|
|
|
|
| 105 |
use_safetensors=True,
|
| 106 |
-
trust_remote_code=True
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
)
|
| 108 |
status["pipeline_loaded"] = True
|
| 109 |
-
|
| 110 |
-
# Move pipeline model to vGPU
|
| 111 |
-
pipe.model = to_vgpu(pipe.model, vram=vram)
|
| 112 |
-
pipe.model.eval()
|
| 113 |
status['model_on_vgpu'] = True
|
|
|
|
| 114 |
|
| 115 |
# Log model details
|
| 116 |
logger.info(f"Pipeline created with model: {model_id}")
|
|
|
|
|
|
|
| 1 |
import logging
|
| 2 |
import os
|
| 3 |
import time
|
|
|
|
| 95 |
transformers_logger.setLevel(logging.ERROR)
|
| 96 |
|
| 97 |
try:
|
| 98 |
+
# Create pipeline with direct vGPU device mapping
|
|
|
|
| 99 |
pipe = pipeline(
|
| 100 |
"text-generation",
|
| 101 |
model=model_id,
|
| 102 |
+
torch_dtype=torch.float32, # Use full precision
|
| 103 |
+
device=device, # Load directly to vGPU
|
| 104 |
use_safetensors=True,
|
| 105 |
+
trust_remote_code=True,
|
| 106 |
+
model_kwargs={
|
| 107 |
+
"device_map": device, # Ensure all model parts go to vGPU
|
| 108 |
+
"vram": vram # Pass our vRAM manager
|
| 109 |
+
}
|
| 110 |
)
|
| 111 |
status["pipeline_loaded"] = True
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
status['model_on_vgpu'] = True
|
| 113 |
+
pipe.model.eval()
|
| 114 |
|
| 115 |
# Log model details
|
| 116 |
logger.info(f"Pipeline created with model: {model_id}")
|