Spaces:
Runtime error
Runtime error
Factor Studios
committed on
Update test_ai_integration_http.py
Browse files
test_ai_integration_http.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
|
| 2 |
import logging
|
| 3 |
import os
|
| 4 |
import time
|
|
@@ -96,21 +95,22 @@ def test_ai_integration_http():
|
|
| 96 |
transformers_logger.setLevel(logging.ERROR)
|
| 97 |
|
| 98 |
try:
|
| 99 |
-
# Create pipeline
|
| 100 |
-
# Create pipeline with vGPU device
|
| 101 |
pipe = pipeline(
|
| 102 |
"text-generation",
|
| 103 |
model=model_id,
|
| 104 |
-
torch_dtype=torch.float32, # Use full precision
|
|
|
|
| 105 |
use_safetensors=True,
|
| 106 |
-
trust_remote_code=True
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
)
|
| 108 |
status["pipeline_loaded"] = True
|
| 109 |
-
|
| 110 |
-
# Move pipeline model to vGPU
|
| 111 |
-
pipe.model = to_vgpu(pipe.model, vram=vram)
|
| 112 |
-
pipe.model.eval()
|
| 113 |
status['model_on_vgpu'] = True
|
|
|
|
| 114 |
|
| 115 |
# Log model details
|
| 116 |
logger.info(f"Pipeline created with model: {model_id}")
|
|
|
|
|
|
|
| 1 |
import logging
|
| 2 |
import os
|
| 3 |
import time
|
|
|
|
| 95 |
transformers_logger.setLevel(logging.ERROR)
|
| 96 |
|
| 97 |
try:
|
| 98 |
+
# Create pipeline with direct vGPU device mapping
|
|
|
|
| 99 |
pipe = pipeline(
|
| 100 |
"text-generation",
|
| 101 |
model=model_id,
|
| 102 |
+
torch_dtype=torch.float32, # Use full precision
|
| 103 |
+
device=device, # Load directly to vGPU
|
| 104 |
use_safetensors=True,
|
| 105 |
+
trust_remote_code=True,
|
| 106 |
+
model_kwargs={
|
| 107 |
+
"device_map": device, # Ensure all model parts go to vGPU
|
| 108 |
+
"vram": vram # Pass our vRAM manager
|
| 109 |
+
}
|
| 110 |
)
|
| 111 |
status["pipeline_loaded"] = True
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
status['model_on_vgpu'] = True
|
| 113 |
+
pipe.model.eval()
|
| 114 |
|
| 115 |
# Log model details
|
| 116 |
logger.info(f"Pipeline created with model: {model_id}")
|