Spaces:
Runtime error
Runtime error
Factor Studios
committed on
Update test_ai_integration_http.py
Browse files
test_ai_integration_http.py
CHANGED
|
@@ -95,20 +95,19 @@ def test_ai_integration_http():
|
|
| 95 |
transformers_logger.setLevel(logging.ERROR)
|
| 96 |
|
| 97 |
try:
|
| 98 |
-
# Create pipeline
|
| 99 |
-
# Create pipeline with vGPU device
|
| 100 |
pipe = pipeline(
|
| 101 |
"text-generation",
|
| 102 |
model=model_id,
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
| 104 |
use_safetensors=True,
|
| 105 |
trust_remote_code=True,
|
| 106 |
-
device=device
|
| 107 |
)
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
# Move pipeline model to vGPU
|
| 111 |
-
|
| 112 |
status['model_on_vgpu'] = True
|
| 113 |
|
| 114 |
# Log model details
|
|
|
|
| 95 |
transformers_logger.setLevel(logging.ERROR)
|
| 96 |
|
| 97 |
try:
|
| 98 |
+
# Create pipeline with model directly on vGPU
|
|
|
|
| 99 |
pipe = pipeline(
|
| 100 |
"text-generation",
|
| 101 |
model=model_id,
|
| 102 |
+
model_kwargs={
|
| 103 |
+
"torch_dtype": torch.float32, # Use full precision
|
| 104 |
+
"device_map": {"": device}, # Map all modules to our vGPU device
|
| 105 |
+
},
|
| 106 |
use_safetensors=True,
|
| 107 |
trust_remote_code=True,
|
| 108 |
+
device=device # Use our vGPU device
|
| 109 |
)
|
| 110 |
+
status["pipeline_loaded"] = True
|
|
|
|
|
|
|
|
|
|
| 111 |
status['model_on_vgpu'] = True
|
| 112 |
|
| 113 |
# Log model details
|