Factor Studios committed on
Commit
2456f91
·
verified ·
1 Parent(s): c0e2f27

Update test_ai_integration_http.py

Browse files
Files changed (1) hide show
  1. test_ai_integration_http.py +9 -9
test_ai_integration_http.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import logging
3
  import os
4
  import time
@@ -96,21 +95,22 @@ def test_ai_integration_http():
96
  transformers_logger.setLevel(logging.ERROR)
97
 
98
  try:
99
- # Create pipeline
100
- # Create pipeline with vGPU device
101
  pipe = pipeline(
102
  "text-generation",
103
  model=model_id,
104
- torch_dtype=torch.float32, # Use full precision,
 
105
  use_safetensors=True,
106
- trust_remote_code=True
 
 
 
 
107
  )
108
  status["pipeline_loaded"] = True
109
-
110
- # Move pipeline model to vGPU
111
- pipe.model = to_vgpu(pipe.model, vram=vram)
112
- pipe.model.eval()
113
  status['model_on_vgpu'] = True
 
114
 
115
  # Log model details
116
  logger.info(f"Pipeline created with model: {model_id}")
 
 
1
  import logging
2
  import os
3
  import time
 
95
  transformers_logger.setLevel(logging.ERROR)
96
 
97
  try:
98
+ # Create pipeline with direct vGPU device mapping
 
99
  pipe = pipeline(
100
  "text-generation",
101
  model=model_id,
102
+ torch_dtype=torch.float32, # Use full precision
103
+ device=device, # Load directly to vGPU
104
  use_safetensors=True,
105
+ trust_remote_code=True,
106
+ model_kwargs={
107
+ "device_map": device, # Ensure all model parts go to vGPU
108
+ "vram": vram # Pass our vRAM manager
109
+ }
110
  )
111
  status["pipeline_loaded"] = True
 
 
 
 
112
  status['model_on_vgpu'] = True
113
+ pipe.model.eval()
114
 
115
  # Log model details
116
  logger.info(f"Pipeline created with model: {model_id}")