Factor Studios committed on
Commit
54aca07
·
verified ·
1 Parent(s): 7670c1d

Update test_ai_integration_http.py

Browse files
Files changed (1) hide show
  1. test_ai_integration_http.py +18 -12
test_ai_integration_http.py CHANGED
@@ -17,6 +17,10 @@ from torch_vgpu import VGPUDevice, to_vgpu
17
  def setup_vgpu():
18
  """Setup vGPU device"""
19
  try:
 
 
 
 
20
  # Create and register vGPU device
21
  vgpu = VGPUDevice()
22
  device = vgpu.device()
@@ -96,16 +100,17 @@ def test_ai_integration_http():
96
  transformers_logger.setLevel(logging.ERROR)
97
 
98
  try:
99
- # Create pipeline
100
  pipe = pipeline(
101
  "text-generation",
102
  model=model_id,
103
- torch_dtype="auto",
104
- device=device # Use our vGPU device
105
  )
106
  status['pipeline_loaded'] = True
107
 
108
- # Move pipeline to vGPU
 
109
  pipe.model = to_vgpu(pipe.model, vram=vram)
110
  status['model_on_vgpu'] = True
111
 
@@ -131,19 +136,18 @@ def test_ai_integration_http():
131
  peak_mem = initial_mem
132
 
133
  try:
134
- # Prepare messages
135
- messages = [
136
- {"role": "user", "content": "Explain how virtual GPUs work in simple terms."}
137
- ]
138
 
139
  with torch.no_grad():
140
  # Generate text
141
  outputs = pipe(
142
- messages,
143
  max_new_tokens=256,
144
  temperature=0.7,
145
  top_p=0.95,
146
- top_k=40
 
147
  )
148
 
149
  if hasattr(storage, 'get_used_memory'):
@@ -169,8 +173,10 @@ def test_ai_integration_http():
169
  finally:
170
  # Cleanup and status report
171
  try:
172
- del model
173
- del outputs
 
 
174
  torch.cuda.empty_cache() if hasattr(torch, 'cuda') else None
175
  status['cleanup_success'] = True
176
  except Exception as e:
 
17
  def setup_vgpu():
18
  """Setup vGPU device"""
19
  try:
20
+ # Register vGPU device type
21
+ if not hasattr(torch, 'vgpu'):
22
+ torch.register_privateuseone_backend()
23
+
24
  # Create and register vGPU device
25
  vgpu = VGPUDevice()
26
  device = vgpu.device()
 
100
  transformers_logger.setLevel(logging.ERROR)
101
 
102
  try:
103
+ # Create pipeline and manually move to vGPU
104
  pipe = pipeline(
105
  "text-generation",
106
  model=model_id,
107
+ torch_dtype=torch.float32,
108
+ device_map=None # Don't auto-place on devices
109
  )
110
  status['pipeline_loaded'] = True
111
 
112
+ # Move model to vGPU
113
+ pipe.model = pipe.model.to(device)
114
  pipe.model = to_vgpu(pipe.model, vram=vram)
115
  status['model_on_vgpu'] = True
116
 
 
136
  peak_mem = initial_mem
137
 
138
  try:
139
+ # Prepare input text
140
+ text = "Explain how virtual GPUs work in simple terms."
 
 
141
 
142
  with torch.no_grad():
143
  # Generate text
144
  outputs = pipe(
145
+ text,
146
  max_new_tokens=256,
147
  temperature=0.7,
148
  top_p=0.95,
149
+ top_k=40,
150
+ do_sample=True
151
  )
152
 
153
  if hasattr(storage, 'get_used_memory'):
 
173
  finally:
174
  # Cleanup and status report
175
  try:
176
+ if 'pipe' in locals():
177
+ del pipe
178
+ if 'outputs' in locals():
179
+ del outputs
180
  torch.cuda.empty_cache() if hasattr(torch, 'cuda') else None
181
  status['cleanup_success'] = True
182
  except Exception as e: