Factor Studios committed on
Commit
89dccee
·
verified ·
1 Parent(s): c2f9e6c

Update test_ai_integration_http.py

Browse files
Files changed (1) hide show
  1. test_ai_integration_http.py +55 -86
test_ai_integration_http.py CHANGED
@@ -1,23 +1,19 @@
1
  """
2
- Test Florence-2-Large model integration with vGPU.
3
- Configure PyTorch to use vGPU as device and run image inference.
4
  """
5
  import logging
6
  import os
7
  import time
8
  from contextlib import contextmanager
9
- from io import BytesIO
10
  from typing import Any, Optional
11
 
12
  import torch
13
- from torch import nn
14
  import torch.nn.functional as F
15
- from torch.overrides import TorchFunctionMode
16
- from PIL import Image
17
  from transformers import (
18
- AutoProcessor,
19
- AutoModel,
20
- AutoConfig
21
  )
22
  from virtual_vram import VirtualVRAM
23
  from http_storage import HTTPGPUStorage
@@ -68,33 +64,20 @@ def get_model_size(model):
68
  buffer_size += buffer.nelement() * buffer.element_size()
69
  return param_size + buffer_size
70
 
71
- def load_image(image_name):
72
- """Load and preprocess image from sample_task folder"""
73
- try:
74
- image_path = os.path.join("sample_task", image_name)
75
- if not os.path.exists(image_path):
76
- raise FileNotFoundError(f"Image not found: {image_path}")
77
-
78
- image = Image.open(image_path)
79
- # Convert to RGB if needed
80
- if image.mode != 'RGB':
81
- image = image.convert('RGB')
82
- logger.info(f"Loaded image from {image_path}: size={image.size}")
83
- return image
84
- except Exception as e:
85
- logger.error(f"Image loading failed: {str(e)}")
86
- raise
87
 
88
  def test_ai_integration_http():
89
- """Test Florence-2-Large model on vGPU with image inference"""
90
- logger.info("Starting vGPU image inference test")
91
 
92
  status = {
93
  'model_loaded': False,
94
- 'processor_loaded': False,
95
  'model_on_vgpu': False,
96
- 'image_processed': False,
97
- 'inference_complete': False,
98
  'cleanup_success': False
99
  }
100
 
@@ -108,40 +91,37 @@ def test_ai_integration_http():
108
  device = setup_vgpu()
109
  logger.info(f"vGPU initialized with device {device}")
110
 
111
- # Load Florence model and processor
112
- model_name = "microsoft/florence-2-large"
113
  logger.info(f"Loading {model_name}")
114
 
115
  try:
116
  # Disable transformers logging temporarily
117
- import logging
118
  transformers_logger = logging.getLogger("transformers")
119
  original_level = transformers_logger.level
120
  transformers_logger.setLevel(logging.ERROR)
121
 
122
  try:
123
- # Load processor first
124
- processor = AutoProcessor.from_pretrained(
125
  model_name,
126
- trust_remote_code=True
 
127
  )
128
- status['processor_loaded'] = True
129
 
130
- # Import the specific model class
131
- from transformers.models.florence.modeling_florence import Florence2Model
132
-
133
- # Load model directly with specific class
134
- model = Florence2Model.from_pretrained(
135
  model_name,
136
  trust_remote_code=True,
137
- torch_dtype=torch.float32,
138
- device_map=None,
139
- ignore_mismatched_sizes=True
140
  )
141
  status['model_loaded'] = True
142
 
143
  # Log model details
144
- logger.info(f"Processor type: {type(processor).__name__}")
145
  logger.info(f"Model type: {type(model).__name__}")
146
 
147
  # Log model architecture
@@ -175,62 +155,51 @@ def test_ai_integration_http():
175
  logger.error(f"Model transfer to vGPU failed: {str(e)}")
176
  raise
177
 
178
- # Prepare image input from sample_task folder
179
- try:
180
- # Load image from sample_task directory
181
- image_name = "sample1.jpg" # Replace with your image name
182
- image = load_image(image_name)
183
-
184
- # Process image with Florence processor
185
- inputs = processor(images=image, return_tensors="pt")
186
- if not inputs or 'pixel_values' not in inputs:
187
- raise ValueError("Invalid processor output")
188
-
189
- # Move inputs to vGPU
190
- inputs = {k: to_vgpu(v, vram=vram) for k, v in inputs.items()}
191
- status['image_processed'] = True
192
- logger.info(f"Image processed: shape={inputs['pixel_values'].shape}")
193
- except Exception as e:
194
- logger.error(f"Image preparation failed: {str(e)}")
195
- raise
196
-
197
- # Run image inference with monitoring
198
- logger.info("Running image inference...")
199
  start = time.time()
200
  peak_mem = initial_mem
201
 
202
  try:
 
 
 
 
 
 
 
 
 
 
 
203
  with torch.no_grad():
204
- # Get image embeddings
205
- outputs = model(**inputs)
206
- image_features = outputs.last_hidden_state[:, 0] # Take [CLS] token features
207
-
208
- # Normalize features
209
- image_features = F.normalize(image_features, dim=-1)
 
 
 
 
 
210
 
211
  if hasattr(storage, 'get_used_memory'):
212
  peak_mem = max(peak_mem, storage.get_used_memory())
213
 
214
  inference_time = time.time() - start
215
- status['inference_complete'] = True
216
 
217
  # Log performance metrics
218
- logger.info(f"Inference stats:")
219
  logger.info(f"- Time: {inference_time:.4f}s")
220
  logger.info(f"- Memory peak: {(peak_mem - initial_mem)/1e9:.2f} GB")
221
- logger.info(f"- Image features shape: {image_features.shape}")
222
- logger.info(f"- Feature norm: {torch.norm(image_features).item():.4f}")
223
- logger.info(f"- Output device: {image_features.device}")
224
-
225
- # Optionally compute confidence scores
226
- if hasattr(outputs, 'logits'):
227
- logits = outputs.logits
228
- probs = F.softmax(logits, dim=-1)
229
- confidence = torch.max(probs).item()
230
- logger.info(f"- Confidence: {confidence:.4f}")
231
 
232
  except Exception as e:
233
- logger.error(f"Image inference failed: {str(e)}")
234
  raise
235
 
236
  except Exception as e:
 
1
  """
2
+ Test Llama-2-7b-instruct model integration with vGPU.
3
+ Configure PyTorch to use vGPU as device for text generation.
4
  """
5
  import logging
6
  import os
7
  import time
8
  from contextlib import contextmanager
 
9
  from typing import Any, Optional
10
 
11
  import torch
 
12
  import torch.nn.functional as F
 
 
13
  from transformers import (
14
+ AutoTokenizer,
15
+ AutoModelForCausalLM,
16
+ TextStreamer
17
  )
18
  from virtual_vram import VirtualVRAM
19
  from http_storage import HTTPGPUStorage
 
64
  buffer_size += buffer.nelement() * buffer.element_size()
65
  return param_size + buffer_size
66
 
67
+ def prepare_prompt(instruction: str) -> str:
68
+ """Prepare a prompt for Llama-2 using its chat format."""
69
+ # Format: <s>[INST] instruction [/INST] assistant response </s>[INST] ...
70
+ return f"<s>[INST] {instruction} [/INST]"
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  def test_ai_integration_http():
73
+ """Test Llama-2-7b-instruct model on vGPU with text generation"""
74
+ logger.info("Starting vGPU text generation test")
75
 
76
  status = {
77
  'model_loaded': False,
78
+ 'tokenizer_loaded': False,
79
  'model_on_vgpu': False,
80
+ 'generation_complete': False,
 
81
  'cleanup_success': False
82
  }
83
 
 
91
  device = setup_vgpu()
92
  logger.info(f"vGPU initialized with device {device}")
93
 
94
+ # Load Llama model and tokenizer
95
+ model_name = "meta-llama/Llama-2-7b-chat-hf"
96
  logger.info(f"Loading {model_name}")
97
 
98
  try:
99
  # Disable transformers logging temporarily
 
100
  transformers_logger = logging.getLogger("transformers")
101
  original_level = transformers_logger.level
102
  transformers_logger.setLevel(logging.ERROR)
103
 
104
  try:
105
+ # Load tokenizer first
106
+ tokenizer = AutoTokenizer.from_pretrained(
107
  model_name,
108
+ trust_remote_code=True,
109
+ use_fast=True
110
  )
111
+ status['tokenizer_loaded'] = True
112
 
113
+ # Load model with full precision
114
+ model = AutoModelForCausalLM.from_pretrained(
 
 
 
115
  model_name,
116
  trust_remote_code=True,
117
+ torch_dtype=torch.float32, # Use full precision
118
+ device_map=None, # Don't auto-map devices
119
+ use_safetensors=True
120
  )
121
  status['model_loaded'] = True
122
 
123
  # Log model details
124
+ logger.info(f"Tokenizer type: {type(tokenizer).__name__}")
125
  logger.info(f"Model type: {type(model).__name__}")
126
 
127
  # Log model architecture
 
155
  logger.error(f"Model transfer to vGPU failed: {str(e)}")
156
  raise
157
 
158
+ # Run text generation
159
+ logger.info("Running text generation...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  start = time.time()
161
  peak_mem = initial_mem
162
 
163
  try:
164
+ # Prepare input prompt
165
+ instruction = "Explain how virtual GPUs work in simple terms."
166
+ prompt = prepare_prompt(instruction)
167
+
168
+ # Tokenize input
169
+ inputs = tokenizer(prompt, return_tensors="pt")
170
+ inputs = {k: to_vgpu(v, vram=vram) for k, v in inputs.items()}
171
+
172
+ # Set up streamer for token-by-token output
173
+ streamer = TextStreamer(tokenizer)
174
+
175
  with torch.no_grad():
176
+ # Generate text
177
+ outputs = model.generate(
178
+ **inputs,
179
+ max_length=512,
180
+ temperature=0.7,
181
+ top_p=0.95,
182
+ top_k=40,
183
+ num_beams=1,
184
+ streamer=streamer,
185
+ pad_token_id=tokenizer.pad_token_id
186
+ )
187
 
188
  if hasattr(storage, 'get_used_memory'):
189
  peak_mem = max(peak_mem, storage.get_used_memory())
190
 
191
  inference_time = time.time() - start
192
+ status['generation_complete'] = True
193
 
194
  # Log performance metrics
195
+ logger.info(f"\nGeneration stats:")
196
  logger.info(f"- Time: {inference_time:.4f}s")
197
  logger.info(f"- Memory peak: {(peak_mem - initial_mem)/1e9:.2f} GB")
198
+ logger.info(f"- Output length: {len(outputs[0])}")
199
+ logger.info(f"- Output device: {outputs.device}")
 
 
 
 
 
 
 
 
200
 
201
  except Exception as e:
202
+ logger.error(f"Text generation failed: {str(e)}")
203
  raise
204
 
205
  except Exception as e: