Factor Studios committed on
Commit
90c82de
·
verified ·
1 Parent(s): 1980145

Upload 29 files

Browse files
Files changed (1) hide show
  1. test_ai_integration_http.py +98 -26
test_ai_integration_http.py CHANGED
@@ -1,18 +1,44 @@
1
  """
2
  Test Florence-2-Large model integration with vGPU.
3
- Configure PyTorch to use vGPU as device and run model inference.
4
  """
5
  import logging
 
6
  import time
7
  from contextlib import contextmanager
 
8
 
9
  import torch
10
  from torch import nn
11
- from transformers import AutoModel, AutoTokenizer
 
 
 
 
 
 
12
  from virtual_vram import VirtualVRAM
13
  from http_storage import HTTPGPUStorage
14
  from torch_vgpu import VGPUDevice, to_vgpu
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Configure logging
17
  logging.basicConfig(
18
  level=logging.INFO,
@@ -42,13 +68,32 @@ def get_model_size(model):
42
  buffer_size += buffer.nelement() * buffer.element_size()
43
  return param_size + buffer_size
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  def test_ai_integration_http():
46
- """Test Florence-2-Large model on vGPU with PyTorch integration"""
47
- logger.info("Starting vGPU integration test")
48
 
49
  status = {
50
  'model_loaded': False,
 
51
  'model_on_vgpu': False,
 
52
  'inference_complete': False,
53
  'cleanup_success': False
54
  }
@@ -61,13 +106,20 @@ def test_ai_integration_http():
61
  device = VGPUDevice(vram=vram)
62
  logger.info("vGPU device initialized with HTTP storage backend")
63
 
64
- # Load Florence model with verification
65
  model_name = "microsoft/florence-2-large"
66
  logger.info(f"Loading {model_name}")
67
 
68
  try:
69
- tokenizer = AutoTokenizer.from_pretrained(model_name)
70
- model = AutoModel.from_pretrained(model_name)
 
 
 
 
 
 
 
71
  status['model_loaded'] = True
72
 
73
  # Log model architecture
@@ -95,42 +147,62 @@ def test_ai_integration_http():
95
  logger.error(f"Model transfer to vGPU failed: {str(e)}")
96
  raise
97
 
98
- # Prepare and validate input
99
- text = "Testing inference on vGPU device"
100
  try:
101
- inputs = tokenizer(text, return_tensors="pt")
102
- if not inputs or not all(k in inputs for k in ['input_ids', 'attention_mask']):
103
- raise ValueError("Invalid tokenizer output")
 
 
 
 
 
104
 
105
  # Move inputs to vGPU
106
  inputs = {k: to_vgpu(v, vram=vram) for k, v in inputs.items()}
107
- logger.info(f"Input sequence length: {inputs['input_ids'].size(1)}")
 
108
  except Exception as e:
109
- logger.error(f"Input preparation failed: {str(e)}")
110
  raise
111
 
112
- # Run inference with monitoring
113
- logger.info("Running inference...")
114
  start = time.time()
115
  peak_mem = initial_mem
116
 
117
  try:
118
  with torch.no_grad():
 
119
  outputs = model(**inputs)
 
 
 
 
 
120
  if hasattr(storage, 'get_used_memory'):
121
  peak_mem = max(peak_mem, storage.get_used_memory())
122
 
123
- inference_time = time.time() - start
124
- status['inference_complete'] = True
125
-
126
- # Log performance metrics
127
- logger.info(f"Inference stats:")
128
- logger.info(f"- Time: {inference_time:.4f}s")
129
- logger.info(f"- Memory peak: {(peak_mem - initial_mem)/1e9:.2f} GB")
130
- logger.info(f"- Output shape: {outputs.last_hidden_state.shape}")
131
- logger.info(f"- Output device: {outputs.last_hidden_state.device}")
 
 
 
 
 
 
 
 
 
132
  except Exception as e:
133
- logger.error(f"Inference failed: {str(e)}")
134
  raise
135
 
136
  except Exception as e:
 
1
  """
2
  Test Florence-2-Large model integration with vGPU.
3
+ Configure PyTorch to use vGPU as device and run image inference.
4
  """
5
  import logging
6
+ import os
7
  import time
8
  from contextlib import contextmanager
9
+ from io import BytesIO
10
 
11
  import torch
12
  from torch import nn
13
+ import torch.nn.functional as F
14
+ from PIL import Image
15
+ from transformers import (
16
+ AutoTokenizer,
17
+ Florence2ForConditionalGeneration,
18
+ Florence2Processor
19
+ )
20
  from virtual_vram import VirtualVRAM
21
  from http_storage import HTTPGPUStorage
22
  from torch_vgpu import VGPUDevice, to_vgpu
23
 
24
+ # Register vGPU device type
25
+ def register_vgpu_device():
26
+ """Register vGPU as a custom device type"""
27
+ try:
28
+ if hasattr(torch.backends, 'register_custom_device'):
29
+ torch.backends.register_custom_device("vgpu", VGPUDevice)
30
+ else:
31
+ # Fallback: Add device type to torch._C
32
+ if not hasattr(torch._C, "_vgpu_device"):
33
+ torch._C._vgpu_device = VGPUDevice
34
+ logger.info("Using fallback vGPU device registration")
35
+ except Exception as e:
36
+ logger.error(f"vGPU device registration failed: {str(e)}")
37
+ raise
38
+
39
+ # Register vGPU device
40
+ register_vgpu_device()
41
+
42
  # Configure logging
43
  logging.basicConfig(
44
  level=logging.INFO,
 
68
  buffer_size += buffer.nelement() * buffer.element_size()
69
  return param_size + buffer_size
70
 
71
+ def load_image(image_name):
72
+ """Load and preprocess image from sample_task folder"""
73
+ try:
74
+ image_path = os.path.join("sample_task", image_name)
75
+ if not os.path.exists(image_path):
76
+ raise FileNotFoundError(f"Image not found: {image_path}")
77
+
78
+ image = Image.open(image_path)
79
+ # Convert to RGB if needed
80
+ if image.mode != 'RGB':
81
+ image = image.convert('RGB')
82
+ logger.info(f"Loaded image from {image_path}: size={image.size}")
83
+ return image
84
+ except Exception as e:
85
+ logger.error(f"Image loading failed: {str(e)}")
86
+ raise
87
+
88
  def test_ai_integration_http():
89
+ """Test Florence-2-Large model on vGPU with image inference"""
90
+ logger.info("Starting vGPU image inference test")
91
 
92
  status = {
93
  'model_loaded': False,
94
+ 'processor_loaded': False,
95
  'model_on_vgpu': False,
96
+ 'image_processed': False,
97
  'inference_complete': False,
98
  'cleanup_success': False
99
  }
 
106
  device = VGPUDevice(vram=vram)
107
  logger.info("vGPU device initialized with HTTP storage backend")
108
 
109
+ # Load Florence model and processor
110
  model_name = "microsoft/florence-2-large"
111
  logger.info(f"Loading {model_name}")
112
 
113
  try:
114
+ processor = Florence2Processor.from_pretrained(
115
+ model_name,
116
+ trust_remote_code=True
117
+ )
118
+ model = Florence2ForConditionalGeneration.from_pretrained(
119
+ model_name,
120
+ trust_remote_code=True
121
+ )
122
+ status['processor_loaded'] = True
123
  status['model_loaded'] = True
124
 
125
  # Log model architecture
 
147
  logger.error(f"Model transfer to vGPU failed: {str(e)}")
148
  raise
149
 
150
+ # Prepare image input from sample_task folder
 
151
  try:
152
+ # Load image from sample_task directory
153
+ image_name = "sample1.jpg" # Replace with your image name
154
+ image = load_image(image_name)
155
+
156
+ # Process image with Florence processor
157
+ inputs = processor(images=image, return_tensors="pt")
158
+ if not inputs or 'pixel_values' not in inputs:
159
+ raise ValueError("Invalid processor output")
160
 
161
  # Move inputs to vGPU
162
  inputs = {k: to_vgpu(v, vram=vram) for k, v in inputs.items()}
163
+ status['image_processed'] = True
164
+ logger.info(f"Image processed: shape={inputs['pixel_values'].shape}")
165
  except Exception as e:
166
+ logger.error(f"Image preparation failed: {str(e)}")
167
  raise
168
 
169
+ # Run image inference with monitoring
170
+ logger.info("Running image inference...")
171
  start = time.time()
172
  peak_mem = initial_mem
173
 
174
  try:
175
  with torch.no_grad():
176
+ # Get image embeddings
177
  outputs = model(**inputs)
178
+ image_features = outputs.last_hidden_state[:, 0] # Take [CLS] token features
179
+
180
+ # Normalize features
181
+ image_features = F.normalize(image_features, dim=-1)
182
+
183
  if hasattr(storage, 'get_used_memory'):
184
  peak_mem = max(peak_mem, storage.get_used_memory())
185
 
186
+ inference_time = time.time() - start
187
+ status['inference_complete'] = True
188
+
189
+ # Log performance metrics
190
+ logger.info(f"Inference stats:")
191
+ logger.info(f"- Time: {inference_time:.4f}s")
192
+ logger.info(f"- Memory peak: {(peak_mem - initial_mem)/1e9:.2f} GB")
193
+ logger.info(f"- Image features shape: {image_features.shape}")
194
+ logger.info(f"- Feature norm: {torch.norm(image_features).item():.4f}")
195
+ logger.info(f"- Output device: {image_features.device}")
196
+
197
+ # Optionally compute confidence scores
198
+ if hasattr(outputs, 'logits'):
199
+ logits = outputs.logits
200
+ probs = F.softmax(logits, dim=-1)
201
+ confidence = torch.max(probs).item()
202
+ logger.info(f"- Confidence: {confidence:.4f}")
203
+
204
  except Exception as e:
205
+ logger.error(f"Image inference failed: {str(e)}")
206
  raise
207
 
208
  except Exception as e: