Spaces:

factorstudios
/

NMFL

Runtime error

App Files Files Community

Factor Studios committed on Aug 14, 2025

Commit

90c82de

verified ·

1 Parent(s): 1980145

Upload 29 files

Browse files

Files changed (1) hide show

test_ai_integration_http.py +98 -26

test_ai_integration_http.py CHANGED Viewed

@@ -1,18 +1,44 @@
 """
 Test Florence-2-Large model integration with vGPU.
-Configure PyTorch to use vGPU as device and run model inference.
 """
 import logging
 import time
 from contextlib import contextmanager
 import torch
 from torch import nn
-from transformers import AutoModel, AutoTokenizer
 from virtual_vram import VirtualVRAM
 from http_storage import HTTPGPUStorage
 from torch_vgpu import VGPUDevice, to_vgpu
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
@@ -42,13 +68,32 @@ def get_model_size(model):
         buffer_size += buffer.nelement() * buffer.element_size()
     return param_size + buffer_size
 def test_ai_integration_http():
-    """Test Florence-2-Large model on vGPU with PyTorch integration"""
-    logger.info("Starting vGPU integration test")
     status = {
         'model_loaded': False,
         'model_on_vgpu': False,
         'inference_complete': False,
         'cleanup_success': False
     }
@@ -61,13 +106,20 @@ def test_ai_integration_http():
             device = VGPUDevice(vram=vram)
             logger.info("vGPU device initialized with HTTP storage backend")
-            # Load Florence model with verification
             model_name = "microsoft/florence-2-large"
             logger.info(f"Loading {model_name}")
             try:
-                tokenizer = AutoTokenizer.from_pretrained(model_name)
-                model = AutoModel.from_pretrained(model_name)
                 status['model_loaded'] = True
                 # Log model architecture
@@ -95,42 +147,62 @@ def test_ai_integration_http():
                 logger.error(f"Model transfer to vGPU failed: {str(e)}")
                 raise
-            # Prepare and validate input
-            text = "Testing inference on vGPU device"
             try:
-                inputs = tokenizer(text, return_tensors="pt")
-                if not inputs or not all(k in inputs for k in ['input_ids', 'attention_mask']):
-                    raise ValueError("Invalid tokenizer output")
                 # Move inputs to vGPU
                 inputs = {k: to_vgpu(v, vram=vram) for k, v in inputs.items()}
-                logger.info(f"Input sequence length: {inputs['input_ids'].size(1)}")
             except Exception as e:
-                logger.error(f"Input preparation failed: {str(e)}")
                 raise
-            # Run inference with monitoring
-            logger.info("Running inference...")
             start = time.time()
             peak_mem = initial_mem
             try:
                 with torch.no_grad():
                     outputs = model(**inputs)
                     if hasattr(storage, 'get_used_memory'):
                         peak_mem = max(peak_mem, storage.get_used_memory())
-                inference_time = time.time() - start
-                status['inference_complete'] = True
-                # Log performance metrics
-                logger.info(f"Inference stats:")
-                logger.info(f"- Time: {inference_time:.4f}s")
-                logger.info(f"- Memory peak: {(peak_mem - initial_mem)/1e9:.2f} GB")
-                logger.info(f"- Output shape: {outputs.last_hidden_state.shape}")
-                logger.info(f"- Output device: {outputs.last_hidden_state.device}")
             except Exception as e:
-                logger.error(f"Inference failed: {str(e)}")
                 raise
         except Exception as e:

 """
 Test Florence-2-Large model integration with vGPU.
+Configure PyTorch to use vGPU as device and run image inference.
 """
 import logging
+import os
 import time
 from contextlib import contextmanager
+from io import BytesIO
 import torch
 from torch import nn
+import torch.nn.functional as F
+from PIL import Image
+from transformers import (
+    AutoTokenizer,
+    Florence2ForConditionalGeneration,
+    Florence2Processor
+)
 from virtual_vram import VirtualVRAM
 from http_storage import HTTPGPUStorage
 from torch_vgpu import VGPUDevice, to_vgpu
+# Register vGPU device type
+def register_vgpu_device():
+    """Register vGPU as a custom device type.
+
+    Calls ``torch.backends.register_custom_device("vgpu", VGPUDevice)`` when
+    that attribute exists; otherwise stores ``VGPUDevice`` on
+    ``torch._C._vgpu_device`` (only if that attribute is not already set).
+
+    Raises:
+        Exception: Any failure during registration is logged and re-raised.
+    """
+    # This function is invoked at import time, above the logging setup and
+    # before any module-level ``logger`` assignment visible in this file.
+    # Referencing the global ``logger`` here would raise NameError, so a
+    # logger is obtained locally instead.
+    log = logging.getLogger(__name__)
+    try:
+        if hasattr(torch.backends, 'register_custom_device'):
+            torch.backends.register_custom_device("vgpu", VGPUDevice)
+        else:
+            # Fallback: Add device type to torch._C
+            if not hasattr(torch._C, "_vgpu_device"):
+                torch._C._vgpu_device = VGPUDevice
+            log.info("Using fallback vGPU device registration")
+    except Exception as e:
+        log.error(f"vGPU device registration failed: {str(e)}")
+        raise
+# Register vGPU device
+register_vgpu_device()
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
         buffer_size += buffer.nelement() * buffer.element_size()
     return param_size + buffer_size
+def load_image(image_name):
+    """Load an image from the ``sample_task`` folder as an RGB PIL image.
+
+    Args:
+        image_name: File name of the image, resolved relative to the
+            ``sample_task`` directory.
+
+    Returns:
+        A PIL image in ``'RGB'`` mode whose pixel data is fully loaded and
+        independent of the file on disk.
+
+    Raises:
+        FileNotFoundError: If the resolved path does not exist.
+        Exception: Any other failure while opening or decoding the image;
+            the error is logged and re-raised.
+    """
+    try:
+        image_path = os.path.join("sample_task", image_name)
+        if not os.path.exists(image_path):
+            raise FileNotFoundError(f"Image not found: {image_path}")
+        # Image.open is lazy and keeps the file open; use a context manager
+        # so the handle is released, and convert inside it so the pixel data
+        # is decoded into a detached copy before the file is closed.
+        with Image.open(image_path) as source:
+            image = source.convert('RGB')
+        logger.info(f"Loaded image from {image_path}: size={image.size}")
+        return image
+    except Exception as e:
+        logger.error(f"Image loading failed: {str(e)}")
+        raise
 def test_ai_integration_http():
+    """Test Florence-2-Large model on vGPU with image inference"""
+    logger.info("Starting vGPU image inference test")
     status = {
         'model_loaded': False,
+        'processor_loaded': False,
         'model_on_vgpu': False,
+        'image_processed': False,
         'inference_complete': False,
         'cleanup_success': False
     }
             device = VGPUDevice(vram=vram)
             logger.info("vGPU device initialized with HTTP storage backend")
+            # Load Florence model and processor
             model_name = "microsoft/florence-2-large"
             logger.info(f"Loading {model_name}")
             try:
+                processor = Florence2Processor.from_pretrained(
+                    model_name,
+                    trust_remote_code=True
+                )
+                model = Florence2ForConditionalGeneration.from_pretrained(
+                    model_name,
+                    trust_remote_code=True
+                )
+                status['processor_loaded'] = True
                 status['model_loaded'] = True
                 # Log model architecture
                 logger.error(f"Model transfer to vGPU failed: {str(e)}")
                 raise
+            # Prepare image input from sample_task folder
             try:
+                # Load image from sample_task directory
+                image_name = "sample1.jpg"  # Replace with your image name
+                image = load_image(image_name)
+                # Process image with Florence processor
+                inputs = processor(images=image, return_tensors="pt")
+                if not inputs or 'pixel_values' not in inputs:
+                    raise ValueError("Invalid processor output")
                 # Move inputs to vGPU
                 inputs = {k: to_vgpu(v, vram=vram) for k, v in inputs.items()}
+                status['image_processed'] = True
+                logger.info(f"Image processed: shape={inputs['pixel_values'].shape}")
             except Exception as e:
+                logger.error(f"Image preparation failed: {str(e)}")
                 raise
+            # Run image inference with monitoring
+            logger.info("Running image inference...")
             start = time.time()
             peak_mem = initial_mem
             try:
                 with torch.no_grad():
+                    # Get image embeddings
                     outputs = model(**inputs)
+                    image_features = outputs.last_hidden_state[:, 0]  # Take [CLS] token features
+                    # Normalize features
+                    image_features = F.normalize(image_features, dim=-1)
                     if hasattr(storage, 'get_used_memory'):
                         peak_mem = max(peak_mem, storage.get_used_memory())
+                    inference_time = time.time() - start
+                    status['inference_complete'] = True
+                    # Log performance metrics
+                    logger.info(f"Inference stats:")
+                    logger.info(f"- Time: {inference_time:.4f}s")
+                    logger.info(f"- Memory peak: {(peak_mem - initial_mem)/1e9:.2f} GB")
+                    logger.info(f"- Image features shape: {image_features.shape}")
+                    logger.info(f"- Feature norm: {torch.norm(image_features).item():.4f}")
+                    logger.info(f"- Output device: {image_features.device}")
+                    # Optionally compute confidence scores
+                    if hasattr(outputs, 'logits'):
+                        logits = outputs.logits
+                        probs = F.softmax(logits, dim=-1)
+                        confidence = torch.max(probs).item()
+                        logger.info(f"- Confidence: {confidence:.4f}")
             except Exception as e:
+                logger.error(f"Image inference failed: {str(e)}")
                 raise
         except Exception as e: