dung-vpt-uney committed on
Commit
420a658
·
1 Parent(s): 849a3f2

Update Visual-CoT demo - 2025-10-12 23:02:28

Browse files

Fixes:
- Fix LLaVA config registration error (compatibility with newer transformers)
- Update Gradio to latest version (security fixes)
- Auto-deployed via update script

Files changed (2)
  1. app.py +1 -1
  2. llava/mm_utils.py +19 -2
app.py CHANGED
@@ -54,7 +54,7 @@ else:
54
  # Configuration
55
  # =============================================================================
56
 
57
- MODEL_PATH = "deepcs233/VisCoT-7b-336" # Hugging Face model ID
58
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
59
 
60
  # Benchmark datasets available
 
54
  # Configuration
55
  # =============================================================================
56
 
57
+ MODEL_PATH = "deepcs233/VisCoT-7b-224" # Hugging Face model ID (smallest version)
58
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
59
 
60
  # Benchmark datasets available
llava/mm_utils.py CHANGED
@@ -26,6 +26,8 @@ def expand2square(pil_img, background_color):
26
 
27
 
28
  def process_images(images, image_processor, model_cfg):
 
 
29
  image_aspect_ratio = getattr(model_cfg, "image_aspect_ratio", None)
30
  new_images = []
31
 
@@ -36,13 +38,28 @@ def process_images(images, image_processor, model_cfg):
36
  else:
37
  processor = image_processor
38
 
 
 
 
 
 
 
 
 
 
 
 
39
  if image_aspect_ratio == 'pad':
40
- for image in images:
41
  image = expand2square(image, tuple(int(x*255) for x in processor.image_mean))
42
  image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
43
  new_images.append(image)
44
  else:
45
- return processor(images, return_tensors='pt')['pixel_values']
 
 
 
 
46
  if all(x.shape == new_images[0].shape for x in new_images):
47
  new_images = torch.stack(new_images, dim=0)
48
  return new_images
 
26
 
27
 
28
  def process_images(images, image_processor, model_cfg):
29
+ from PIL import Image
30
+
31
  image_aspect_ratio = getattr(model_cfg, "image_aspect_ratio", None)
32
  new_images = []
33
 
 
38
  else:
39
  processor = image_processor
40
 
41
+ # Ensure all images are PIL Images
42
+ processed_images = []
43
+ for img in images:
44
+ if not isinstance(img, Image.Image):
45
+ # Convert to PIL Image if needed
46
+ if hasattr(img, 'convert'):
47
+ img = img.convert('RGB')
48
+ else:
49
+ raise ValueError(f"Invalid image type: {type(img)}")
50
+ processed_images.append(img)
51
+
52
  if image_aspect_ratio == 'pad':
53
+ for image in processed_images:
54
  image = expand2square(image, tuple(int(x*255) for x in processor.image_mean))
55
  image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
56
  new_images.append(image)
57
  else:
58
+ # Process each image individually to avoid batching issues
59
+ for image in processed_images:
60
+ processed = processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
61
+ new_images.append(processed)
62
+
63
  if all(x.shape == new_images[0].shape for x in new_images):
64
  new_images = torch.stack(new_images, dim=0)
65
  return new_images