Spaces:
Runtime error
Runtime error
dung-vpt-uney
committed on
Commit
·
420a658
1
Parent(s):
849a3f2
Update Visual-CoT demo - 2025-10-12 23:02:28
Browse files

Fixes:
- Fix LLaVA config registration error (compatibility with newer transformers)
- Update Gradio to latest version (security fixes)
- Auto-deployed via update script
- app.py +1 -1
- llava/mm_utils.py +19 -2
app.py
CHANGED
|
@@ -54,7 +54,7 @@ else:
|
|
| 54 |
# Configuration
|
| 55 |
# =============================================================================
|
| 56 |
|
| 57 |
-
MODEL_PATH = "deepcs233/VisCoT-7b-
|
| 58 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 59 |
|
| 60 |
# Benchmark datasets available
|
|
|
|
| 54 |
# Configuration
|
| 55 |
# =============================================================================
|
| 56 |
|
| 57 |
+
MODEL_PATH = "deepcs233/VisCoT-7b-224" # Hugging Face model ID (smallest version)
|
| 58 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 59 |
|
| 60 |
# Benchmark datasets available
|
llava/mm_utils.py
CHANGED
|
@@ -26,6 +26,8 @@ def expand2square(pil_img, background_color):
|
|
| 26 |
|
| 27 |
|
| 28 |
def process_images(images, image_processor, model_cfg):
|
|
|
|
|
|
|
| 29 |
image_aspect_ratio = getattr(model_cfg, "image_aspect_ratio", None)
|
| 30 |
new_images = []
|
| 31 |
|
|
@@ -36,13 +38,28 @@ def process_images(images, image_processor, model_cfg):
|
|
| 36 |
else:
|
| 37 |
processor = image_processor
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
if image_aspect_ratio == 'pad':
|
| 40 |
-
for image in
|
| 41 |
image = expand2square(image, tuple(int(x*255) for x in processor.image_mean))
|
| 42 |
image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
|
| 43 |
new_images.append(image)
|
| 44 |
else:
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
if all(x.shape == new_images[0].shape for x in new_images):
|
| 47 |
new_images = torch.stack(new_images, dim=0)
|
| 48 |
return new_images
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
def process_images(images, image_processor, model_cfg):
|
| 29 |
+
from PIL import Image
|
| 30 |
+
|
| 31 |
image_aspect_ratio = getattr(model_cfg, "image_aspect_ratio", None)
|
| 32 |
new_images = []
|
| 33 |
|
|
|
|
| 38 |
else:
|
| 39 |
processor = image_processor
|
| 40 |
|
| 41 |
+
# Ensure all images are PIL Images
|
| 42 |
+
processed_images = []
|
| 43 |
+
for img in images:
|
| 44 |
+
if not isinstance(img, Image.Image):
|
| 45 |
+
# Convert to PIL Image if needed
|
| 46 |
+
if hasattr(img, 'convert'):
|
| 47 |
+
img = img.convert('RGB')
|
| 48 |
+
else:
|
| 49 |
+
raise ValueError(f"Invalid image type: {type(img)}")
|
| 50 |
+
processed_images.append(img)
|
| 51 |
+
|
| 52 |
if image_aspect_ratio == 'pad':
|
| 53 |
+
for image in processed_images:
|
| 54 |
image = expand2square(image, tuple(int(x*255) for x in processor.image_mean))
|
| 55 |
image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
|
| 56 |
new_images.append(image)
|
| 57 |
else:
|
| 58 |
+
# Process each image individually to avoid batching issues
|
| 59 |
+
for image in processed_images:
|
| 60 |
+
processed = processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
|
| 61 |
+
new_images.append(processed)
|
| 62 |
+
|
| 63 |
if all(x.shape == new_images[0].shape for x in new_images):
|
| 64 |
new_images = torch.stack(new_images, dim=0)
|
| 65 |
return new_images
|