dung-vpt-uney committed on
Commit
420a658
·
1 Parent(s): 849a3f2

Update Visual-CoT demo - 2025-10-12 23:02:28

Browse files

Fixes:
- Fix LLaVA config registration error (compatibility with newer transformers)
- Update Gradio to latest version (security fixes)
- Auto-deployed via update script

Files changed (2)
  1. app.py +1 -1
  2. llava/mm_utils.py +19 -2
app.py CHANGED
@@ -54,7 +54,7 @@ else:
54
  # Configuration
55
  # =============================================================================
56
 
57
- MODEL_PATH = "deepcs233/VisCoT-7b-336" # Hugging Face model ID
58
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
59
 
60
  # Benchmark datasets available
 
54
  # Configuration
55
  # =============================================================================
56
 
57
+ MODEL_PATH = "deepcs233/VisCoT-7b-224" # Hugging Face model ID (smallest version)
58
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
59
 
60
  # Benchmark datasets available
llava/mm_utils.py CHANGED
@@ -26,6 +26,8 @@ def expand2square(pil_img, background_color):
26
 
27
 
28
  def process_images(images, image_processor, model_cfg):
 
 
29
  image_aspect_ratio = getattr(model_cfg, "image_aspect_ratio", None)
30
  new_images = []
31
 
@@ -36,13 +38,28 @@ def process_images(images, image_processor, model_cfg):
36
  else:
37
  processor = image_processor
38
 
 
 
 
 
 
 
 
 
 
 
 
39
  if image_aspect_ratio == 'pad':
40
- for image in images:
41
  image = expand2square(image, tuple(int(x*255) for x in processor.image_mean))
42
  image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
43
  new_images.append(image)
44
  else:
45
- return processor(images, return_tensors='pt')['pixel_values']
 
 
 
 
46
  if all(x.shape == new_images[0].shape for x in new_images):
47
  new_images = torch.stack(new_images, dim=0)
48
  return new_images
 
26
 
27
 
28
  def process_images(images, image_processor, model_cfg):
29
+ from PIL import Image
30
+
31
  image_aspect_ratio = getattr(model_cfg, "image_aspect_ratio", None)
32
  new_images = []
33
 
 
38
  else:
39
  processor = image_processor
40
 
41
+ # Ensure all images are PIL Images
42
+ processed_images = []
43
+ for img in images:
44
+ if not isinstance(img, Image.Image):
45
+ # Convert to PIL Image if needed
46
+ if hasattr(img, 'convert'):
47
+ img = img.convert('RGB')
48
+ else:
49
+ raise ValueError(f"Invalid image type: {type(img)}")
50
+ processed_images.append(img)
51
+
52
  if image_aspect_ratio == 'pad':
53
+ for image in processed_images:
54
  image = expand2square(image, tuple(int(x*255) for x in processor.image_mean))
55
  image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
56
  new_images.append(image)
57
  else:
58
+ # Process each image individually to avoid batching issues
59
+ for image in processed_images:
60
+ processed = processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
61
+ new_images.append(processed)
62
+
63
  if all(x.shape == new_images[0].shape for x in new_images):
64
  new_images = torch.stack(new_images, dim=0)
65
  return new_images