dung-vpt-uney committed on
Commit
3461177
·
1 Parent(s): 2aa3056

Update Visual-CoT demo - 2025-10-12 22:46:22

Browse files

Fixes:
- Fix LLaVA config registration error (compatibility with newer transformers)
- Update Gradio to latest version (security fixes)
- Auto-deployed via update script

Files changed (1) hide show
  1. llava/mm_utils.py +11 -3
llava/mm_utils.py CHANGED
@@ -28,13 +28,21 @@ def expand2square(pil_img, background_color):
28
  def process_images(images, image_processor, model_cfg):
29
  image_aspect_ratio = getattr(model_cfg, "image_aspect_ratio", None)
30
  new_images = []
 
 
 
 
 
 
 
 
31
  if image_aspect_ratio == 'pad':
32
  for image in images:
33
- image = expand2square(image, tuple(int(x*255) for x in image_processor.image_mean))
34
- image = image_processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
35
  new_images.append(image)
36
  else:
37
- return image_processor(images, return_tensors='pt')['pixel_values']
38
  if all(x.shape == new_images[0].shape for x in new_images):
39
  new_images = torch.stack(new_images, dim=0)
40
  return new_images
 
def process_images(images, image_processor, model_cfg):
    """Run the image processor over ``images`` and return the pixel values.

    Args:
        images: Iterable of images to preprocess. In the ``'pad'`` path each
            one is passed to ``expand2square`` (presumably PIL images --
            confirm against callers).
        image_processor: Either a single processor or a list/tuple of
            processors (multi-scale vision). When a sequence is given, only
            its first element is used for preprocessing.
        model_cfg: Object whose optional ``image_aspect_ratio`` attribute
            selects the preprocessing path; a missing attribute is treated
            as ``None``.

    Returns:
        * When ``image_aspect_ratio`` is not ``'pad'``: whatever
          ``processor(images, return_tensors='pt')['pixel_values']`` yields.
        * When it is ``'pad'``: a single stacked tensor if every processed
          image has the same shape, otherwise the list of per-image tensors.
          An empty ``images`` yields an empty list.
    """
    image_aspect_ratio = getattr(model_cfg, "image_aspect_ratio", None)

    # Handle both a single processor and a sequence of processors
    # (multi-scale vision): the first processor does the preprocessing.
    if isinstance(image_processor, (list, tuple)):
        processor = image_processor[0]
    else:
        processor = image_processor

    if image_aspect_ratio != 'pad':
        return processor(images, return_tensors='pt')['pixel_values']

    # The padding colour is the processor's mean scaled to 0-255; it does not
    # depend on the individual image, so compute it once.
    background_color = tuple(int(x * 255) for x in processor.image_mean)

    new_images = []
    for image in images:
        image = expand2square(image, background_color)
        image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
        new_images.append(image)

    # Only stack when there is something to stack: all() is vacuously true on
    # an empty list, and torch.stack([]) would raise.
    if new_images and all(x.shape == new_images[0].shape for x in new_images):
        new_images = torch.stack(new_images, dim=0)
    return new_images