Hug0endob committed on
Commit
35d219a
·
verified ·
1 Parent(s): 7aed240

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -4
app.py CHANGED
@@ -54,7 +54,7 @@ processor = AutoProcessor.from_pretrained(
54
  num_additional_image_tokens=1,
55
  **({} if not HF_TOKEN else {"token": HF_TOKEN})
56
  )
57
- # Use float32 on CPU; if CPU-only, torch.bfloat16 may not be supported
58
  llava_model = LlavaForConditionalGeneration.from_pretrained(
59
  MODEL_NAME,
60
  device_map="cpu",
@@ -83,14 +83,13 @@ def generate_caption_from_url(url: str, prompt: str = "Describe the image.") ->
83
  except Exception as e:
84
  return f"Image processing error: {e}"
85
 
86
- # Resize to a conservative size (512) expected by many VLMs
87
  try:
88
  img = img.resize((512, 512), resample=Image.BICUBIC)
89
  except Exception:
90
  pass
91
 
92
  try:
93
- # Use chat-like conversation so processor inserts image token correctly
94
  conversation = [
95
  {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt}]}
96
  ]
@@ -106,7 +105,7 @@ def generate_caption_from_url(url: str, prompt: str = "Describe the image.") ->
106
  if "pixel_values" in inputs:
107
  inputs["pixel_values"] = inputs["pixel_values"].to(dtype=llava_model.dtype, device=device)
108
 
109
- # Debug prints (will appear in Space logs)
110
  if "pixel_values" in inputs:
111
  print("pixel_values.shape:", inputs["pixel_values"].shape)
112
  if "input_ids" in inputs:
 
54
  num_additional_image_tokens=1,
55
  **({} if not HF_TOKEN else {"token": HF_TOKEN})
56
  )
57
+ # CPU Space -> use float32
58
  llava_model = LlavaForConditionalGeneration.from_pretrained(
59
  MODEL_NAME,
60
  device_map="cpu",
 
83
  except Exception as e:
84
  return f"Image processing error: {e}"
85
 
86
+ # Resize to conservative default
87
  try:
88
  img = img.resize((512, 512), resample=Image.BICUBIC)
89
  except Exception:
90
  pass
91
 
92
  try:
 
93
  conversation = [
94
  {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt}]}
95
  ]
 
105
  if "pixel_values" in inputs:
106
  inputs["pixel_values"] = inputs["pixel_values"].to(dtype=llava_model.dtype, device=device)
107
 
108
+ # Minimal debug info (appears in Space logs)
109
  if "pixel_values" in inputs:
110
  print("pixel_values.shape:", inputs["pixel_values"].shape)
111
  if "input_ids" in inputs: