foxtrot126 committed on
Commit
884a96b
·
verified ·
1 Parent(s): 0cd7c51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -12,14 +12,14 @@ model = AutoModelForVisualQuestionAnswering.from_pretrained("Salesforce/blip-vqa
12
  # Define inference function
13
  def process_image(image, prompt):
14
  # Process the image and prompt using the processor
15
- inputs = processor(image.convert("RGB"), prompt, return_tensors="pt")
16
 
17
  try:
18
  # Generate output from the model
19
- output = model.generate(**inputs, max_new_tokens=20)
20
 
21
  # Decode and return the output
22
- decoded_output = processor.decode(output[0], skip_special_tokens=True)
23
 
24
  # Return the answer (exclude the prompt part from output)
25
  return decoded_output[len(prompt):]
 
12
  # Define inference function
13
  def process_image(image, prompt):
14
  # Process the image and prompt using the processor
15
+ inputs = processor(image, text=prompt, return_tensors="pt").to(device, torch.float16)
16
 
17
  try:
18
  # Generate output from the model
19
+ output = model.generate(**inputs, max_new_tokens=10)
20
 
21
  # Decode and return the output
22
+ decoded_output = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
23
 
24
  # Return the answer (exclude the prompt part from output)
25
  return decoded_output[len(prompt):]