Spaces:

foxtrot126
/

DATA460ass8

Sleeping

foxtrot126 committed on Nov 18

Commit

884a96b

verified ·

1 Parent(s): 0cd7c51

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -12,14 +12,14 @@ model = AutoModelForVisualQuestionAnswering.from_pretrained("Salesforce/blip-vqa
 # Define inference function
 def process_image(image, prompt):
    # Process the image and prompt using the processor
-   inputs = processor(image.convert("RGB"), prompt, return_tensors="pt")
    try:
        # Generate output from the model
-       output = model.generate(**inputs, max_new_tokens=20)
        # Decode and return the output
-       decoded_output = processor.decode(output[0], skip_special_tokens=True)
        # Return the answer (exclude the prompt part from output)
        return decoded_output[len(prompt):]

 # Define inference function
 def process_image(image, prompt):
    # Process the image and prompt using the processor
+   inputs = processor(image, text=prompt, return_tensors="pt").to(device, torch.float16)
    try:
        # Generate output from the model
+       output = model.generate(**inputs, max_new_tokens=10)
        # Decode and return the output
+       decoded_output = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
        # Return the answer (exclude the prompt part from output)
        return decoded_output[len(prompt):]