Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import ViltProcessor, ViltForQuestionAnswering | |
| from PIL import Image | |
| import torch | |
# Prefer the GPU when CUDA is usable; otherwise run inference on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Load the fine-tuned ViLT checkpoint: the processor prepares image/text
# inputs, and the model is moved to the selected device right after loading.
processor = ViltProcessor.from_pretrained("MariaK/vilt_finetuned_200")
model = ViltForQuestionAnswering.from_pretrained("MariaK/vilt_finetuned_200").to(device)
def predict(image, question):
    """Answer a free-form question about an image with the fine-tuned ViLT model.

    Args:
        image: The image to inspect (the Gradio input component in this file
            is configured with ``type="pil"``), or ``None`` when nothing was
            uploaded.
        question: The question text about the image.

    Returns:
        The label that ``model.config.id2label`` maps the highest-scoring
        logit to, or a short prompt asking for the missing input.
    """
    # Guard clauses: the processor cannot handle a missing image or a blank
    # question, so ask the user for the input instead of failing with an
    # opaque error from inside the processor/model.
    if image is None:
        return "Please upload an image."
    if not question or not question.strip():
        return "Please enter a question."

    # Encode the image/question pair as tensors on the model's device.
    inputs = processor(image, question, return_tensors="pt").to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Pick the highest-scoring class along the last axis and translate it to
    # its label; .item() assumes a single example per call.
    best_idx = logits.argmax(-1).item()
    return model.config.id2label[best_idx]
# Input widgets: the picture is delivered to predict() as a PIL image and the
# question as a single line of text.
image_input = gr.Image(type="pil")
question_input = gr.Textbox(lines=1, placeholder="Enter your question here...")

# Wire the widgets to predict(); the returned answer is shown as plain text.
iface = gr.Interface(
    fn=predict,
    inputs=[image_input, question_input],
    outputs="text",
    title="Visual Question Answering with Fine-tuned Vilt",
    description="Upload an image and ask a question about it!",
)

# Start the app; share=True asks Gradio to create a public share link.
iface.launch(share=True)