# Non-code header text captured together with this file (kept as comments):
# rishabsinghh's picture
# Changes
# 1055490
import gradio as gr
from transformers import LayoutLMv2ForQuestionAnswering, LayoutLMv2Processor
import torch
from PIL import Image
# Resolve the checkpoint once and build both inference objects from it.
# NOTE(review): the checkpoint id contains "layoutlm" while the classes used
# here are the LayoutLMv2 variants -- confirm the two are actually compatible.
model_name = "impira/layoutlm-document-qa"
# The processor is what turns (image, question) into model inputs.
processor = LayoutLMv2Processor.from_pretrained(model_name)
# The question-answering model yields start/end logits for the answer span.
model = LayoutLMv2ForQuestionAnswering.from_pretrained(model_name)
def predict(image, question):
    """Answer ``question`` about the document shown in ``image``.

    The image and question are encoded with the module-level ``processor``,
    run through the module-level ``model`` without gradient tracking, and the
    highest-scoring start/end token positions are decoded back to text.

    Args:
        image: The document page, handed to ``processor`` unchanged.
        question: The question to ask about the document.

    Returns:
        The decoded answer text. A fixed prompt message is returned when the
        image is missing or the question is empty/blank, and an empty string
        is returned when the predicted end position precedes the start.
    """
    # Guard against a missing image or a blank question, which would
    # otherwise fail inside the processor with an opaque error.
    if image is None or not question or not question.strip():
        return "Please upload a document image and enter a question."

    # Process the image and question
    encoding = processor(image, question, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**encoding)

    # Exactly one example is encoded, so select row 0 explicitly rather than
    # relying on argmax over the flattened (batch, sequence) tensor.
    start_index = int(torch.argmax(outputs.start_logits[0]))
    end_index = int(torch.argmax(outputs.end_logits[0]))

    # The two argmaxes are independent; an inverted span carries no answer.
    if end_index < start_index:
        return ""

    # Extract the answer, leaving out special tokens such as separators.
    answer_ids = encoding.input_ids[0][start_index:end_index + 1]
    answer = processor.tokenizer.decode(answer_ids, skip_special_tokens=True)
    return answer.strip()
# Create a Gradio interface
# Components come from the top-level ``gr`` namespace: the ``gr.inputs``
# sub-module is deprecated and no longer present in newer Gradio releases.
interface = gr.Interface(
    fn=predict,
    inputs=[
        # type="pil" makes Gradio pass the upload to ``predict`` as a PIL image.
        gr.Image(type="pil"),
        gr.Textbox(lines=1, placeholder="Enter your question"),
    ],
    outputs="text",
    title="Document Question Answering with LayoutLM",
    description="Ask questions about the content of a document.",
)
# Launch the interface
# This is a top-level statement, so it executes whenever the file is run.
interface.launch()