# app.py — pratikshahp ("Update app.py", commit dc094a5, verified)
import gradio as gr
from PIL import Image
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import easyocr
# One-time setup: the GPT-2 model used for answer generation, its
# tokenizer, and an English-only EasyOCR reader for text extraction.
model_name = "openai-community/gpt2"
qa_tokenizer = AutoTokenizer.from_pretrained(model_name)
qa_model = AutoModelForCausalLM.from_pretrained(model_name)
reader = easyocr.Reader(['en'])
# Function to extract text from image using easyocr
def extract_text_from_image(image):
    """Run EasyOCR over *image* and return every detected text fragment
    joined into a single space-separated string."""
    # detail=0 makes readtext return only the recognized strings,
    # without bounding boxes or confidence scores.
    fragments = reader.readtext(image, detail=0)
    return " ".join(fragments)
# Function to get response from the language model
def get_response_from_llm(extracted_text, question):
    """Generate an answer to *question* using *extracted_text* as context.

    Builds a "question: ... context: ..." prompt, runs it through the
    causal LM, and returns only the newly generated text (the prompt is
    stripped from the decoded output).
    """
    # Prepare the input for the model (truncate long OCR dumps to fit).
    input_text = f"question: {question} context: {extracted_text}"
    encoded = qa_tokenizer(
        input_text, return_tensors="pt", max_length=512, truncation=True
    )
    # Generate the response.
    # BUG FIX: the original used max_length=150, which limits the TOTAL
    # sequence length (prompt + generation) — prompts over 150 tokens
    # would fail or produce nothing. max_new_tokens bounds only the
    # generated continuation. GPT-2 has no pad token, so we pass the
    # attention mask explicitly and pad with EOS to silence warnings.
    with torch.no_grad():
        outputs = qa_model.generate(
            encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            max_new_tokens=150,
            num_return_sequences=1,
            pad_token_id=qa_tokenizer.eos_token_id,
        )
    # Decode only the tokens generated after the prompt, so the user
    # does not see their own question/context echoed back.
    prompt_len = encoded["input_ids"].shape[-1]
    response = qa_tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    )
    return response
# Define the Gradio interface function
def gradio_interface(image, question):
    """Validate the inputs, then OCR the invoice image and answer the
    user's question from the extracted text."""
    # Guard clauses: reject missing image or blank question up front.
    if image is None:
        return "Please upload an image."
    if not question.strip():
        return "Please enter a question."
    extracted = extract_text_from_image(image)
    return get_response_from_llm(extracted, question)
# Gradio Interface
# Gradio UI: invoice image + question in, extracted answer out.
# BUG FIX: the gr.inputs.* / gr.outputs.* namespaces were deprecated and
# removed in Gradio 3+; components are now top-level classes (gr.Image,
# gr.Textbox), so the original code raises AttributeError on current Gradio.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Image(type="pil", label="Upload Invoice Image"),
        gr.Textbox(
            lines=2,
            placeholder="Enter your question about the invoice",
            label="Question",
        ),
    ],
    outputs=gr.Textbox(label="Extracted Information"),
    title="Invoice Extractor",
    description="Upload your invoice image and ask a question about it.",
)
# Launch the Gradio app (blocking; serves the web UI).
iface.launch()