"""Invoice QA app: OCR an uploaded invoice image with EasyOCR, then answer
questions about the extracted text with a causal language model (GPT-2).

NOTE(review): GPT-2 is not fine-tuned for question answering; the
"question: ... context: ..." prompt is a T5-style format and answer quality
will be poor. Consider an extractive-QA or instruction-tuned model.
"""

import gradio as gr
import numpy as np
from PIL import Image
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import easyocr

# Load the question-answering model and tokenizer once at import time.
model_name = "openai-community/gpt2"
qa_model = AutoModelForCausalLM.from_pretrained(model_name)
qa_tokenizer = AutoTokenizer.from_pretrained(model_name)
# GPT-2 has no pad token; reuse EOS so generate() can build attention masks.
qa_tokenizer.pad_token = qa_tokenizer.eos_token

# EasyOCR reader for English text.
reader = easyocr.Reader(['en'])


def extract_text_from_image(image):
    """Run OCR on a PIL image and return the recognized text as one string.

    EasyOCR accepts a file path, raw bytes, or a numpy array — not a PIL
    Image — so convert first (fixes a TypeError with Gradio's type="pil").
    """
    ocr_result = reader.readtext(np.array(image), detail=0)
    return " ".join(ocr_result)


def get_response_from_llm(extracted_text, question):
    """Generate an answer to *question* given OCR'd *extracted_text*.

    Returns only the newly generated continuation, not the echoed prompt.
    """
    input_text = f"question: {question} context: {extracted_text}"
    # Truncate long invoices so the prompt fits the model's context window.
    encoded = qa_tokenizer(
        input_text, return_tensors="pt", max_length=512, truncation=True
    )

    with torch.no_grad():
        outputs = qa_model.generate(
            encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            # max_new_tokens counts only generated tokens; the original
            # max_length=150 conflicted with prompts up to 512 tokens.
            max_new_tokens=150,
            num_return_sequences=1,
            pad_token_id=qa_tokenizer.eos_token_id,
        )

    # Decoder-only models echo the prompt — slice it off before decoding.
    new_tokens = outputs[0][encoded["input_ids"].shape[1]:]
    return qa_tokenizer.decode(new_tokens, skip_special_tokens=True)


def gradio_interface(image, question):
    """Validate inputs, then pipe image → OCR → LLM and return the answer."""
    if image is None:
        return "Please upload an image."
    if not question.strip():
        return "Please enter a question."
    extracted_text = extract_text_from_image(image)
    return get_response_from_llm(extracted_text, question)


# Gradio Interface — gr.inputs/gr.outputs namespaces were removed in
# Gradio 3.x/4.x; use the top-level component classes instead.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Image(type="pil", label="Upload Invoice Image"),
        gr.Textbox(
            lines=2,
            placeholder="Enter your question about the invoice",
            label="Question",
        ),
    ],
    outputs=gr.Textbox(label="Extracted Information"),
    title="Invoice Extractor",
    description="Upload your invoice image and ask a question about it.",
)

# Guard the launch so importing this module doesn't start a server.
if __name__ == "__main__":
    iface.launch()