"""Gradio app for invoice question answering.

Uploads an invoice image, runs OCR + a LayoutLM document-QA model on it via
the docquery library, and answers free-form questions about the invoice.
"""

import os
import traceback

import gradio as gr
import torch
from PIL import Image

# Disable tokenizers parallelism to avoid fork-related warnings from HF tokenizers.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Use docquery library instead of direct transformer usage — it wires up OCR
# and the document-question-answering pipeline correctly for this model.
from docquery import pipeline
from docquery.document import ImageDocument
from docquery.ocr_reader import get_ocr_reader

# Display name and Hugging Face checkpoint for the invoice-tuned LayoutLM model.
MODEL_NAME = "LayoutLMv1 for Invoices"
CHECKPOINT = "impira/layoutlm-invoices"
# Cache of constructed pipelines, keyed by model display name.
PIPELINES = {}


def construct_pipeline(model_name):
    """Create and cache a document QA pipeline.

    The pipeline is built once per ``model_name`` and memoised in PIPELINES so
    repeated questions do not reload the model. (No ``global`` statement is
    needed: the dict is mutated in place, never rebound.)
    """
    if model_name in PIPELINES:
        return PIPELINES[model_name]

    device = "cuda" if torch.cuda.is_available() else "cpu"
    qa_pipeline = pipeline(
        task="document-question-answering",
        model=CHECKPOINT,
        device=device,
    )
    PIPELINES[model_name] = qa_pipeline
    return qa_pipeline


def process_document(file):
    """Process the uploaded document.

    Returns a ``(document, preview_update)`` pair: a docquery ImageDocument
    (or None on failure / no file) and a gr.update for the image preview.
    """
    if file is None:
        return None, gr.update(visible=False)

    try:
        image = Image.open(file.name)
        # The OCR/model stack expects RGB input.
        if image.mode != "RGB":
            image = image.convert("RGB")
        # Create a document using docquery (runs OCR to extract words/boxes).
        document = ImageDocument(image, get_ocr_reader())
        return document, gr.update(visible=True, value=image)
    except Exception:
        traceback.print_exc()
        # BUG FIX: the original returned an error *string* as the value of the
        # gr.Image preview component, which is not a valid image value.
        # Simply hide the preview on failure instead.
        return None, gr.update(visible=False)


def answer_question(question, document):
    """Process question using the document QA pipeline.

    Returns the best answer string, or a human-readable message when inputs
    are missing or inference fails.
    """
    if document is None or not question:
        return "Please upload a document and enter a question"

    try:
        qa_pipeline = construct_pipeline(MODEL_NAME)
        # document.context supplies the image and OCR word boxes the
        # pipeline needs alongside the question text.
        results = qa_pipeline(question=question, **document.context, top_k=1)
        if results:
            return results[0]["answer"]
        return "No answer found in the document"
    except Exception as e:
        traceback.print_exc()
        return f"Error processing document: {str(e)}"


# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Invoice Question Answering")
    gr.Markdown("Upload an invoice image and ask questions like 'What is the invoice number?', 'What is the total amount?', etc.")

    # Document storage (per-session state holding the processed ImageDocument).
    document = gr.State(None)

    with gr.Row():
        with gr.Column():
            gr.Markdown("## 1. Upload a document")
            upload = gr.File(label="Upload Invoice Image")
            image_preview = gr.Image(label="Preview", visible=False)

            gr.Markdown("## 2. Ask a question")
            question = gr.Textbox(
                label="Question",
                placeholder="e.g. What is the invoice number?",
                lines=1,
            )
            submit_button = gr.Button("Submit", variant="primary")

        with gr.Column():
            gr.Markdown("## Results")
            answer_text = gr.Textbox(label="Answer", lines=2)

    # Set up event handlers
    upload.change(
        fn=process_document,
        inputs=[upload],
        outputs=[document, image_preview],
    )
    submit_button.click(
        fn=answer_question,
        inputs=[question, document],
        outputs=[answer_text],
    )
    # Also trigger on pressing Enter in question box
    question.submit(
        fn=answer_question,
        inputs=[question, document],
        outputs=[answer_text],
    )

if __name__ == "__main__":
    demo.launch()