Spaces:

valyx
/

layout-lm

Runtime error

App Files Community

anirudh-valyx committed on May 15, 2025

Commit

8a3d98d

1 Parent(s): 0474ce4

fix input text

Browse files

Files changed (1) hide show

app.py +56 -14

app.py CHANGED Viewed

@@ -7,28 +7,70 @@ processor = AutoProcessor.from_pretrained("impira/layoutlm-invoices")
 model = AutoModelForDocumentQuestionAnswering.from_pretrained("impira/layoutlm-invoices")
 def answer_question(image, question):
     # Ensure RGB mode
     if image.mode != "RGB":
         image = image.convert("RGB")
-    inputs = processor(image, question, return_tensors="pt")
-    outputs = model(**inputs)
-    start = outputs.start_logits.argmax(-1).item()
-    end = outputs.end_logits.argmax(-1).item() + 1
-    answer = processor.tokenizer.decode(inputs["input_ids"][0][start:end])
-    return answer
 iface = gr.Interface(
     fn=answer_question,
     inputs=[
-        gr.Image(type="pil", label="Document Image"),
-        gr.Textbox(label="Question")
     ],
-    outputs="text",
-    title="LayoutLM Invoice QA"
 )
-iface.launch()

 model = AutoModelForDocumentQuestionAnswering.from_pretrained("impira/layoutlm-invoices")
 def answer_question(image, question):
+    """
+    Process an invoice image and answer a question about its content
+    Args:
+        image: PIL image of the invoice
+        question: String question about the invoice
+    Returns:
+        String answer extracted from the invoice
+    """
     # Ensure RGB mode
+    if image is None:
+        return "Please upload an image"
+    if question is None or question.strip() == "":
+        return "Please enter a question"
     if image.mode != "RGB":
         image = image.convert("RGB")
+    # Ensure question is a string (the error was likely here)
+    if not isinstance(question, str):
+        question = str(question)
+    try:
+        # Process the image and question
+        inputs = processor(image, question=question, return_tensors="pt")
+        # Get model predictions
+        outputs = model(**inputs)
+        # Extract answer
+        start = outputs.start_logits.argmax(-1).item()
+        end = outputs.end_logits.argmax(-1).item() + 1
+        answer = processor.tokenizer.decode(inputs["input_ids"][0][start:end])
+        # Clean up answer (remove special tokens if present)
+        answer = answer.replace("[CLS]", "").replace("[SEP]", "").strip()
+        if not answer:
+            return "No answer found in the document"
+        return answer
+    except Exception as e:
+        return f"Error processing document: {str(e)}"
+# Create Gradio interface
 iface = gr.Interface(
     fn=answer_question,
     inputs=[
+        gr.Image(type="pil", label="Upload Invoice Image"),
+        gr.Textbox(placeholder="Ask a question about the invoice...", label="Question")
+    ],
+    outputs=gr.Textbox(label="Answer"),
+    title="Invoice Question Answering with LayoutLM",
+    description="Upload an invoice image and ask questions like 'What is the invoice number?', 'What is the total amount?', 'Who is the vendor?', etc.",
+    examples=[
+        ["invoice_sample.jpg", "What is the invoice number?"],
+        ["invoice_sample.jpg", "What is the total amount?"],
+        ["invoice_sample.jpg", "What is the date?"]
     ],
+    allow_flagging="never"
 )
+# Launch the app
+if __name__ == "__main__":
+    iface.launch()