anirudh-valyx committed on
Commit
8a3d98d
·
1 Parent(s): 0474ce4

fix input text

Browse files
Files changed (1) hide show
  1. app.py +56 -14
app.py CHANGED
@@ -7,28 +7,70 @@ processor = AutoProcessor.from_pretrained("impira/layoutlm-invoices")
7
  model = AutoModelForDocumentQuestionAnswering.from_pretrained("impira/layoutlm-invoices")
8
 
9
  def answer_question(image, question):
 
 
 
 
 
 
 
 
 
 
10
  # Ensure RGB mode
 
 
 
 
 
 
11
  if image.mode != "RGB":
12
  image = image.convert("RGB")
13
-
14
- inputs = processor(image, question, return_tensors="pt")
15
- outputs = model(**inputs)
16
-
17
- start = outputs.start_logits.argmax(-1).item()
18
- end = outputs.end_logits.argmax(-1).item() + 1
19
-
20
- answer = processor.tokenizer.decode(inputs["input_ids"][0][start:end])
21
 
22
- return answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
 
24
  iface = gr.Interface(
25
  fn=answer_question,
26
  inputs=[
27
- gr.Image(type="pil", label="Document Image"),
28
- gr.Textbox(label="Question")
 
 
 
 
 
 
 
 
29
  ],
30
- outputs="text",
31
- title="LayoutLM Invoice QA"
32
  )
33
 
34
- iface.launch()
 
 
 
7
  model = AutoModelForDocumentQuestionAnswering.from_pretrained("impira/layoutlm-invoices")
8
 
9
  def answer_question(image, question):
10
+ """
11
+ Process an invoice image and answer a question about its content
12
+
13
+ Args:
14
+ image: PIL image of the invoice
15
+ question: String question about the invoice
16
+
17
+ Returns:
18
+ String answer extracted from the invoice
19
+ """
20
  # Ensure RGB mode
21
+ if image is None:
22
+ return "Please upload an image"
23
+
24
+ if question is None or question.strip() == "":
25
+ return "Please enter a question"
26
+
27
  if image.mode != "RGB":
28
  image = image.convert("RGB")
 
 
 
 
 
 
 
 
29
 
30
+ # Ensure question is a string (the error was likely here)
31
+ if not isinstance(question, str):
32
+ question = str(question)
33
+
34
+ try:
35
+ # Process the image and question
36
+ inputs = processor(image, question=question, return_tensors="pt")
37
+
38
+ # Get model predictions
39
+ outputs = model(**inputs)
40
+
41
+ # Extract answer
42
+ start = outputs.start_logits.argmax(-1).item()
43
+ end = outputs.end_logits.argmax(-1).item() + 1
44
+ answer = processor.tokenizer.decode(inputs["input_ids"][0][start:end])
45
+
46
+ # Clean up answer (remove special tokens if present)
47
+ answer = answer.replace("[CLS]", "").replace("[SEP]", "").strip()
48
+
49
+ if not answer:
50
+ return "No answer found in the document"
51
+
52
+ return answer
53
+ except Exception as e:
54
+ return f"Error processing document: {str(e)}"
55
 
56
+ # Create Gradio interface
57
  iface = gr.Interface(
58
  fn=answer_question,
59
  inputs=[
60
+ gr.Image(type="pil", label="Upload Invoice Image"),
61
+ gr.Textbox(placeholder="Ask a question about the invoice...", label="Question")
62
+ ],
63
+ outputs=gr.Textbox(label="Answer"),
64
+ title="Invoice Question Answering with LayoutLM",
65
+ description="Upload an invoice image and ask questions like 'What is the invoice number?', 'What is the total amount?', 'Who is the vendor?', etc.",
66
+ examples=[
67
+ ["invoice_sample.jpg", "What is the invoice number?"],
68
+ ["invoice_sample.jpg", "What is the total amount?"],
69
+ ["invoice_sample.jpg", "What is the date?"]
70
  ],
71
+ allow_flagging="never"
 
72
  )
73
 
74
+ # Launch the app
75
+ if __name__ == "__main__":
76
+ iface.launch()