rosemariafontana committed on
Commit
435050e
·
verified ·
1 Parent(s): cdcfcc9

Updates to make this about grain tickets

Browse files
Files changed (1) hide show
  1. app.py +82 -40
app.py CHANGED
@@ -1,65 +1,107 @@
1
  import gradio as gr
2
  import pandas as pd
3
 
4
- from transformers import pipeline
 
 
 
5
 
6
 
7
  # Chatbot model: module-level "document-question-answering" pipeline using the impira/layoutlm-document-qa checkpoint
8
- model = pipeline("document-question-answering", model="impira/layoutlm-document-qa")
 
 
 
 
 
 
 
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
def parse_ticket_image(image, question):
    """Ask the document-QA model a fixed set of questions about a ticket image.

    Each output column is paired directly with the question that fills it, so
    an answer can never land in the wrong column (the previous index-based
    extraction was shifted by one after the ticket number).

    Args:
        image: The ticket image handed to the module-level ``model`` pipeline.
        question: Unused; kept so the signature stays compatible with callers.

    Returns:
        A single-row ``pandas.DataFrame`` with one column per ticket field.
        A cell is ``None`` when the model returns no prediction for it.
    """
    # (output column, question) pairs, in output-column order.
    # NOTE(review): the grain-type question used to be asked as well, but its
    # answer had no output column, so it is not sent to the model here.
    fields = [
        ("Ticket Number", "What is the ticket number?"),
        ("Assembly Number", "What is the assembly number?"),
        ("Date", "What is the date?"),
        ("Time", "What is the time?"),
        ("Gross Weight", "What is the gross weight?"),
        ("Tare Weight", "What is the tare weight?"),
        ("Net Weight", "What is the net weight?"),
        ("Moisture", "What is the moisture (moist) percentage?"),
        ("Damage", "What is the damage percentage?"),
        ("Gross Units", "What is the gross units?"),
        ("Dock Units", "What is the dock units?"),
        ("Comment", "What is the comment?"),
    ]

    row = {}
    for column, prompt in fields:
        # Keyword arguments: the pipeline's first positional parameter is the
        # image, so passing (question, image) positionally swaps them.
        predictions = model(image=image, question=prompt)
        if isinstance(predictions, dict):
            # Tolerate a pipeline that returns a single prediction dict.
            predictions = [predictions]
        best = predictions[0] if predictions else None
        row[column] = [best["answer"] if best else None]

    # Create a structured format (like a table) using pandas
    return pd.DataFrame(row)
 
1
  import gradio as gr
2
  import pandas as pd
3
 
4
+ #from transformers import pipeline
5
+
6
+ from docquery import pipeline
7
+ from docquery.document import load_document
8
 
9
 
10
  # Chatbot model
11
+ #model = pipeline("document-question-answering", model="impira/layoutlm-document-qa")
12
+
# Pipelines are expensive to build (model load), so each one is built once.
_PIPELINE_CACHE = {}


def _get_pipeline(task, model):
    """Return the docquery pipeline for (task, model), building it on first use."""
    key = (task, model)
    if key not in _PIPELINE_CACHE:
        _PIPELINE_CACHE[key] = pipeline(task=task, model=model)
    return _PIPELINE_CACHE[key]


def run_pipeline(question, document):
    """Run the document-QA pipeline for one question against a loaded document.

    Args:
        question: The question text to ask.
        document: A loaded document exposing a ``context`` mapping that is
            expanded into the pipeline call.

    Returns:
        Whatever the pipeline returns for ``top_k=3``
        (presumably up to three ranked predictions -- confirm against docquery).
    """
    # Use a distinct local name: rebinding ``pipeline`` inside this function
    # would shadow the imported docquery factory.
    qa_pipeline = _get_pipeline(
        "document-question-answering", "impira/layoutlm-document-qa"
    )
    return qa_pipeline(question=question, **document.context, top_k=3)
16
+
def process_question(question, document):
    """Return the top-ranked answer text for one question about a document.

    Args:
        question: The question text. A dict with a ``"question"`` key is also
            accepted, because the in-file caller passes its whole question
            entry rather than just the text.
        document: The loaded document to query; ``None`` means no document.

    Returns:
        The ``"answer"`` value of the first prediction, or ``None`` when the
        question is empty, there is no document, or the pipeline returns no
        predictions. (The guard path previously returned a 3-tuple, which did
        not match the single value returned on the normal path.)
    """
    if isinstance(question, dict):
        question = question.get("question")
    if not question or document is None:
        return None

    predictions = run_pipeline(question, document)
    if predictions is None:
        return None
    if not isinstance(predictions, (list, tuple)):
        # A single prediction dict rather than a list of them.
        predictions = [predictions]
    if not predictions:
        return None

    # Only the top prediction is surfaced for now.
    return predictions[0]["answer"]
33
 
def parse_ticket_image(image, question):
    """Basically just runs through these questions for the document.

    Args:
        image: The uploaded ticket. Its ``name`` attribute is used as the file
            path when present (assumes an uploaded-file object -- TODO
            confirm); otherwise the value itself is passed to
            ``load_document``.
        question: Unused; kept so the signature stays compatible with callers.

    Returns:
        A single-row ``pandas.DataFrame`` with one column per enabled ticket
        field. Cells are ``None`` when no image was given, the document could
        not be loaded, or no answer was produced.
    """
    import traceback  # local import: the module does not import traceback

    # (output column, question) pairs. Pairing them keeps answers and columns
    # aligned as more fields are enabled.
    fields = [
        ("Ticket Number", "What is the ticket number?"),
        # Not enabled yet:
        # ("Assembly Number", "What is the assembly number?"),
        # ("Grain Type", "What is the type of grain (For example: corn, soy, wheat)?"),
        # ("Date", "What is the date?"),
        # ("Time", "What is the time?"),
        # ("Gross Weight", "What is the gross weight?"),
        # ("Tare Weight", "What is the tare weight?"),
        # ("Net Weight", "What is the net weight?"),
        # ("Moisture", "What is the moisture (moist) percentage?"),
        # ("Damage", "What is the damage percentage?"),
        # ("Gross Units", "What is the gross units?"),
        # ("Dock Units", "What is the dock units?"),
        # ("Comment", "What is the comment?"),
    ]

    # Processing the image
    document = None
    if image:
        try:
            document = load_document(getattr(image, "name", image))
        except Exception:
            # Best effort at the UI boundary: log the failure and fall through
            # to an empty result instead of crashing the app.
            traceback.print_exc()

    row = {}
    for column, prompt in fields:
        if document is None:
            # Nothing to query; previously this path hit an unbound `document`.
            row[column] = [None]
        else:
            # Pass the question text, not the whole question entry.
            row[column] = [process_question(prompt, document)]

    # Create a structured format (like a table) using pandas
    return pd.DataFrame(row)