Spaces:

our-sci
/

agriculture-experiments-document-processing

Runtime error

App Files Files Community

rosemariafontana committed on Aug 29, 2024

Commit

435050e

verified ·

1 Parent(s): cdcfcc9

Updates to make this about grain tickets

Browse files

Files changed (1) hide show

app.py +82 -40

app.py CHANGED Viewed

@@ -1,65 +1,107 @@
 import gradio as gr
 import pandas as pd
-from transformers import pipeline
 # Chatbot model
-model = pipeline("document-question-answering", model="impira/layoutlm-document-qa")
 def parse_ticket_image(image, question):
-    """Extract the grain-ticket fields from a ticket image into a one-row table.
-
-    Each output column is paired with the question used to extract it, so an
-    answer cannot end up in the wrong column.
-
-    Args:
-        image: Ticket image handed to the document-question-answering model.
-        question: Unused; kept so existing callers keep working.
-
-    Returns:
-        A pandas DataFrame with one row and one column per ticket field. A
-        cell is None when the model returned no prediction for that field.
-    """
-    # Output column -> question, listed in output column order.
-    # NOTE: the grain-type question ("What is the type of grain (For example:
-    # corn, soy, wheat)?") has no output column yet, so it is not asked here.
-    field_questions = {
-        "Ticket Number": "What is the ticket number?",
-        "Assembly Number": "What is the assembly number?",
-        "Date": "What is the date?",
-        "Time": "What is the time?",
-        "Gross Weight": "What is the gross weight?",
-        "Tare Weight": "What is the tare weight?",
-        "Net Weight": "What is the net weight?",
-        "Moisture": "What is the moisture (moist) percentage?",
-        "Damage": "What is the damage percentage?",
-        "Gross Units": "What is the gross units?",
-        "Dock Units": "What is the dock units?",
-        "Comment": "What is the comment?",
-    }
     # Create a structured format (like a table) using pandas
-    data = {}
-    for column, field_question in field_questions.items():
-        # The transformers document-QA pipeline takes the image as its first
-        # positional argument, so pass both arguments by keyword.
-        result = model(image=image, question=field_question)
-        # A single prediction may come back as a bare dict instead of a list.
-        if isinstance(result, dict):
-            result = [result]
-        data[column] = [result[0]["answer"] if result else None]
     df = pd.DataFrame(data)
     return df

 import gradio as gr
 import pandas as pd
+#from transformers import pipeline
+from docquery import pipeline
+from docquery.document import load_document
 # Chatbot model
+#model = pipeline("document-question-answering", model="impira/layoutlm-document-qa")
+def run_pipeline(question, document):
+    """Ask the document-QA model one question about a loaded document.
+
+    Args:
+        question: Question text to ask.
+        document: Loaded document; its ``context`` mapping is expanded into
+            the pipeline call as keyword arguments.
+
+    Returns:
+        The pipeline's result for the question, requested with ``top_k=3``.
+    """
+    # Build the pipeline with the imported docquery ``pipeline`` factory
+    # (``construct_pipeline`` is neither defined nor imported here) and keep
+    # it on the function so the checkpoint is loaded once, not per question.
+    qa_pipeline = getattr(run_pipeline, "_qa_pipeline", None)
+    if qa_pipeline is None:
+        # Named ``qa_pipeline`` so the local does not shadow the factory.
+        qa_pipeline = pipeline(
+            task="document-question-answering",
+            model="impira/layoutlm-document-qa",
+        )
+        run_pipeline._qa_pipeline = qa_pipeline
+    return qa_pipeline(question=question, **document.context, top_k=3)
+def process_question(question, document):
+    """Return the top answer text for one question about a document.
+
+    Args:
+        question: Question to ask; a falsy value short-circuits to None.
+        document: Loaded document, or None when nothing could be loaded.
+
+    Returns:
+        The ``"answer"`` value of the first prediction, or None when there is
+        no question, no document, or no prediction.
+    """
+    if not question or document is None:
+        # A single None (not a 3-tuple) so every path returns the same shape.
+        return None
+    predictions = run_pipeline(question, document)
+    if predictions is None:
+        return None
+    # The pipeline may hand back one prediction or a list of them.
+    if not isinstance(predictions, list):
+        predictions = [predictions]
+    # Only the top prediction is reported for now.
+    return predictions[0]["answer"] if predictions else None
 def parse_ticket_image(image, question):
+    """Read the ticket number from an uploaded grain-ticket file.
+
+    Args:
+        image: Uploaded file object; its ``name`` attribute is passed to
+            ``load_document``. A falsy value means nothing was uploaded.
+        question: Unused; kept so existing callers keep working.
+
+    Returns:
+        A one-row pandas DataFrame with a "Ticket Number" column. The cell is
+        None when no document could be loaded or no answer was found.
+    """
+    # Local import: traceback is not among the module-level imports shown.
+    import traceback
+
+    # Processing the image
+    # Start from "no document" so a missing or unreadable upload cannot leave
+    # the name unbound further down.
+    document = None
+    if image:
+        try:
+            document = load_document(image.name)
+        except Exception:
+            # Best effort: log the failure and fall through to an empty row.
+            traceback.print_exc()
     # Define questions you want to ask the model
+    # Output column -> question, listed in output column order.
+    # TODO: re-enable the remaining ticket questions (grain type, date, time,
+    # gross/tare/net weight, moisture, damage, gross units, dock units,
+    # comment, assembly number) together with their output columns.
+    field_questions = {
+        "Ticket Number": "What is the ticket number?",
+    }
     # Use the model to answer each question
     # Create a structured format (like a table) using pandas
+    data = {}
+    for column, field_question in field_questions.items():
+        # Pass the question text itself, not a dict wrapping it, and skip the
+        # model entirely when there is no document to ask about.
+        if document is None:
+            answer_text = None
+        else:
+            answer_text = process_question(field_question, document)
+        data[column] = [answer_text]
     df = pd.DataFrame(data)
     return df