rosemariafontana committed on
Commit
d787446
·
verified ·
1 Parent(s): 4d13fcf

changed model

Browse files
Files changed (1) hide show
  1. app.py +29 -34
app.py CHANGED
@@ -1,52 +1,47 @@
1
  import gradio as gr
2
  import pandas as pd
3
 
4
- #from transformers import pipeline
5
 
6
- from docquery import pipeline
7
- from docquery.document import load_document
8
 
 
 
9
 
10
- # Chatbot model
11
- #model = pipeline("document-question-answering", model="impira/layoutlm-document-qa")
12
 
13
def construct_pipeline(task, model):
    """Return a cached docquery pipeline for *model*, building it on first use.

    Parameters:
        task: pipeline task string, e.g. "document-question-answering".
        model: key into the module-level CHECKPOINTS mapping; also the cache
            key into the module-level PIPELINES dict.

    Returns:
        The (possibly cached) docquery pipeline object.
    """
    global PIPELINES
    if model in PIPELINES:
        return PIPELINES[model]

    # BUG FIX: `torch` was referenced below but never imported anywhere in
    # the visible module imports; a function-scope import keeps the fix local.
    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"
    ret = pipeline(task=task, model=CHECKPOINTS[model], device=device)
    PIPELINES[model] = ret
    return ret
22
-
23
def run_pipeline(question, document):
    """Run document question-answering on *document* and return the top-3
    predictions for *question*.

    *document* is expected to expose a ``.context`` mapping of pipeline
    keyword arguments (as docquery documents do).
    """
    # Renamed the local so it no longer shadows the module-level
    # docquery `pipeline` import.
    qa_pipe = construct_pipeline("document-question-answering", "impira/layoutlm-document-qa")
    return qa_pipe(question=question, **document.context, top_k=3)
26
-
27
def process_question(question, document):
    """Return the top predicted answer text for *question* over *document*.

    Parameters:
        question: the user's question string; falsy values short-circuit.
        document: a docquery document (or None).

    Returns:
        The answer string of the highest-ranked prediction, or None when
        the inputs are missing or no prediction is produced.
    """
    if not question or document is None:
        # BUG FIX: this guard previously returned (None, None, None) while
        # the success path returns a single value; callers unpacking three
        # values would crash on success, so the single-value contract is
        # the consistent one.
        return None

    # run_pipeline requests top_k=3, but only the top prediction is
    # surfaced for now; multiple boxes can be re-enabled later.
    predictions = run_pipeline(question, document)
    for p in ensure_list(predictions):
        return p["answer"]
    return None
 
 
 
 
 
 
43
 
44
  def parse_ticket_image(image, question):
45
  """Basically just runs through these questions for the document"""
46
  # Processing the image
47
  if image:
48
  try:
49
- document = load_document(image.name)
50
  except Exception as e:
51
  traceback.print_exc()
52
  error = str(e)
 
1
  import gradio as gr
2
  import pandas as pd
3
 
4
+ from transformers import LayoutLMv2Processor, LayoutLMv3ForQuestionAnswering
5
 
6
+ processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv3-base")
7
+ model = LayoutLMv3ForQuestionAnswering.from_pretrained("microsoft/layoutlmv3-base")
8
 
9
def process_question(question, document):
    """Answer *question* about *document* (a PIL image) with LayoutLMv3.

    Performs extractive QA: encodes the page with the module-level
    `processor`, runs the module-level `model`, takes the argmax start/end
    token positions, and decodes that token span back to text.

    Returns:
        The decoded answer string (may be empty if the model predicts an
        empty or inverted span).
    """
    encoding = processor(document, question, return_tensors="pt")

    # BUG FIX: was `outputs = mode(**encoding)` — a NameError typo; the
    # module-level `model` is what must be called.
    outputs = model(**encoding)

    predicted_start_idx = outputs.start_logits.argmax(-1).item()
    predicted_end_idx = outputs.end_logits.argmax(-1).item()

    # NOTE(review): if end precedes start this slice is empty and the
    # answer decodes to "" — presumably acceptable for this demo; confirm.
    answer_tokens = encoding.input_ids.squeeze()[predicted_start_idx: predicted_end_idx + 1]
    answer = processor.tokenizer.decode(answer_tokens)

    return answer
21
+
22
+ #def process_question(question, document):
23
+ # if not question or document is None:
24
+ # return None, None, None
25
+ #
26
+ # text_value = None
27
+ # predictions = run_pipeline(question, document)
28
+ #
29
+ # for i, p in enumerate(ensure_list(predictions)):
30
+ # if i == 0:
31
+ # text_value = p["answer"]
32
+ # else:
33
+ # # Keep the code around to produce multiple boxes, but only show the top
34
+ # # prediction for now
35
+ # break
36
+ #
37
+ # return text_value
38
 
39
  def parse_ticket_image(image, question):
40
  """Basically just runs through these questions for the document"""
41
  # Processing the image
42
  if image:
43
  try:
44
+ document = Image.open(image.name).convert("RGB")
45
  except Exception as e:
46
  traceback.print_exc()
47
  error = str(e)