Spaces:

our-sci
/

agriculture-experiments-document-processing

Runtime error

App Files Files Community

rosemariafontana committed on Aug 30, 2024

Commit

623e388

verified ·

1 Parent(s): cad3a8c

Changed some values to be updated

Browse files

Files changed (1) hide show

app.py +10 -17

app.py CHANGED Viewed

@@ -15,47 +15,40 @@ model.to(device)
 labels = model.config.id2label
 print(labels)
-# helper function to unnormalize bounding boxes
-def unnormalize_box(bbox, width, height):
-    return [
-        width * (bbox[0] / 1000),
-        height * (bbox[1] / 1000),
-        width * (bbox[2] / 1000),
-        height * (bbox[3] / 1000),
-    ]
 # process the image in the correct format
 # extract token classifications
 def parse_ticket_image(image):
     if image:
         document = image.convert("RGB") if image.mode != "RGB" else image
     else:
         print(f"Warning - no image or malformed image!")
         return pd.DataFrame()
     encoding = processor(document, return_tensors="pt", truncation=True)
     for k, v in encoding.items():
         encoding[k] = v.to(device)
     outputs = model(**encoding)
     predictions = outputs.logits.argmax(-1).squeeze().tolist()
-    token_boxes = encoding.bbox.squeeze().tolist()
     input_ids = encoding.input_ids.squeeze().tolist()
     words = [processor.tokenizer.decode(id) for id in input_ids]
-    width, height = document.size
-    true_predictions = []
-    true_boxes = []
     for idx, pred in enumerate(predictions):
         label = model.config.id2label[pred]
         # apparently 'O' stands for non-entity tokens
         if label != 'O':
-            true_predictions.append(label)
-            true_boxes.append(unnormalize_box(token_boxes[idx], width, height))
     if len(extracted_fields) == 0:
         print(f"Warning - no fields were extracted!")
@@ -67,8 +60,8 @@ def parse_ticket_image(image):
     values = values[:min_length]
     data = {
-        "Field": true_predictions,
-        "Value": words[1:len(true_predictions)+1]
     }
     df = pd.DataFrame(data)

 labels = model.config.id2label
 print(labels)
 # process the image in the correct format
 # extract token classifications
 def parse_ticket_image(image):
+    # Process image
     if image:
         document = image.convert("RGB") if image.mode != "RGB" else image
     else:
         print(f"Warning - no image or malformed image!")
         return pd.DataFrame()
+    # Encode document image
     encoding = processor(document, return_tensors="pt", truncation=True)
+    # Move encoding to appropriate device
     for k, v in encoding.items():
         encoding[k] = v.to(device)
+    # Perform inference
     outputs = model(**encoding)
+    # extract predictions
     predictions = outputs.logits.argmax(-1).squeeze().tolist()
     input_ids = encoding.input_ids.squeeze().tolist()
     words = [processor.tokenizer.decode(id) for id in input_ids]
+    extracted_fields = []
     for idx, pred in enumerate(predictions):
         label = model.config.id2label[pred]
         # apparently 'O' stands for non-entity tokens
         if label != 'O':
+            extracted_fields.append((label, words[idx]))
     if len(extracted_fields) == 0:
         print(f"Warning - no fields were extracted!")
     values = values[:min_length]
     data = {
+        "Field": fields,
+        "Value": values
     }
     df = pd.DataFrame(data)