tatr-demo

Sleeping

App Files Files Community

nielsr HF Staff committed on Nov 19, 2023

Commit

b706fa9

1 Parent(s): 5279027

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -4

app.py CHANGED Viewed

@@ -149,15 +149,84 @@ def recognize_table(image):
     for cell in cells:
         draw.rectangle(cell["bbox"], outline="red")
-    return image
 def process_pdf(image):
     cropped_table = detect_and_crop_table(image)
-    image = recognize_table(cropped_table)
-    return image
 title = "Demo: table detection with Table Transformer"
@@ -166,7 +235,7 @@ examples =[['image.png']]
 app = gr.Interface(fn=process_pdf,
                      inputs=gr.Image(type="pil"),
-                     outputs=gr.Image(type="pil", label="Detected table"),
                      title=title,
                      description=description,
                      examples=examples)

     for cell in cells:
         draw.rectangle(cell["bbox"], outline="red")
+    return image, cells
def get_cell_coordinates_by_row(table_data):
    """Build a row-major grid of cell boxes from detected rows and columns.

    Args:
        table_data: Iterable of dicts, each with a ``'label'`` and a
            ``'bbox'`` entry. Only entries labelled ``'table row'`` or
            ``'table column'`` are used; everything else is ignored.
            Boxes are indexed as ``bbox[0]..bbox[3]``
            (presumably ``[x_min, y_min, x_max, y_max]`` -- confirm against
            the producer of ``table_data``).

    Returns:
        A list with one dict per row, ordered by ``bbox[1]`` of the row
        (top to bottom under the assumption above). Each dict holds:

        * ``'row'``: the row's bbox,
        * ``'cells'``: one ``{'column': <column bbox>, 'cell': <cell bbox>}``
          dict per column, ordered by ``bbox[0]`` of the column,
        * ``'cell_count'``: number of cells in the row.
    """
    rows = [entry for entry in table_data if entry['label'] == 'table row']
    columns = [entry for entry in table_data if entry['label'] == 'table column']

    # Order both axes once, up front. Every row is then combined with the
    # already ordered columns, so no per-row or final re-sort is needed.
    rows = sorted(rows, key=lambda entry: entry['bbox'][1])
    columns = sorted(columns, key=lambda entry: entry['bbox'][0])

    cell_coordinates = []
    for row in rows:
        row_bbox = row['bbox']
        row_cells = []
        for column in columns:
            column_bbox = column['bbox']
            # A cell is the intersection of a column's horizontal extent
            # (indices 0 and 2) with a row's vertical extent (indices 1 and 3).
            cell_bbox = [column_bbox[0], row_bbox[1], column_bbox[2], row_bbox[3]]
            row_cells.append({'column': column_bbox, 'cell': cell_bbox})
        cell_coordinates.append(
            {'row': row_bbox, 'cells': row_cells, 'cell_count': len(row_cells)}
        )

    return cell_coordinates
def apply_ocr(cell_coordinates, cropped_table=None):
    """Run OCR on every table cell and return the recognised text per row.

    Args:
        cell_coordinates: Row-major cell layout as returned by
            ``get_cell_coordinates_by_row``: a sequence of dicts whose
            ``"cells"`` entry is a list of dicts carrying a ``"cell"`` box.
        cropped_table: Image the cell boxes refer to. It must provide
            ``crop(box)`` whose result ``np.array`` can convert
            (``process_pdf`` passes the result of ``detect_and_crop_table``).
            It used to be read from an undefined name, which raised
            ``NameError`` on the first cell; it is now passed explicitly.

    Returns:
        Dict mapping the row index to a list of strings. Rows are padded
        with ``""`` at the end so every list has the same length.

    Raises:
        ValueError: If a row contains cells but ``cropped_table`` is missing.
    """
    # OCR row by row.
    data = {}
    for idx, row in enumerate(cell_coordinates):
        if row["cells"] and cropped_table is None:
            raise ValueError(
                "apply_ocr() needs the cropped table image to read cell contents"
            )
        row_text = []
        for cell in row["cells"]:
            # Crop the cell out of the table image and hand it to the OCR reader.
            cell_image = np.array(cropped_table.crop(cell["cell"]))
            # `reader` is a module-level OCR object defined elsewhere in the file
            # (presumably an easyocr.Reader -- confirm); element 1 of each
            # result item is joined as the recognised text.
            result = reader.readtext(cell_image)
            # NOTE(review): cells without any OCR result are skipped rather
            # than stored as "", so later cells in that row shift left and the
            # padding below is appended at the end of the row.
            if result:
                row_text.append(" ".join(x[1] for x in result))
        data[idx] = row_text

    # Pad rows which don't have max_num_columns elements so that all rows
    # end up with the same number of columns.
    max_num_columns = max((len(texts) for texts in data.values()), default=0)
    return {
        idx: texts + [""] * (max_num_columns - len(texts))
        for idx, texts in data.items()
    }


def process_pdf(image):
    """Detect the table in ``image``, recognise its structure and OCR its cells.

    Args:
        image: Input image; the Gradio interface supplies a PIL image.

    Returns:
        A tuple ``(image, data)``: the image returned by ``recognize_table``
        (which outlines the detected cells in red) and the per-row text dict
        produced by ``apply_ocr``.
    """
    cropped_table = detect_and_crop_table(image)
    # NOTE(review): if recognize_table draws onto the image it is given, the
    # crops OCR'd below include the red cell outlines -- confirm.
    image, cells = recognize_table(cropped_table)
    cell_coordinates = get_cell_coordinates_by_row(cells)
    # Pass the cropped table explicitly: the cell boxes are relative to it.
    data = apply_ocr(cell_coordinates, cropped_table)
    return image, data
 title = "Demo: table detection with Table Transformer"
 app = gr.Interface(fn=process_pdf,
                      inputs=gr.Image(type="pil"),
+                     outputs=[gr.Image(type="pil", label="Detected table"), "json"],
                      title=title,
                      description=description,
                      examples=examples)