Marthee committed on
Commit
19c2c38
·
verified ·
1 Parent(s): 4710371

Update Azure_api.py

Browse files
Files changed (1) hide show
  1. Azure_api.py +1 -0
Azure_api.py CHANGED
@@ -35,6 +35,7 @@ def detect_tables(pdf_source):
35
  for c in table.cells:
36
  grid[c.row_index][c.column_index] = c.content
37
  df = pd.DataFrame(grid)
 
38
  df["page_number"] = table.bounding_regions[0].page_number
39
  tables.append(df)
40
 
 
35
  for c in table.cells:
36
  grid[c.row_index][c.column_index] = c.content
37
  df = pd.DataFrame(grid)
38
+ df = df.replace(r':+(?:selected|unselected):*', '', regex=True) # strip :selected:/:unselected: selection-mark tokens from cell text
39
  df["page_number"] = table.bounding_regions[0].page_number
40
  tables.append(df)
41