kebson committed on
Commit
daa9804
·
verified ·
1 Parent(s): 81e98af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -145,12 +145,13 @@ def extract_second_column(image):
145
  for line in merged:
146
  nt = normalize(line)
147
 
148
-
149
- if len(nt) < 4:
150
  continue
151
- if sum(c.isdigit() for c in line) > len(line) / 2:
152
  continue
153
- if any(k in nt for k in ["dpo","dao","ref","reference","date","nme",":"]):
 
 
154
  continue
155
 
156
  final.append(line)
 
145
  for line in merged:
146
  nt = normalize(line)
147
 
148
+ if nt in COL_TITLES:
 
149
  continue
150
+ if len(nt) < 5:
151
  continue
152
+ if sum(c.isdigit() for c in line) > len(line) / 3:
153
+ continue
154
+ if not line[0].isupper():
155
  continue
156
 
157
  final.append(line)