Marthee committed on
Commit
94261fc
·
verified ·
1 Parent(s): 339752c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -157,10 +157,17 @@ def identify_headers_with_openrouter(pdf_path, model, LLM_prompt, pages_to_check
157
  y1 = spans[0]['bbox'][3]
158
  # if y0 < top_margin or y1 > (page_height - bottom_margin):
159
  # continue
160
- text = " ".join(s.get('text','') for s in spans).strip()
161
- if text:
 
 
162
  # prefix with page for easier mapping back
163
- lines_for_prompt.append(f"PAGE {pno+1}: {text}")
 
 
 
 
 
164
  lines_on_page += 1
165
 
166
  if lines_on_page > 0:
 
157
  y1 = spans[0]['bbox'][3]
158
  # if y0 < top_margin or y1 > (page_height - bottom_margin):
159
  # continue
160
+ for s in spans:
161
+ # text,font,size,flags,color
162
+ ArrayofTextWithFormat={'Font':s.get('font')},{'Size':s.get('size')},{'Flags':s.get('flags')},{'Color':s.get('color')},{'Text':s.get('text')}
163
+
164
  # prefix with page for easier mapping back
165
+ lines_for_prompt.append(f"PAGE {pno+1}: {ArrayofTextWithFormat}")
166
+
167
+ # text = " ".join(s.get('text','') for s in spans).strip()
168
+ # if text:
169
+ # # prefix with page for easier mapping back
170
+ # lines_for_prompt.append(f"PAGE {pno+1}: {text}")
171
  lines_on_page += 1
172
 
173
  if lines_on_page > 0: