Marthee committed on
Commit
56a515e
·
verified ·
1 Parent(s): da33c89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -158,18 +158,18 @@ def identify_headers_with_openrouter(pdf_path, model, LLM_prompt, pages_to_check
158
 
159
  # Use first span to check vertical position
160
  y0 = spans[0]["bbox"][1]
161
- y1 = spans[0]["bbox"][3]
162
-
163
  # if y0 < top_margin or y1 > (page_height - bottom_margin):
164
  # continue
165
-
166
- for span in spans:
167
- text = span.get("text", "").strip()
168
- if not text:
169
- continue
170
-
171
- lines_for_prompt.append(f"PAGE {pno + 1}: {text}")
172
  lines_on_page += 1
 
 
173
 
174
  # page = doc.load_page(pno)
175
  # page_height = page.rect.height
 
158
 
159
  # Use first span to check vertical position
160
  y0 = spans[0]["bbox"][1]
161
+ y1 = spans[0]['bbox'][3]
 
162
  # if y0 < top_margin or y1 > (page_height - bottom_margin):
163
  # continue
164
+ text = " ".join(s.get('text','') for s in spans).strip()
165
+ if text:
166
+
167
+
168
+ # prefix with page for easier mapping back
169
+ lines_for_prompt.append(f"PAGE {pno+1}: {text}")
 
170
  lines_on_page += 1
171
+
172
+ # if lines_on_page > 0:
173
 
174
  # page = doc.load_page(pno)
175
  # page_height = page.rect.height