Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -158,18 +158,18 @@ def identify_headers_with_openrouter(pdf_path, model, LLM_prompt, pages_to_check
|
|
| 158 |
|
| 159 |
# Use first span to check vertical position
|
| 160 |
y0 = spans[0]["bbox"][1]
|
| 161 |
-
y1 = spans[0][
|
| 162 |
-
|
| 163 |
# if y0 < top_margin or y1 > (page_height - bottom_margin):
|
| 164 |
# continue
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
lines_for_prompt.append(f"PAGE {pno + 1}: {text}")
|
| 172 |
lines_on_page += 1
|
|
|
|
|
|
|
| 173 |
|
| 174 |
# page = doc.load_page(pno)
|
| 175 |
# page_height = page.rect.height
|
|
|
|
| 158 |
|
| 159 |
# Use first span to check vertical position
|
| 160 |
y0 = spans[0]["bbox"][1]
|
| 161 |
+
y1 = spans[0]['bbox'][3]
|
|
|
|
| 162 |
# if y0 < top_margin or y1 > (page_height - bottom_margin):
|
| 163 |
# continue
|
| 164 |
+
text = " ".join(s.get('text','') for s in spans).strip()
|
| 165 |
+
if text:
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
# prefix with page for easier mapping back
|
| 169 |
+
lines_for_prompt.append(f"PAGE {pno+1}: {text}")
|
|
|
|
| 170 |
lines_on_page += 1
|
| 171 |
+
|
| 172 |
+
# if lines_on_page > 0:
|
| 173 |
|
| 174 |
# page = doc.load_page(pno)
|
| 175 |
# page_height = page.rect.height
|