Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -109,7 +109,7 @@ def openPDF(pdf_path):
|
|
| 109 |
logger.info(f"PDF opened successfully, {len(doc)} pages")
|
| 110 |
return doc
|
| 111 |
|
| 112 |
-
def identify_headers_with_openrouter(pdf_path, model, LLM_prompt, pages_to_check=None, top_margin=
|
| 113 |
"""Ask an LLM (OpenRouter) to identify headers in the document.
|
| 114 |
Returns a list of dicts: {text, page, suggested_level, confidence}.
|
| 115 |
The function sends plain page-line strings to the LLM (including page numbers)
|
|
@@ -155,8 +155,8 @@ def identify_headers_with_openrouter(pdf_path, model, LLM_prompt, pages_to_check
|
|
| 155 |
continue
|
| 156 |
y0 = spans[0]['bbox'][1]
|
| 157 |
y1 = spans[0]['bbox'][3]
|
| 158 |
-
if y0 < top_margin or y1 > (page_height - bottom_margin):
|
| 159 |
-
continue
|
| 160 |
text = " ".join(s.get('text','') for s in spans).strip()
|
| 161 |
if text:
|
| 162 |
# prefix with page for easier mapping back
|
|
|
|
| 109 |
logger.info(f"PDF opened successfully, {len(doc)} pages")
|
| 110 |
return doc
|
| 111 |
|
| 112 |
+
def identify_headers_with_openrouter(pdf_path, model, LLM_prompt, pages_to_check=None, top_margin=0, bottom_margin=0):
|
| 113 |
"""Ask an LLM (OpenRouter) to identify headers in the document.
|
| 114 |
Returns a list of dicts: {text, page, suggested_level, confidence}.
|
| 115 |
The function sends plain page-line strings to the LLM (including page numbers)
|
|
|
|
| 155 |
continue
|
| 156 |
y0 = spans[0]['bbox'][1]
|
| 157 |
y1 = spans[0]['bbox'][3]
|
| 158 |
+
# if y0 < top_margin or y1 > (page_height - bottom_margin):
|
| 159 |
+
# continue
|
| 160 |
text = " ".join(s.get('text','') for s in spans).strip()
|
| 161 |
if text:
|
| 162 |
# prefix with page for easier mapping back
|