Marthee committed on
Commit
339752c
·
verified ·
1 Parent(s): 1f0d7ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -109,7 +109,7 @@ def openPDF(pdf_path):
109
  logger.info(f"PDF opened successfully, {len(doc)} pages")
110
  return doc
111
 
112
- def identify_headers_with_openrouter(pdf_path, model, LLM_prompt, pages_to_check=None, top_margin=70, bottom_margin=85):
113
  """Ask an LLM (OpenRouter) to identify headers in the document.
114
  Returns a list of dicts: {text, page, suggested_level, confidence}.
115
  The function sends plain page-line strings to the LLM (including page numbers)
@@ -155,8 +155,8 @@ def identify_headers_with_openrouter(pdf_path, model, LLM_prompt, pages_to_check
155
  continue
156
  y0 = spans[0]['bbox'][1]
157
  y1 = spans[0]['bbox'][3]
158
- if y0 < top_margin or y1 > (page_height - bottom_margin):
159
- continue
160
  text = " ".join(s.get('text','') for s in spans).strip()
161
  if text:
162
  # prefix with page for easier mapping back
 
109
  logger.info(f"PDF opened successfully, {len(doc)} pages")
110
  return doc
111
 
112
+ def identify_headers_with_openrouter(pdf_path, model, LLM_prompt, pages_to_check=None, top_margin=0, bottom_margin=0):
113
  """Ask an LLM (OpenRouter) to identify headers in the document.
114
  Returns a list of dicts: {text, page, suggested_level, confidence}.
115
  The function sends plain page-line strings to the LLM (including page numbers)
 
155
  continue
156
  y0 = spans[0]['bbox'][1]
157
  y1 = spans[0]['bbox'][3]
158
+ # if y0 < top_margin or y1 > (page_height - bottom_margin):
159
+ # continue
160
  text = " ".join(s.get('text','') for s in spans).strip()
161
  if text:
162
  # prefix with page for easier mapping back