Marthee committed on
Commit
4ac61d6
·
verified ·
1 Parent(s): f25349d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -17
app.py CHANGED
@@ -3,6 +3,9 @@ import os
3
  import json
4
  import requests
5
  from io import BytesIO
 
 
 
6
  import fitz # PyMuPDF
7
 
8
  from urllib.parse import urlparse, unquote
@@ -228,33 +231,25 @@ def identify_headers_with_openrouter(pdf_path, model,LLM_prompt, pages_to_check=
228
  out.append({'text': t, 'page': page-1, 'suggested_level': level, 'confidence': conf})
229
  return out
230
 
231
- import gradio as gr
232
- import pandas as pd
233
- from io import BytesIO
234
 
235
- def identify_headers_with_table(pdf_path, model, LLM_prompt):
236
- # Call your existing function
237
- result = identify_headers_with_openrouter(pdf_path, model, LLM_prompt)
238
 
239
  if not result:
240
- return None # nothing to return
241
 
242
- # Convert to DataFrame
243
  df = pd.DataFrame(result)
244
 
245
- # Optional: adjust page numbers for human-readable format
246
- df['page'] = df['page'] + 1
247
-
248
- # Save to in-memory Excel file
249
  output = BytesIO()
250
- df.to_excel(output, index=False)
251
- output.seek(0)
252
 
253
- return output # return file-like object for download
254
 
255
- # Gradio Interface
256
  iface = gr.Interface(
257
- fn=identify_headers_with_table,
258
  inputs=[
259
  gr.Textbox(label="Document Link"),
260
  gr.Textbox(label="Model Type"),
@@ -264,3 +259,4 @@ iface = gr.Interface(
264
  )
265
 
266
  iface.launch()
 
 
3
  import json
4
  import requests
5
  from io import BytesIO
6
+ import gradio as gr
7
+ import pandas as pd
8
+ from io import BytesIO
9
  import fitz # PyMuPDF
10
 
11
  from urllib.parse import urlparse, unquote
 
231
  out.append({'text': t, 'page': page-1, 'suggested_level': level, 'confidence': conf})
232
  return out
233
 
 
 
 
234
 
235
+ def identify_headers_and_save_excel(pdf_path, model, llm_prompt):
236
+ # This calls your existing header extraction function
237
+ result = identify_headers_with_openrouter(pdf_path, model, llm_prompt)
238
 
239
  if not result:
240
+ return None
241
 
 
242
  df = pd.DataFrame(result)
243
 
244
+ # Save to BytesIO
 
 
 
245
  output = BytesIO()
246
+ df.to_excel(output, index=False, engine='openpyxl')
247
+ output.seek(0) # reset pointer to start
248
 
249
+ return output
250
 
 
251
  iface = gr.Interface(
252
+ fn=identify_headers_and_save_excel,
253
  inputs=[
254
  gr.Textbox(label="Document Link"),
255
  gr.Textbox(label="Model Type"),
 
259
  )
260
 
261
  iface.launch()
262
+