Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,9 @@ import os
|
|
| 3 |
import json
|
| 4 |
import requests
|
| 5 |
from io import BytesIO
|
|
|
|
|
|
|
|
|
|
| 6 |
import fitz # PyMuPDF
|
| 7 |
|
| 8 |
from urllib.parse import urlparse, unquote
|
|
@@ -228,33 +231,25 @@ def identify_headers_with_openrouter(pdf_path, model,LLM_prompt, pages_to_check=
|
|
| 228 |
out.append({'text': t, 'page': page-1, 'suggested_level': level, 'confidence': conf})
|
| 229 |
return out
|
| 230 |
|
| 231 |
-
import gradio as gr
|
| 232 |
-
import pandas as pd
|
| 233 |
-
from io import BytesIO
|
| 234 |
|
| 235 |
-
def
|
| 236 |
-
#
|
| 237 |
-
result = identify_headers_with_openrouter(pdf_path, model,
|
| 238 |
|
| 239 |
if not result:
|
| 240 |
-
return None
|
| 241 |
|
| 242 |
-
# Convert to DataFrame
|
| 243 |
df = pd.DataFrame(result)
|
| 244 |
|
| 245 |
-
#
|
| 246 |
-
df['page'] = df['page'] + 1
|
| 247 |
-
|
| 248 |
-
# Save to in-memory Excel file
|
| 249 |
output = BytesIO()
|
| 250 |
-
df.to_excel(output, index=False)
|
| 251 |
-
output.seek(0)
|
| 252 |
|
| 253 |
-
return output
|
| 254 |
|
| 255 |
-
# Gradio Interface
|
| 256 |
iface = gr.Interface(
|
| 257 |
-
fn=
|
| 258 |
inputs=[
|
| 259 |
gr.Textbox(label="Document Link"),
|
| 260 |
gr.Textbox(label="Model Type"),
|
|
@@ -264,3 +259,4 @@ iface = gr.Interface(
|
|
| 264 |
)
|
| 265 |
|
| 266 |
iface.launch()
|
|
|
|
|
|
| 3 |
import json
|
| 4 |
import requests
|
| 5 |
from io import BytesIO
|
| 6 |
+
import gradio as gr
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from io import BytesIO
|
| 9 |
import fitz # PyMuPDF
|
| 10 |
|
| 11 |
from urllib.parse import urlparse, unquote
|
|
|
|
| 231 |
out.append({'text': t, 'page': page-1, 'suggested_level': level, 'confidence': conf})
|
| 232 |
return out
|
| 233 |
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
+
def identify_headers_and_save_excel(pdf_path, model, llm_prompt):
|
| 236 |
+
# This calls your existing header extraction function
|
| 237 |
+
result = identify_headers_with_openrouter(pdf_path, model, llm_prompt)
|
| 238 |
|
| 239 |
if not result:
|
| 240 |
+
return None
|
| 241 |
|
|
|
|
| 242 |
df = pd.DataFrame(result)
|
| 243 |
|
| 244 |
+
# Save to BytesIO
|
|
|
|
|
|
|
|
|
|
| 245 |
output = BytesIO()
|
| 246 |
+
df.to_excel(output, index=False, engine='openpyxl')
|
| 247 |
+
output.seek(0) # reset pointer to start
|
| 248 |
|
| 249 |
+
return output
|
| 250 |
|
|
|
|
| 251 |
iface = gr.Interface(
|
| 252 |
+
fn=identify_headers_and_save_excel,
|
| 253 |
inputs=[
|
| 254 |
gr.Textbox(label="Document Link"),
|
| 255 |
gr.Textbox(label="Model Type"),
|
|
|
|
| 259 |
)
|
| 260 |
|
| 261 |
iface.launch()
|
| 262 |
+
|