Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -226,29 +226,31 @@ def identify_headers_with_openrouter(pdf_path, model,LLM_prompt, pages_to_check=
|
|
| 226 |
conf = float(obj.get('confidence') or 0)
|
| 227 |
if t and page is not None:
|
| 228 |
out.append({'text': t, 'page': page-1, 'suggested_level': level, 'confidence': conf})
|
| 229 |
-
df = pd.DataFrame(out)
|
| 230 |
-
|
| 231 |
-
# Save to Excel
|
| 232 |
-
df.to_excel("output.xlsx", index=False)
|
| 233 |
-
|
| 234 |
-
print("Saved JSON output to output.xlsx")
|
| 235 |
return out
|
| 236 |
|
| 237 |
-
|
|
|
|
|
|
|
|
|
|
| 238 |
def identify_headers_with_table(pdf_path, model, LLM_prompt):
|
| 239 |
# Call your existing function
|
| 240 |
result = identify_headers_with_openrouter(pdf_path, model, LLM_prompt)
|
| 241 |
|
| 242 |
-
# Convert list of dicts to list of lists for Gradio Dataframe
|
| 243 |
if not result:
|
| 244 |
-
return
|
| 245 |
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
#
|
| 250 |
-
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
|
| 253 |
# Gradio Interface
|
| 254 |
iface = gr.Interface(
|
|
@@ -258,7 +260,7 @@ iface = gr.Interface(
|
|
| 258 |
gr.Textbox(label="Model Type"),
|
| 259 |
gr.Textbox(label="LLM Prompt")
|
| 260 |
],
|
| 261 |
-
outputs=gr.
|
| 262 |
)
|
| 263 |
|
| 264 |
iface.launch()
|
|
|
|
| 226 |
conf = float(obj.get('confidence') or 0)
|
| 227 |
if t and page is not None:
|
| 228 |
out.append({'text': t, 'page': page-1, 'suggested_level': level, 'confidence': conf})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
return out
|
| 230 |
|
| 231 |
+
import gradio as gr
|
| 232 |
+
import pandas as pd
|
| 233 |
+
from io import BytesIO
|
| 234 |
+
|
| 235 |
def identify_headers_with_table(pdf_path, model, LLM_prompt):
    """Detect headers in a PDF and export them as a downloadable Excel file.

    Calls ``identify_headers_with_openrouter`` and writes the returned
    records to an ``.xlsx`` workbook on disk.

    Args:
        pdf_path: Forwarded unchanged to ``identify_headers_with_openrouter``.
        model: Forwarded unchanged to ``identify_headers_with_openrouter``.
        LLM_prompt: Forwarded unchanged to ``identify_headers_with_openrouter``.

    Returns:
        The path (``str``) of the generated Excel file, or ``None`` when no
        headers were found.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    import tempfile

    result = identify_headers_with_openrouter(pdf_path, model, LLM_prompt)

    if not result:
        return None  # nothing to export

    df = pd.DataFrame(result)

    # Records carry `page - 1`; add 1 back so the sheet shows the original
    # page numbers.
    df['page'] = df['page'] + 1

    # gr.File needs a filesystem path, not an in-memory BytesIO object, so the
    # workbook is written to a temp file. The handle is closed before pandas
    # writes to the path, which keeps this working on Windows as well.
    with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
        excel_path = tmp.name
    df.to_excel(excel_path, index=False)

    return excel_path
|
| 254 |
|
| 255 |
# Gradio Interface
|
| 256 |
iface = gr.Interface(
|
|
|
|
| 260 |
gr.Textbox(label="Model Type"),
|
| 261 |
gr.Textbox(label="LLM Prompt")
|
| 262 |
],
|
| 263 |
+
outputs=gr.File(label="Download Excel")
|
| 264 |
)
|
| 265 |
|
| 266 |
iface.launch()
|