Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -409,32 +409,43 @@ def identify_headers_with_openrouter(pdf_path, model, LLM_prompt, pages_to_check
|
|
| 409 |
|
| 410 |
|
| 411 |
def identify_headers_and_save_excel(pdf_path, model, llm_prompt):
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
|
| 423 |
-
|
| 424 |
-
|
|
|
|
|
|
|
| 425 |
|
|
|
|
| 426 |
iface = gr.Interface(
|
| 427 |
fn=identify_headers_and_save_excel,
|
| 428 |
inputs=[
|
| 429 |
-
gr.Textbox(label="PDF URL"),
|
| 430 |
-
gr.Textbox(label="Model Type"),
|
| 431 |
-
gr.Textbox(label="LLM Prompt")
|
| 432 |
],
|
| 433 |
-
outputs=gr.File(label="Download Excel")
|
|
|
|
| 434 |
)
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
except:
|
| 439 |
-
pass
|
| 440 |
-
iface.launch()
|
|
|
|
| 409 |
|
| 410 |
|
| 411 |
def identify_headers_and_save_excel(pdf_path, model, llm_prompt):
    """Identify headers in a PDF via the LLM helper and save them to Excel.

    Args:
        pdf_path: Location of the PDF; forwarded unchanged to
            ``identify_headers_with_openrouter`` (presumably a URL or a
            local path -- confirm against that helper).
        model: Model identifier forwarded to the helper.
        llm_prompt: Prompt text forwarded to the helper.

    Returns:
        The absolute path of the written ``header_analysis_output.xlsx``
        workbook, or ``None`` if any step raised an exception.
    """
    try:
        result = identify_headers_with_openrouter(pdf_path, model, llm_prompt)

        if not result:
            # Still produce a downloadable workbook so the caller receives a
            # file explaining that nothing was found instead of no output.
            logger.warning("No headers found or LLM failed. Creating an empty report.")
            df = pd.DataFrame([{"System Message": "No headers were identified by the LLM."}])
        else:
            df = pd.DataFrame(result)

        # Absolute path so the consumer does not depend on its own working
        # directory when resolving the file.
        # NOTE(review): the file name is fixed, so every call overwrites the
        # previous report in the current working directory.
        output_path = os.path.abspath("header_analysis_output.xlsx")

        # Name the engine explicitly rather than relying on pandas' default.
        df.to_excel(output_path, index=False, engine="openpyxl")

        logger.info("File successfully saved to %s", output_path)
        return output_path

    except Exception as e:
        # logger.exception records the full traceback alongside the message;
        # the previous logger.error call kept only str(e).
        logger.exception("Critical error in processing: %s", e)
        # None signals failure to the caller without raising.
        return None
|
| 437 |
|
| 438 |
+
# Input widgets, in the positional order expected by
# identify_headers_and_save_excel(pdf_path, model, llm_prompt).
_interface_inputs = [
    gr.Textbox(label="PDF URL", placeholder="https://example.com/file.pdf"),
    gr.Textbox(label="Model Type", value="google/gemini-2.0-flash-001"),  # pre-filled example model
    gr.Textbox(label="LLM Prompt", lines=3),
]

# The function returns a file path (or None), surfaced as a download widget.
_interface_output = gr.File(label="Download Excel Results")

iface = gr.Interface(
    fn=identify_headers_and_save_excel,
    inputs=_interface_inputs,
    outputs=_interface_output,
    title="PDF Header Extractor",
)

# debug=True so that errors are shown in the console.
iface.launch(debug=True)
|
|
|
|
|
|
|
|
|