Marthee committed on
Commit
1143358
·
verified ·
1 Parent(s): 691ff01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -22
app.py CHANGED
@@ -409,32 +409,43 @@ def identify_headers_with_openrouter(pdf_path, model, LLM_prompt, pages_to_check
409
 
410
 
411
  def identify_headers_and_save_excel(pdf_path, model, llm_prompt):
412
- # Call your existing function
413
- result = identify_headers_with_openrouter(pdf_path, model, llm_prompt)
414
-
415
- if not result:
416
- return None
417
- df = pd.DataFrame(result)
418
-
419
- # Save Excel to a file on disk
420
- output_path = "output.xlsx"
421
- df.to_excel(output_path, index=False, engine='openpyxl')
 
 
 
 
 
 
 
 
 
 
422
 
423
-
424
- return output_path # return file path, not BytesIO
 
 
425
 
 
426
  iface = gr.Interface(
427
  fn=identify_headers_and_save_excel,
428
  inputs=[
429
- gr.Textbox(label="PDF URL"),
430
- gr.Textbox(label="Model Type"),
431
- gr.Textbox(label="LLM Prompt")
432
  ],
433
- outputs=gr.File(label="Download Excel")
 
434
  )
435
- if __name__ == "__main__":
436
- try:
437
- iface.close() # Close existing instance if it exists
438
- except:
439
- pass
440
- iface.launch()
 
409
 
410
 
411
  def identify_headers_and_save_excel(pdf_path, model, llm_prompt):
412
+ try:
413
+ # 1. Get the result from your LLM function
414
+ result = identify_headers_with_openrouter(pdf_path, model, llm_prompt)
415
+
416
+ # 2. Safety Check: If LLM failed or returned nothing
417
+ if not result:
418
+ logger.warning("No headers found or LLM failed. Creating an empty report.")
419
+ df = pd.DataFrame([{"System Message": "No headers were identified by the LLM."}])
420
+ else:
421
+ df = pd.DataFrame(result)
422
+
423
+ # 3. Use an Absolute Path for the output
424
+ # This ensures Gradio knows exactly where the file is
425
+ output_path = os.path.abspath("header_analysis_output.xlsx")
426
+
427
+ # 4. Save using the engine explicitly
428
+ df.to_excel(output_path, index=False, engine='openpyxl')
429
+
430
+ logger.info(f"File successfully saved to {output_path}")
431
+ return output_path
432
 
433
+ except Exception as e:
434
+ logger.error(f"Critical error in processing: {str(e)}")
435
+ # Return None or a custom error message to Gradio
436
+ return None
437
 
438
+ # Improved launch with debug mode enabled
439
  iface = gr.Interface(
440
  fn=identify_headers_and_save_excel,
441
  inputs=[
442
+ gr.Textbox(label="PDF URL", placeholder="https://example.com/file.pdf"),
443
+ gr.Textbox(label="Model Type", value="google/gemini-2.0-flash-001"), # Default example
444
+ gr.Textbox(label="LLM Prompt", lines=3)
445
  ],
446
+ outputs=gr.File(label="Download Excel Results"),
447
+ title="PDF Header Extractor"
448
  )
449
+
450
+ # Launch with debug=True to see errors in the console
451
+ iface.launch(debug=True)