Marthee committed on
Commit
0a8644a
·
verified ·
1 Parent(s): 8592290

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import json
4
+ import requests
5
+ from io import BytesIO
6
+ from datetime import datetime
7
+ from difflib import SequenceMatcher
8
+ import pandas as pd
9
+ from io import BytesIO
10
+ import fitz # PyMuPDF
11
+ from collections import defaultdict, Counter
12
+ from urllib.parse import urlparse, unquote
13
+ import os
14
+ from io import BytesIO
15
+ import re
16
+ import requests
17
+ import pandas as pd
18
+ import fitz # PyMuPDF
19
+ import re
20
+ import urllib.parse
21
+ import difflib
22
+
23
+ import copy
24
+ # import tsadropboxretrieval
25
+
26
+ import urllib.parse
27
+ import logging
28
+
29
+
30
+ # Set up logging to see everything
31
+ logging.basicConfig(
32
+ level=logging.DEBUG,
33
+ format='%(asctime)s - %(levelname)s - %(message)s',
34
+ handlers=[
35
+ logging.StreamHandler(), # Print to console
36
+ logging.FileHandler('debug.log', mode='w') # Save to file
37
+ ]
38
+ )
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+
43
+
44
+ # Build the Gradio interface; it is launched in debug mode at the end of the file
45
+ iface = gr.Interface(
46
+ fn=identify_headers_and_save_excel,
47
+ inputs=[
48
+ gr.Textbox(label="PDF URL"),
49
+ gr.Textbox(label="Model Type"), # Default example
50
+ gr.Textbox(label="LLM Prompt"),
51
+ gr.Textbox(label="LLM Prompt Hierarchy")
52
+ ],
53
+ outputs=gr.File(label="Download Excel Results"),
54
+ title="PDF Header Extractor"
55
+ )
56
+
57
+ # Launch with debug=True to see errors in the console
58
+ iface.launch(debug=True)