# Hugging Face Space (status: Running)
| import gradio as gr | |
| import os | |
| import json | |
| import requests | |
| from io import BytesIO | |
| from datetime import datetime | |
| from difflib import SequenceMatcher | |
| import pandas as pd | |
| from io import BytesIO | |
| import fitz # PyMuPDF | |
| from collections import defaultdict, Counter | |
| from urllib.parse import urlparse, unquote | |
| import os | |
| from io import BytesIO | |
| import re | |
| import requests | |
| import pandas as pd | |
| import InitialMarkupsLLM_huggingFace | |
| import fitz # PyMuPDF | |
| import re | |
| import urllib.parse | |
| import difflib | |
| import copy | |
| # import tsadropboxretrieval | |
| import urllib.parse | |
| import logging | |
# Root logging configuration: records at DEBUG level and above are sent both
# to a console stream handler and to the file debug.log, which is opened in
# 'w' mode and therefore starts empty on every run.
_log_format = '%(asctime)s - %(levelname)s - %(message)s'
_log_handlers = [
    logging.StreamHandler(),                     # console output
    logging.FileHandler('debug.log', mode='w'),  # persistent copy on disk
]
logging.basicConfig(
    level=logging.DEBUG,
    format=_log_format,
    handlers=_log_handlers,
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Gradio UI definition. Four free-text fields are passed, in the order listed
# below, to InitialMarkupsLLM_huggingFace.identify_headers_and_save_excel; the
# function's result is exposed through a file-download component.
_input_labels = ("PDF URL", "Model Type", "LLM Prompt", "LLM Prompt Hierarchy")
_input_fields = [gr.Textbox(label=caption) for caption in _input_labels]
_excel_download = gr.File(label="Download Excel Results")

iface = gr.Interface(
    fn=InitialMarkupsLLM_huggingFace.identify_headers_and_save_excel,
    inputs=_input_fields,
    outputs=_excel_download,
    title="PDF Header Extractor",
)

# Start the app; debug=True is passed so that errors show up in the console.
iface.launch(debug=True)