Spaces:

alx-d
/

pdf2txt

Sleeping

App Files Files Community

alxd committed on Mar 24, 2025

Commit

50ffeff

1 Parent(s): b7811cf

basic cleaning tasks

Browse files

Files changed (2) hide show

pdf2txt.py +98 -173
requirements.txt +2 -0

pdf2txt.py CHANGED Viewed

@@ -8,17 +8,16 @@ import threading
 import uuid
 import queue
 import time
 from transformers import AutoTokenizer
 from mistralai import Mistral
 from huggingface_hub import InferenceClient
 # ------------------------------
 # Helper functions and globals
 # ------------------------------
 sheet_data = None
 file_name = None
-sheet = None
 def debug_print(message: str):
     print(f"[{datetime.datetime.now().isoformat()}] {message}", flush=True)
@@ -41,7 +40,7 @@ def count_tokens(text: str) -> int:
     return len(text.split())
 def generate_response(prompt: str, model_name: str, sheet_data: str) -> str:
-    full_prompt = f"{prompt}\n\nSheet Data:\n{sheet_data}"  # Append sheet data to prompt
     if "Mistral" in model_name:
         mistral_api_key = os.getenv("MISTRAL_API_KEY")
@@ -73,30 +72,61 @@ def generate_response(prompt: str, model_name: str, sheet_data: str) -> str:
     else:
         raise ValueError("Invalid model selection. Please choose either 'Mistral-API' or 'Meta-Llama-3'.")
 def process_query(prompt: str, model_name: str):
     global sheet_data
-    # Handle the case where sheet_data might be None
     if sheet_data is None:
         sheet_data = get_sheet_data()
-    full_prompt = f"{prompt}\n\nSheet Data:\n{sheet_data}"  # Append sheet data to prompt
     debug_print(f"Processing query with model {model_name}: {full_prompt}")
-    # Generate the response using the specified model and sheet data
     response = generate_response(prompt, model_name, sheet_data)
-    # Count the number of tokens for input and output
-    input_tokens = count_tokens(prompt + "\n\n" + sheet_data)  # Include sheet data in the input token count
     output_tokens = count_tokens(response)
-    # Return the response along with token counts
     return response, f"Input tokens: {input_tokens}", f"Output tokens: {output_tokens}"
 def ui_process_query(prompt, model_name):
     return process_query(prompt, model_name)
 # ------------------------------
 # Global variables for background jobs
 # ------------------------------
@@ -114,7 +144,6 @@ def get_job_list():
     if not jobs:
         return "No jobs found. Submit a query or load files to create jobs."
-    # Sort jobs by start time (newest first)
     sorted_jobs = sorted(
         [(job_id, job_info) for job_id, job_info in jobs.items()],
         key=lambda x: x[1].get("start_time", 0),
@@ -127,11 +156,8 @@ def get_job_list():
         query = job_info.get("query", "")
         start_time = job_info.get("start_time", 0)
         time_str = datetime.datetime.fromtimestamp(start_time).strftime("%Y-%m-%d %H:%M:%S")
-        # Create a shortened query preview
         query_preview = query[:30] + "..." if query and len(query) > 30 else query or "N/A"
-        # Color-code the status display
         if status == "processing":
             status_formatted = f"<span style='color: red'>⏳ {status}</span>"
         elif status == "completed":
@@ -148,33 +174,14 @@ def get_job_list():
 def get_sheet_data():
     global sheet_data
-    global file_name
-    global sheet
-    file = file_name
-    sheet_name = sheet
-    print ("file name: ",file," sheet name: ",sheet_name," ")
-    if sheet_data is None:
-        try:
-            df = pd.read_excel(file.name, sheet_name=sheet_name)
-            sheet_data = df.to_string(index=False)  # Convert sheet data to string format
-            return sheet_data  # Display sheet data in UI
-        except Exception as e:
-            return f"Error reading sheet: {str(e)}"
-    else:
-        return sheet_data
-# Assuming process_in_background is using threading to call process_query
 def process_in_background(job_id, func, args):
-    """Runs a function in the background and stores its result in a shared queue."""
     result = func(*args)
     results_queue.put((job_id, result))
     debug_print(f"Job {job_id} finished processing in background.")
 def submit_query_async(query, model_choice=None):
-    """Asynchronous version of submit_query_updated to prevent timeouts."""
     global last_job_id
     global sheet_data
@@ -184,8 +191,6 @@ def submit_query_async(query, model_choice=None):
     job_id = str(uuid.uuid4())
     debug_print(f"Starting async job {job_id} for query: {query}")
-    # Start background thread to process the query
     threading.Thread(
         target=process_in_background,
         args=(job_id, process_query, [query, model_choice or "Mistral-API"])
@@ -207,9 +212,9 @@ def submit_query_async(query, model_choice=None):
         f"Job ID: {job_id}",
         f"Input tokens: {count_tokens(query)}",
         "Output tokens: pending",
-        job_id,  # For UI job id update
-        query,  # For UI query display update
-        get_job_list()  # Updated job list
     )
 def job_selected(job_id):
@@ -228,7 +233,6 @@ def check_job_status(job_id):
         html_response = "<div style='font-family: monospace;'><p>Please enter a job ID.</p></div>"
         return html_response, "", "", "", ""
-    # Process any completed jobs in the results queue
     try:
         while not results_queue.empty():
             completed_id, result = results_queue.get_nowait()
@@ -287,7 +291,6 @@ def cleanup_old_jobs():
     to_delete = []
     for job_id, job in jobs.items():
-        # Completed jobs older than 24 hours and processing jobs older than 48 hours will be removed.
         if job["status"] == "completed" and (current_time - job.get("end_time", 0)) > 86400:
             to_delete.append(job_id)
         elif job["status"] == "processing" and (current_time - job.get("start_time", 0)) > 172800:
@@ -301,10 +304,8 @@ def cleanup_old_jobs():
 # Function to run query (dummy function)
 def run_query(max_value):
-    # Simulate a data retrieval or processing function
     return [[i, i**2] for i in range(1, max_value + 1)]
-# Function to call both refresh_job_list and check_job_status using the last job ID
 def periodic_update(is_checked):
     interval = 3 if is_checked else None
     debug_print(f"Auto-refresh checkbox is {'checked' if is_checked else 'unchecked'}, every={interval}")
@@ -312,111 +313,46 @@ def periodic_update(is_checked):
         global last_job_id
         job_list_md = refresh_job_list()
         job_status = check_job_status(last_job_id) if last_job_id else ("No job ID available", "", "", "", "")
-        # Extract plain text from HTML for status_text
         from bs4 import BeautifulSoup
         html_content = job_status[0]
         plain_text = ""
         if html_content:
             soup = BeautifulSoup(html_content, "html.parser")
             plain_text = soup.get_text()
-        # Return all expected outputs, including status_text
         return job_list_md, job_status[0], plain_text, job_status[1], job_status[2], job_status[3], job_status[4]
     else:
-        # Return empty values to stop updates - make sure to match the number of expected outputs
         return "", "", "", "", "", "", ""
-# Add email sending function
-def send_email(email_address, content, is_formatted=True):
-    if not email_address or "@" not in email_address:
-        return "Please enter a valid email address"
-    try:
-        creds = get_gmail_credentials()
-        service = build("gmail", "v1", credentials=creds)
-        # Create email message with appropriate MIME type
-        msg = MIMEMultipart()
-        msg["to"] = email_address
-        msg["subject"] = "Scouting AI Report"
-        msg.attach(MIMEText(content, "html" if is_formatted else "plain"))
-        # Encode email message in base64
-        encoded_msg = base64.urlsafe_b64encode(msg.as_bytes()).decode()
-        send_message = {"raw": encoded_msg}
-        # Send email using Gmail API
-        service.users().messages().send(userId="me", body=send_message).execute()
-        return "Email sent successfully via Gmail API!"
-    except Exception as e:
-        return f"Failed to send email: {str(e)}"
-# Function to copy content to clipboard
-def copy_to_clipboard(content):
-    import pyperclip
-    pyperclip.copy(content)
-    return "Copied to clipboard!"
-# Function to convert HTML to plain text using BeautifulSoup
-def copy_plain_text(html_content):
-    try:
-        from bs4 import BeautifulSoup
-    except ImportError:
-        return "Error: BeautifulSoup is required to convert HTML to plain text. Please install it."
-    soup = BeautifulSoup(html_content, "html.parser")
-    plain_text = soup.get_text()
-    import pyperclip
-    pyperclip.copy(plain_text)
-    return "Copied to clipboard!"
-# Default prompt template
-default_prompt = (
-    "you are a scouter and play against this player with this stats. "
-    "Make an scouting report for head coach with weaknesses and strength, and present strategy to stop his strength "
-    "and explore his weaknesses acoording with this stats, make easily to read combine strength with strategy to stop "
-    "and weaknesses with explore and in the final of the raport Key points of emphesize. Use html to output the image and dark color backgrounds (pallette dark green, dark red, etc.) for he different sections of the formatted output. "
-)
 # ------------------------------
 # Gradio UI Layout: PDF Conversion App
 # ------------------------------
 with gr.Blocks() as app:
     # App Title and Description
-    gr.Markdown("## PDF 2 TXT")
-    gr.Markdown("Welcome to the PDF conversion App.")
-    # Two-column layout for top section (File Load and Job Information)
     with gr.Row():
         # Left Column: File Load Section (50% width)
         with gr.Column(scale=1):
             gr.Markdown("### 📁 Load File Section")
-            gr.Markdown("Upload your **.pdf** file below, specify the sheet name, and click *Load File* to process your file.")
             file_input = gr.File(label="Upload .pdf File")
             page_start_input_file = gr.Textbox(label="Page Start")
             page_end_input_file = gr.Textbox(label="Page End")
             load_button_file = gr.Button("Load File")
-            sheet_output_file = gr.Textbox(label="Pages", interactive=False)
         # Right Column: Job Information Section (50% width)
         with gr.Column(scale=1):
             gr.Markdown("### 📊 Job Information")
             gr.Markdown("View all submitted jobs, refresh the list, and check the status of individual jobs.")
-            # Fixed-height job list with scrollbar
             job_list_display = gr.Markdown(
                 get_job_list(),
                 elem_id="job-list-display",
                 elem_classes=["scrollable-job-list"]
             )
-            # Add CSS for scrollable job list
             gr.HTML("""
             <style>
             .scrollable-job-list {
@@ -428,57 +364,49 @@ with gr.Blocks() as app:
             }
             </style>
             """)
             refresh_button = gr.Button("Refresh Job List")
             gr.Markdown("#### 🔍 Check Job Status")
             job_id_input = gr.Textbox(label="Enter Job ID")
             check_status_button = gr.Button("Check Job Status")
-    # Cleaning Task Section (left column, below File Load)
     with gr.Row():
-        # Left Column: Submit Query Section
         with gr.Column(scale=1):
-            gr.Markdown("### Cleaning Tasks")
-            with gr.Row():
-                auto_refresh_checkbox = gr.Checkbox(
-                    label="Enable Auto Refresh",
-                    value=False  # Default to unchecked
-                )
-                submit_button = gr.Button("Submit Cleaning Task ")
-                # Use a Checkbox to control the periodic updates
-    # Submit Query Section (left column, below Cleaning Tasks)
     with gr.Row():
-        # Left Column: Submit Query Section
         with gr.Column(scale=1):
             gr.Markdown("### 🚀 Submit Query")
             gr.Markdown("Enter your prompt below and choose a model. Your query will be processed in the background.")
             model_dropdown = gr.Dropdown(
                 choices=["🇺🇸 Remote Meta-Llama-3", "🇪🇺 Mistral-API"],
-                value="🇪🇺 Mistral-API",  # Default model set to Mistral
                 label="Select Model"
             )
-            prompt_input = gr.Textbox(label="Enter your prompt", value=default_prompt, lines=6)
             with gr.Row():
-                auto_refresh_checkbox = gr.Checkbox(
                     label="Enable Auto Refresh",
-                    value=False  # Default to unchecked
                 )
-                submit_button = gr.Button("Submit Query ")
-                # Use a Checkbox to control the periodic updates
-            # Add a textarea to store the plain text version for copying
-            status_text = gr.Textbox(label="Response Text ", visible=True)
             response_output = gr.Textbox(label="Response", interactive=False)
             token_info = gr.Textbox(label="Token Info", interactive=False)
-        # Job Status Output in right column
         with gr.Column(scale=1):
-            # Change Job Status output to an HTML component for proper formatting
             status_output = gr.HTML(label="Job Status", interactive=False)
             job_id_display = gr.Textbox(label="Job ID", interactive=False)
             input_tokens_display = gr.Textbox(label="Input Tokens", interactive=False)
             output_tokens_display = gr.Textbox(label="Output Tokens", interactive=False)
@@ -488,32 +416,39 @@ with gr.Blocks() as app:
     # Set up interactions
     # ------------------------------
-    # Load file interaction (dummy function for now)
-    def load_file(file, sheet_name):
-        global sheet_data
-        global file_name
-        global sheet
         file_name = file
-        sheet = sheet_name
-        if file is None or sheet_name.strip() == "":
-            return "Please upload a file and enter a valid sheet name."
         try:
-            df = pd.read_excel(file.name, sheet_name=sheet_name)
-            sheet_data = df.to_string(index=False)  # Convert sheet data to string format
-            return sheet_data  # Display sheet data in UI
         except Exception as e:
-            return f"Error reading sheet: {str(e)}"
     load_button_file.click(
         fn=load_file,
-        inputs=[file_input, sheet_input_file],
         outputs=sheet_output_file
     )
-    # When submitting a query asynchronously
-    submit_button.click(
         fn=submit_query_async,
         inputs=[prompt_input, model_dropdown],
         outputs=[
@@ -523,7 +458,6 @@ with gr.Blocks() as app:
         ]
     )
-    # Check job status interaction
     check_status_button.click(
         fn=check_job_status,
         inputs=[job_id_input],
@@ -531,28 +465,19 @@ with gr.Blocks() as app:
                  output_tokens_display, job_query_display]
     )
-    # Refresh the job list
     refresh_button.click(
         fn=refresh_job_list,
         inputs=[],
         outputs=job_list_display
     )
-    # Use the Checkbox to control the periodic updates
-    auto_refresh_checkbox.change(
         fn=periodic_update,
-        inputs=[auto_refresh_checkbox],
         outputs=[job_list_display, status_output, status_text, job_id_display, input_tokens_display, output_tokens_display, job_query_display],
         every=3
     )
-    # Connect the copy button to show the text in the textbox and make it visible temporarily
-    def show_copy_text(text):
-        # Simply return the text value and make the component visible
-        return gr.update(value=text, visible=True)
 if __name__ == "__main__":
     debug_print("Launching Gradio UI...")
     app.queue().launch(share=False)

 import uuid
 import queue
 import time
+import fitz  # PyMuPDF for reading PDF files
 from transformers import AutoTokenizer
 from mistralai import Mistral
 from huggingface_hub import InferenceClient
 # ------------------------------
 # Helper functions and globals
 # ------------------------------
 sheet_data = None
 file_name = None
 def debug_print(message: str):
     print(f"[{datetime.datetime.now().isoformat()}] {message}", flush=True)
     return len(text.split())
 def generate_response(prompt: str, model_name: str, sheet_data: str) -> str:
+    full_prompt = f"{prompt}\n\nSheet Data:\n{sheet_data}"  # Append loaded text to prompt
     if "Mistral" in model_name:
         mistral_api_key = os.getenv("MISTRAL_API_KEY")
     else:
         raise ValueError("Invalid model selection. Please choose either 'Mistral-API' or 'Meta-Llama-3'.")
 def process_query(prompt: str, model_name: str):
     global sheet_data
     if sheet_data is None:
         sheet_data = get_sheet_data()
+    full_prompt = f"{prompt}\n\nSheet Data:\n{sheet_data}"
     debug_print(f"Processing query with model {model_name}: {full_prompt}")
     response = generate_response(prompt, model_name, sheet_data)
+    input_tokens = count_tokens(prompt + "\n\n" + sheet_data)
     output_tokens = count_tokens(response)
     return response, f"Input tokens: {input_tokens}", f"Output tokens: {output_tokens}"
 def ui_process_query(prompt, model_name):
     return process_query(prompt, model_name)
+# ------------------------------
+# Cleaning Functions
+# ------------------------------
def clean_text(text: str, remove_spaces: bool, remove_headers_footers: bool, lowercase: bool, remove_special: bool) -> str:
    """Clean extracted text according to the selected options.

    The steps run in a fixed order so that they do not defeat each other:
    line-based header/footer removal first (it needs the original newlines),
    then lowercasing, then special-character removal, and whitespace
    collapsing last (so gaps left by removed characters are also collapsed).

    Args:
        text: The text to clean.
        remove_spaces: Collapse every run of whitespace (including newlines)
            into a single space and trim both ends.
        remove_headers_footers: Drop every non-blank line whose stripped
            content occurs more than once in the text. This is a heuristic:
            any repeated line is removed, not only true headers/footers.
        lowercase: Convert the text to lowercase.
        remove_special: Remove every character that is not an ASCII letter,
            an ASCII digit or whitespace.

    Returns:
        The cleaned text.
    """
    if remove_headers_footers:
        lines = text.split('\n')
        # Count lines by their stripped content so that trailing/leading
        # whitespace differences do not hide a repeated header.
        freq = {}
        for line in lines:
            key = line.strip()
            if key:
                freq[key] = freq.get(key, 0) + 1
        # Blank lines are never counted, so they are always kept.
        text = "\n".join(
            line for line in lines if freq.get(line.strip(), 0) <= 1
        )
    if lowercase:
        text = text.lower()
    if remove_special:
        text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    # Must run last: collapsing destroys the line structure the header
    # heuristic relies on, and earlier steps can leave doubled spaces.
    if remove_spaces:
        text = re.sub(r'\s+', ' ', text).strip()
    return text
def execute_cleaning(text: str, remove_spaces: bool, remove_headers: bool, lowercase: bool, remove_special: bool) -> str:
    """Run ``clean_text`` on *text*, or report that there is nothing to clean.

    Returns the fixed message "No text available for cleaning." when *text*
    is empty, ``None`` or whitespace-only; otherwise returns the result of
    ``clean_text`` called with the given option flags.
    """
    has_content = bool(text) and text.strip() != ""
    if has_content:
        return clean_text(text, remove_spaces, remove_headers, lowercase, remove_special)
    return "No text available for cleaning."
 # ------------------------------
 # Global variables for background jobs
 # ------------------------------
     if not jobs:
         return "No jobs found. Submit a query or load files to create jobs."
     sorted_jobs = sorted(
         [(job_id, job_info) for job_id, job_info in jobs.items()],
         key=lambda x: x[1].get("start_time", 0),
         query = job_info.get("query", "")
         start_time = job_info.get("start_time", 0)
         time_str = datetime.datetime.fromtimestamp(start_time).strftime("%Y-%m-%d %H:%M:%S")
         query_preview = query[:30] + "..." if query and len(query) > 30 else query or "N/A"
         if status == "processing":
             status_formatted = f"<span style='color: red'>⏳ {status}</span>"
         elif status == "completed":
 def get_sheet_data():
     global sheet_data
+    return sheet_data if sheet_data else "No data loaded."
 def process_in_background(job_id, func, args):
     """Call ``func(*args)`` and publish the outcome on ``results_queue``.

     The outcome is enqueued as a ``(job_id, result)`` tuple, after which a
     completion message is written through ``debug_print``.
     """
     outcome = func(*args)
     finished = (job_id, outcome)
     results_queue.put(finished)
     debug_print(f"Job {job_id} finished processing in background.")
 def submit_query_async(query, model_choice=None):
     global last_job_id
     global sheet_data
     job_id = str(uuid.uuid4())
     debug_print(f"Starting async job {job_id} for query: {query}")
     threading.Thread(
         target=process_in_background,
         args=(job_id, process_query, [query, model_choice or "Mistral-API"])
         f"Job ID: {job_id}",
         f"Input tokens: {count_tokens(query)}",
         "Output tokens: pending",
+        job_id,
+        query,
+        get_job_list()
     )
 def job_selected(job_id):
         html_response = "<div style='font-family: monospace;'><p>Please enter a job ID.</p></div>"
         return html_response, "", "", "", ""
     try:
         while not results_queue.empty():
             completed_id, result = results_queue.get_nowait()
     to_delete = []
     for job_id, job in jobs.items():
         if job["status"] == "completed" and (current_time - job.get("end_time", 0)) > 86400:
             to_delete.append(job_id)
         elif job["status"] == "processing" and (current_time - job.get("start_time", 0)) > 172800:
 # Function to run query (dummy function)
 def run_query(max_value):
     """Return ``[n, n squared]`` rows for every integer n from 1 to *max_value*.

     An empty list is returned when *max_value* is less than 1.
     """
     rows = []
     for value in range(1, max_value + 1):
         rows.append([value, value ** 2])
     return rows
 def periodic_update(is_checked):
     interval = 3 if is_checked else None
     debug_print(f"Auto-refresh checkbox is {'checked' if is_checked else 'unchecked'}, every={interval}")
         global last_job_id
         job_list_md = refresh_job_list()
         job_status = check_job_status(last_job_id) if last_job_id else ("No job ID available", "", "", "", "")
         from bs4 import BeautifulSoup
         html_content = job_status[0]
         plain_text = ""
         if html_content:
             soup = BeautifulSoup(html_content, "html.parser")
             plain_text = soup.get_text()
         return job_list_md, job_status[0], plain_text, job_status[1], job_status[2], job_status[3], job_status[4]
     else:
         return "", "", "", "", "", "", ""
 # ------------------------------
 # Gradio UI Layout: PDF Conversion App
 # ------------------------------
 with gr.Blocks() as app:
     # App Title and Description
+    gr.Markdown("## 📖 PDF Conversion")
+    gr.Markdown("Text cleaning and processing tools.")
+    # Top section: File Load and Job Information (two columns)
     with gr.Row():
         # Left Column: File Load Section (50% width)
         with gr.Column(scale=1):
             gr.Markdown("### 📁 Load File Section")
+            gr.Markdown("Upload your **.pdf** file below and specify the page range to extract text.")
             file_input = gr.File(label="Upload .pdf File")
             page_start_input_file = gr.Textbox(label="Page Start")
             page_end_input_file = gr.Textbox(label="Page End")
             load_button_file = gr.Button("Load File")
+            sheet_output_file = gr.Textbox(label="Extracted Text", interactive=False)
         # Right Column: Job Information Section (50% width)
         with gr.Column(scale=1):
             gr.Markdown("### 📊 Job Information")
             gr.Markdown("View all submitted jobs, refresh the list, and check the status of individual jobs.")
             job_list_display = gr.Markdown(
                 get_job_list(),
                 elem_id="job-list-display",
                 elem_classes=["scrollable-job-list"]
             )
             gr.HTML("""
             <style>
             .scrollable-job-list {
             }
             </style>
             """)
             refresh_button = gr.Button("Refresh Job List")
             gr.Markdown("#### 🔍 Check Job Status")
             job_id_input = gr.Textbox(label="Enter Job ID")
             check_status_button = gr.Button("Check Job Status")
+    # New row: Cleaning Tasks placed in two equal columns under the load section
     with gr.Row():
+        # Left half: Cleaning Tasks checkboxes and Clean button
         with gr.Column(scale=1):
+            gr.Markdown("### Cleaning Options")
+            remove_spaces_checkbox = gr.Checkbox(label="Remove extra spaces & newlines: Clean unnecessary whitespace.", value=True)
+            remove_headers_checkbox = gr.Checkbox(label="Remove headers/footers: If repeated text appears on every page", value=False)
+            lowercase_checkbox = gr.Checkbox(label="Convert text to lowercase: For uniformity in text analysis.", value=False)
+            remove_special_checkbox = gr.Checkbox(label="Remove special characters: Useful for structured data extraction", value=False)
+            clean_button = gr.Button("Clean")
+        # Right half: Display Cleaned Text
+        with gr.Column(scale=1):
+            cleaned_output = gr.Textbox(label="Cleaned Text", interactive=False)
+    # Submit Query Section remains unchanged
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### 🚀 Submit Query")
             gr.Markdown("Enter your prompt below and choose a model. Your query will be processed in the background.")
             model_dropdown = gr.Dropdown(
                 choices=["🇺🇸 Remote Meta-Llama-3", "🇪🇺 Mistral-API"],
+                value="🇪🇺 Mistral-API",
                 label="Select Model"
             )
+            prompt_input = gr.Textbox(label="Enter your prompt", value="", lines=6)
             with gr.Row():
+                auto_refresh_checkbox_query = gr.Checkbox(
                     label="Enable Auto Refresh",
+                    value=False
                 )
+                submit_query_button = gr.Button("Submit Query")
+            status_text = gr.Textbox(label="Response Text", visible=True)
             response_output = gr.Textbox(label="Response", interactive=False)
             token_info = gr.Textbox(label="Token Info", interactive=False)
         with gr.Column(scale=1):
             status_output = gr.HTML(label="Job Status", interactive=False)
             job_id_display = gr.Textbox(label="Job ID", interactive=False)
             input_tokens_display = gr.Textbox(label="Input Tokens", interactive=False)
             output_tokens_display = gr.Textbox(label="Output Tokens", interactive=False)
     # Set up interactions
     # ------------------------------
+    # Updated Load file interaction: read PDF pages
+    def load_file(file, page_start, page_end):
+        global sheet_data, file_name
         file_name = file
+        if file is None or str(page_start).strip() == "" or str(page_end).strip() == "":
+            return "Please upload a file and enter valid page numbers."
         try:
+            doc = fitz.open(file.name)
+            ps = int(page_start)
+            pe = int(page_end)
+            text = ""
+            # Convert page numbers from 1-indexed to 0-indexed
+            for page_num in range(ps - 1, pe):
+                text += doc[page_num].get_text() + "\n"
+            sheet_data = text
+            return text
         except Exception as e:
+            return f"Error reading PDF: {str(e)}"
     load_button_file.click(
         fn=load_file,
+        inputs=[file_input, page_start_input_file, page_end_input_file],
         outputs=sheet_output_file
     )
+    # Cleaning button interaction: clean the loaded text using selected options.
+    clean_button.click(
+        fn=execute_cleaning,
+        inputs=[sheet_output_file, remove_spaces_checkbox, remove_headers_checkbox, lowercase_checkbox, remove_special_checkbox],
+        outputs=cleaned_output
+    )
+    submit_query_button.click(
         fn=submit_query_async,
         inputs=[prompt_input, model_dropdown],
         outputs=[
         ]
     )
     check_status_button.click(
         fn=check_job_status,
         inputs=[job_id_input],
                  output_tokens_display, job_query_display]
     )
     refresh_button.click(
         fn=refresh_job_list,
         inputs=[],
         outputs=job_list_display
     )
+    auto_refresh_checkbox_query.change(
         fn=periodic_update,
+        inputs=[auto_refresh_checkbox_query],
         outputs=[job_list_display, status_output, status_text, job_id_display, input_tokens_display, output_tokens_display, job_query_display],
         every=3
     )
 if __name__ == "__main__":
     debug_print("Launching Gradio UI...")
     app.queue().launch(share=False)

requirements.txt CHANGED Viewed

@@ -41,3 +41,5 @@ pydantic==2.9.0
 sentence-transformers>=2.4.0
 mistralai==1.5.0

 sentence-transformers>=2.4.0
 mistralai==1.5.0
+PyMuPDF