import gradio as gr
import pdfplumber
import pandas as pd
import os

# Input workbook to filter and the destination for the filtered result.
input_file_path = '/mnt/data/extracted_data (1).xlsx'
output_file_path = '/mnt/data/filtered_positions_10_to_450.xlsx'

# Directory that holds the input and output workbooks.
directory_path = "/mnt/data/"

# Ensure the directory exists. exist_ok=True makes this a no-op when the
# directory is already there, so no separate os.path.exists() check (and no
# check-then-create race) is needed.
os.makedirs(directory_path, exist_ok=True)

# Load the input workbook up front so a missing file is reported immediately.
if os.path.exists(input_file_path):
    df = pd.read_excel(input_file_path)
    print("File loaded successfully.")
else:
    print(f"File not found: {input_file_path}. Please ensure the file is in the directory.")

# List the directory contents as a diagnostic aid; the directory is guaranteed
# to exist at this point because it was created above.
files = os.listdir(directory_path)
print("Files in /mnt/data/:", files)

# Define the function to extract data from PDF
def extract_data(pdf_file_path, start_pos, end_pos):
    """Extract order positions from a PDF and save them to an Excel workbook.

    The PDF is opened and the text of every page is printed for debugging.
    The rows written to the workbook currently come from a hard-coded example
    table rather than from the parsed text; they are restricted to the
    inclusive range ``start_pos <= Pos <= end_pos`` before being saved.

    Args:
        pdf_file_path: Path of the PDF file to open.
        start_pos: Lowest "Pos" value to keep; ``None`` means no lower bound.
        end_pos: Highest "Pos" value to keep; ``None`` means no upper bound.

    Returns:
        The path of the written Excel file on success, otherwise a string
        starting with "Error:" that describes what went wrong.
    """
    # NOTE(review): failures are reported as returned strings (not raised) to
    # keep the existing contract; confirm how the caller displays them.
    try:
        # Load and process the PDF
        with pdfplumber.open(pdf_file_path) as pdf:
            for page in pdf.pages:
                text = page.extract_text()
                if text is None:
                    return "Error: Could not extract text from the PDF. Please check the file format."

                print("Extracted Text:", text)  # Debugging line

        # Example extracted data structure (placeholder until the page text
        # is actually parsed into rows).
        extracted_data = {
            "Pos": [10, 20, 30],
            "Item Code": ["155569003011", "155569003012", "155569003013"],
            "Quantity": [10, 10, 10],
            "Basic Price": [57.66, 57.66, 57.66],
            "Sub Total": [576.60, 576.60, 576.60],
        }
        df = pd.DataFrame(extracted_data)

        # Honour the requested position range. A bound of None (for example
        # an emptied number field) leaves that side of the range open.
        if start_pos is not None:
            df = df[df["Pos"] >= start_pos]
        if end_pos is not None:
            df = df[df["Pos"] <= end_pos]

        # Save to Excel
        output_path = "/mnt/data/extracted_data.xlsx"
        df.to_excel(output_path, index=False)

        # Double-check that the workbook really landed on disk before
        # handing its path back to the caller.
        if os.path.exists(output_path):
            print("File saved successfully:", output_path)
            return output_path
        return "Error: Failed to save the Excel file."

    except Exception as e:
        # Boundary handler: any failure (unreadable PDF, write error, ...) is
        # logged and converted into an "Error: ..." string for the caller.
        print("Error encountered:", str(e))
        return f"Error: {e}"

# Set up Gradio interface
# The three inputs line up, in order, with extract_data's parameters
# (pdf_file_path, start_pos, end_pos); the defaults 10 and 450 pre-fill the
# position range fields.
interface = gr.Interface(
    fn=extract_data,
    inputs=[
        gr.File(type="filepath", label="Upload PDF File"),
        gr.Number(value=10, label="Start Position"),
        gr.Number(value=450, label="End Position")
    ],
    # NOTE(review): extract_data returns "Error: ..." strings on failure, and
    # this File output would receive them in place of a file path - confirm
    # how Gradio presents that to the user.
    outputs=gr.File(label="Download Extracted Excel")
)

# Additional Excel filtering logic: copy the rows of the input workbook whose
# 'Pos' value lies in the inclusive range 10..450 to output_file_path.
if os.path.exists(input_file_path):
    df = pd.read_excel(input_file_path)
    if 'Pos' in df.columns:
        # Filter for positions between 10 and 450; between() is inclusive on
        # both ends by default, i.e. Pos >= 10 and Pos <= 450.
        filtered_df = df[df['Pos'].between(10, 450)]
        filtered_df.to_excel(output_file_path, index=False)
        print(f"Filtered data saved to: {output_file_path}")
    else:
        # Without this guard a workbook lacking a 'Pos' column raises KeyError
        # here and the script stops before the UI is launched below. The
        # filter is optional, so report the problem and carry on.
        print(f"Column 'Pos' not found in: {input_file_path}. Skipping filtering.")
else:
    print(f"Input file not found: {input_file_path}. Skipping filtering.")

# Start the Gradio app.
interface.launch()