import streamlit as st
import pandas as pd
from io import BytesIO
from helper import get_res_df
import nltk

nltk.download('wordnet')

def to_excel(df):
    """
    Convert a Pandas DataFrame to an Excel file in memory.
    
    Parameters:
    df (DataFrame): The DataFrame to be converted to Excel format.

    Returns:
    bytes: The in-memory Excel file data.
    """
    output = BytesIO()
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False)
    processed_data = output.getvalue()
    return processed_data

def validate_columns(df):
    required_columns = {
        "Employer Number", "Employer Name", "Bank Statement Reference", "Employer Province",
        "Region", "Industry", "Contributing Status", "Date Joined Plan", "Termination Date", "Email Addresses"
    }
    missing_columns = required_columns - set(df.columns)
    
    if missing_columns:
        st.error(f"Upload a valid Master Sheet. These columns are missing: {', '.join(missing_columns)}")
        return None
    else:
        return df

def process_files(excel_file, text_file):
    """
    Process the uploaded Excel/CSV and text files and return cleaned dataframes.

    Parameters:
    excel_file (UploadedFile): The uploaded Excel or CSV file.
    text_file (UploadedFile): The uploaded text file.

    Returns:
    Tuple[DataFrame, DataFrame]: A tuple containing the cleaned DataFrame from the Excel/CSV file
                                 and a DataFrame created from the text file data.
    """
    print(excel_file, text_file)  # Debugging information

    # Read the Excel/CSV file into a DataFrame
    if excel_file.name.endswith('.csv'):
        df_excel = pd.read_csv(excel_file)
    else:
        df_excel = pd.read_excel(excel_file)

    # Validate Master Sheet
    df_excel = validate_columns(df_excel)
    if df_excel is None:
        return None, None
    
    # Ensure the 'Employer Number' column values are formatted as zero-padded 6-digit strings
    df_excel['Employer Number'] = [str(number).zfill(6) for number in df_excel['Employer Number']]
    df_excel = df_excel.dropna(subset=['Employer Name'])

    # Read and process the text file content into a list of lines
    lines = text_file.read().decode('utf-8').splitlines()
    data = [line.strip().split(',') for line in lines]  # Split each line by commas

    # Create a DataFrame from the parsed text file data
    df = pd.DataFrame(data)
    df = df[df[1].isin(['1001010071', '1001233102'])]

    return df_excel, df


# Streamlit UI section
st.title("Generate E-Batch")  # Application title

# Sidebar file uploader widgets to allow users to upload an Excel/CSV file and a text file
with st.sidebar:
    st.header("Upload Files")
    uploaded_excel = st.file_uploader("Upload the Master file (.xls or .csv)", type=["csv", "xls", "xlsx"])
    uploaded_text = st.file_uploader("Upload your Text file (.txt)", type=["txt"])

    threshold = st.slider("Adjust Threshold", min_value=0, max_value=30, value=12)

# Check if both files are uploaded
if uploaded_excel and uploaded_text:
    master_data, df = process_files(uploaded_excel, uploaded_text)  # Process the files
    if master_data is None and df is None:
        st.stop()

    res, fbdm, fben, fbbm25, nf = get_res_df(master_data, df, threshold)  # Generate the result DataFrame using the helper function
    
    # Create a layout with four columns to display the metrics
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Direct Match", fbdm)
    with col2:
        st.metric("Employer Match", fben)
    with col3:
        st.metric("AI Based Match", fbbm25)
    with col4:
        st.metric("No Clear Match", nf)

    st.write("Final Output")  # Display the result of file processing
    st.dataframe(res)  # Show the result in a table format on the web app

    # Convert the result DataFrame to an Excel file for download
    excel_data = to_excel(res)

    # Provide a button for the user to download the result as an Excel file
    st.download_button(label="Download Excel",
                       data=excel_data,
                       file_name='_Consolidated E-Batch for Banking_.xlsx',
                       mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')