# import pandas as pd
# import numpy as np
#
# # Read the Excel file
# df = pd.read_excel('your_file.xlsx')
#
#
# # Define a function to compare tags
# def compare_tags(row):
#     htag = row['human_tag']
#     ltag = row[f"{clm}"]
#     if htag:
#         htag = str(htag)
#         if htag == 'nan':
#             htag = set()
#         else:
#             htag = set(htag.split(', '))
#     if ltag:
#         ltag = str(ltag)
#         if ltag == 'nan':
#             ltag = set()
#         else:
#             ltag = set(ltag.split(', '))
#
#
#     if pd.isna(htag) or pd.isna(ltag):
#         return np.nan, np.nan
#
#     missing_tag = list(htag - ltag)
#     incorrect_tags = [tag for tag in ltag if tag not in htag]
#     correct_tags = list(htag & ltag)
#
#     return len(missing_tag), len(incorrect_tags), len(correct_tags)
#
#
# # Apply the function to each row
# df[['Missing Tag', 'Incorrect Tag', 'Correct Tag']] = df.apply(compare_tags, axis=1, result_type='expand')
#
# # Calculate cumulative sums
# df['Cumulative Missing'] = df['Missing Tag'].cumsum()
# df['Cumulative Incorrect'] = df['Incorrect Tag'].cumsum()
# df['Correct Tag'] = df['Correct Tag'].cumsum()
#
# # Display the results
# print(df)
#
# # Write the results to a new Excel file
# df.to_excel('output.xlsx', index=False)

#############################################################################
import gradio as gr
import pandas as pd
import numpy as np

def process_xlsx(file, clm):
    df = pd.read_excel(file.name)

    # Add S.No column
    df['S.No'] = range(1, len(df) + 1)

    def compare_tags(row):
        htag = row['human_tag']
        ltag = row.get(clm, None)
        if htag:
            htag = str(htag)
            if htag == 'nan':
                htag = set()
            else:
                htag = set(htag.split(', '))
        if ltag:
            ltag = str(ltag)
            if ltag == 'nan':
                ltag = set()
            else:
                ltag = set(ltag.split(', '))

        if pd.isna(htag) or pd.isna(ltag):
            return np.nan, np.nan, np.nan

        missing_tag = list(htag - ltag)
        incorrect_tags = [tag for tag in ltag if tag not in htag]
        correct_tags = list(htag & ltag)

        return len(missing_tag), len(incorrect_tags), len(correct_tags)

    df[['Missing Tag', 'Incorrect Tag', 'Correct Tag']] = df.apply(compare_tags, axis=1, result_type='expand')

    # Select only the desired columns
    desired_columns = ['S.No', 'first_subject', 'Missing Tag', 'Incorrect Tag', 'human_tag', clm]

    # Apply styling to highlight zeros in green
    def highlight_zeros(val):
        color = 'background-color: lightgreen' if val == 0 else ''
        return color

    styled_df = df[desired_columns].style.applymap(highlight_zeros, subset=['Missing Tag', 'Incorrect Tag'])

    # Calculate counts and percentages
    missing_zero_count = (df['Missing Tag'] == 0).sum()
    incorrect_zero_count = (df['Incorrect Tag'] == 0).sum()
    perfect_match_count = ((df['Missing Tag'] == 0) & (df['Incorrect Tag'] == 0)).sum()
    row_of_incorrect = (df['Incorrect Tag'] != 0).sum()
    row_of_missing = (df['Missing Tag'] != 0).sum()

    total_rows = len(df)

    missing_zero_percentage = f"{(1-(row_of_missing / total_rows)) * 100:.2f}%"
    incorrect_zero_percentage = f"{(1-(row_of_incorrect / total_rows)) * 100:.2f}%"
    perfect_match_percentage = f"{perfect_match_count / total_rows * 100:.2f}%"

    return styled_df, str(row_of_missing), missing_zero_percentage, str(
        row_of_incorrect), incorrect_zero_percentage, str(perfect_match_count), perfect_match_percentage

def update_dataframe(file, clm):
    """Run the tag comparison once both a file and a column name are supplied.

    Returns the 7-tuple produced by ``process_xlsx``; when the file is absent
    or the column name is empty, returns seven ``None`` values instead so every
    bound output receives a value.
    """
    ready = file is not None and bool(clm)
    if ready:
        return process_xlsx(file, clm)
    return (None,) * 7

# Gradio UI: upload a workbook, name the column to compare against
# 'human_tag', and show the per-row comparison plus summary statistics.
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown("# XLSX File Processor")
    gr.Markdown("Upload an XLSX file to process and view selected rows.")

    # Inputs: the uploaded file, the comparison column name, and the trigger.
    with gr.Row():
        file_input = gr.File(label="Upload XLSX File")
        column_input = gr.Textbox(label="Enter Column Name", placeholder="Enter the column name to compare")
        process_button = gr.Button("Process")

    # NOTE(review): these headers list four columns, while process_xlsx returns
    # six ('first_subject' and the compared column are also included) --
    # presumably this is only the placeholder shown before the first run; confirm.
    dataframe_output = gr.DataFrame(
        label="Processed Data",
        headers=['S.No', 'Missing Tag', 'Incorrect Tag', 'human_tag'],
        interactive=False
    )

    # Despite the "zero" in the variable names, the first label of each of the
    # next two rows receives the number of rows that DO have missing/incorrect
    # tags; the second receives the share of rows that have none.
    with gr.Row():
        missing_zero_label = gr.Label(label="No of threads with missing tags")
        missing_zero_percentage = gr.Label(label="Missing to cover tags Percentage:")

    with gr.Row():
        incorrect_zero_label = gr.Label(label="No of threads with incorrect tags")
        incorrect_zero_percentage = gr.Label(label="Incorrect tags to cover Percentage:")

    # Rows with neither missing nor incorrect tags.
    with gr.Row():
        perfect_match_label = gr.Label(label="Perfect Match Count:")
        perfect_match_percentage = gr.Label(label="Perfect Match Percentage:")

    # The outputs list must stay in the same order as the 7-tuple returned by
    # update_dataframe / process_xlsx.
    process_button.click(update_dataframe, inputs=[file_input, column_input], outputs=[
        dataframe_output,
        missing_zero_label, missing_zero_percentage,
        incorrect_zero_label, incorrect_zero_percentage,
        perfect_match_label, perfect_match_percentage
    ])

# NOTE(review): launch() is called unconditionally (no
# `if __name__ == "__main__":` guard), so importing this module also runs it.
demo.launch()


##################################################################################
# import gradio as gr
# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt
#
#
# def process_xlsx(file):
#     # Read the Excel file
#     df = pd.read_excel(file.name)
#
#     # Define a function to compare tags
#     def compare_tags(row):
#         htag = row['human_tag']
#         ltag = row[f"{clm}"]
#         htag = row['human_tag']
#         ltag = row[f"{clm}"]
#         if htag:
#             htag = str(htag)
#             if htag == 'nan':
#                 htag = set()
#             else:
#                 htag = set(htag.split(', '))
#         if ltag:
#             ltag = str(ltag)
#             if ltag == 'nan':
#                 ltag = set()
#             else:
#                 ltag = set(ltag.split(', '))
#
#         if pd.isna(htag) or pd.isna(ltag):
#             return np.nan, np.nan, np.nan
#
#         missing_tag = list(htag - ltag)
#         incorrect_tags = [tag for tag in ltag if tag not in htag]
#         correct_tags = list(htag & ltag)
#
#         return len(missing_tag), len(incorrect_tags), len(correct_tags)
#
#     # Apply the function to each row
#     df[['Missing Tag', 'Incorrect Tag', 'Correct Tag']] = df.apply(compare_tags, axis=1, result_type='expand')
#
#     # Calculate cumulative sums
#     df['Cumulative Missing'] = df['Missing Tag'].cumsum()
#     df['Cumulative Incorrect'] = df['Incorrect Tag'].cumsum()
#     df['Correct Tag'] = df['Correct Tag'].cumsum()
#
#     # Get the last row's missing tag and incorrect tag counts
#     last_row = df.iloc[-1]
#     missing_tag_count = last_row['Cumulative Missing']
#     incorrect_tag_count = last_row['Cumulative Incorrect']
#     correct_tag = last_row['Correct Tag']
#
#     # Create pie chart
#     labels = ['Missing Tags', 'Incorrect Tags', 'Correct Tags']
#     sizes = [missing_tag_count, incorrect_tag_count, correct_tag]
#
#     plt.figure(figsize=(8, 8))
#     patches, texts, autotexts = plt.pie(sizes, labels=labels, autopct='%1.1f%%',
#                                         startangle=90, textprops={'fontsize': 14})
#
#     plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
#     plt.title("Tag Comparison", fontsize=16)
#
#     # Save the figure
#     image_path = "tag_comparison_pie_chart.png"
#     plt.savefig(image_path)
#     plt.close()  # Close the figure to free up memory
#
#     return image_path
#
#
# iface = gr.Interface(
#     process_xlsx,
#     gr.File(label="Upload XLSX File"),
#     "image",
#     title="XLSX File Processor",
#     description="Upload an XLSX file to process and get a pie chart comparison of tags."
# )
#
# iface.launch()