# acc / app.py
# MathanKarthik's picture
# Update app.py
# f493e83 verified
# import pandas as pd
# import numpy as np
#
# # Read the Excel file
# df = pd.read_excel('your_file.xlsx')
#
#
# # Define a function to compare tags
# def compare_tags(row):
# htag = row['human_tag']
# ltag = row[f"{clm}"]
# if htag:
# htag = str(htag)
# if htag == 'nan':
# htag = set()
# else:
# htag = set(htag.split(', '))
# if ltag:
# ltag = str(ltag)
# if ltag == 'nan':
# ltag = set()
# else:
# ltag = set(ltag.split(', '))
#
#
# if pd.isna(htag) or pd.isna(ltag):
# return np.nan, np.nan
#
# missing_tag = list(htag - ltag)
# incorrect_tags = [tag for tag in ltag if tag not in htag]
# correct_tags = list(htag & ltag)
#
# return len(missing_tag), len(incorrect_tags), len(correct_tags)
#
#
# # Apply the function to each row
# df[['Missing Tag', 'Incorrect Tag', 'Correct Tag']] = df.apply(compare_tags, axis=1, result_type='expand')
#
# # Calculate cumulative sums
# df['Cumulative Missing'] = df['Missing Tag'].cumsum()
# df['Cumulative Incorrect'] = df['Incorrect Tag'].cumsum()
# df['Correct Tag'] = df['Correct Tag'].cumsum()
#
# # Display the results
# print(df)
#
# # Write the results to a new Excel file
# df.to_excel('output.xlsx', index=False)
#############################################################################
import gradio as gr
import pandas as pd
import numpy as np
def process_xlsx(file, clm):
    """Score the tags in column ``clm`` against ``human_tag`` for every row.

    The sheet must contain the columns ``human_tag``, ``first_subject`` and
    ``clm``. Tag cells are treated as comma-separated lists; blank cells (NaN)
    count as "no tags". For each row the function counts tags present in
    ``human_tag`` but absent from ``clm`` ("Missing Tag"), tags present in
    ``clm`` but absent from ``human_tag`` ("Incorrect Tag"), and tags present
    in both ("Correct Tag").

    Args:
        file: Either an object whose ``.name`` attribute is the path of the
            workbook, or the path itself (``str`` / ``os.PathLike``).
        clm: Name of the column whose tags are compared with ``human_tag``.

    Returns:
        A 7-tuple:
            * a pandas ``Styler`` over the columns ``S.No``, ``first_subject``,
              ``Missing Tag``, ``Incorrect Tag``, ``human_tag`` and ``clm``,
              with zero counts highlighted in light green;
            * number of rows with at least one missing tag (``str``);
            * percentage of rows with no missing tag (``"xx.xx%"``);
            * number of rows with at least one incorrect tag (``str``);
            * percentage of rows with no incorrect tag (``"xx.xx%"``);
            * number of rows with neither missing nor incorrect tags (``str``);
            * percentage of such rows (``"xx.xx%"``).
        The three percentages are ``"N/A"`` when the sheet has no rows.

    Raises:
        KeyError: If ``human_tag``, ``first_subject`` or ``clm`` is not a
            column of the sheet.
    """
    import os  # function-scope import: only needed for the PathLike check

    # A str/PathLike is already a path; anything else is expected to expose
    # the path as ``.name`` (this is what the original code relied on).
    path = file if isinstance(file, (str, os.PathLike)) else file.name
    df = pd.read_excel(path)

    # Tolerate accidental surrounding whitespace in the requested column name,
    # but only when the exact name is not itself a column of the sheet.
    if isinstance(clm, str) and clm not in df.columns and clm.strip() in df.columns:
        clm = clm.strip()

    # Fail early with a clear message instead of producing all-NaN counts and
    # then hitting an opaque KeyError during column selection.
    absent = [name for name in ('human_tag', 'first_subject', clm) if name not in df.columns]
    if absent:
        raise KeyError(f"Column(s) not found in the uploaded sheet: {absent}")

    # Add S.No column
    df['S.No'] = range(1, len(df) + 1)

    def parse_tags(cell):
        """Return the set of tags in one cell; blank cells give an empty set."""
        if pd.isna(cell):
            return set()
        text = str(cell)
        if text == 'nan':
            return set()
        # Split on bare commas and trim each piece so "a,b", "a, b" and
        # "a ,  b" all yield the same tags; drop empty pieces ("a, ,b").
        return {piece.strip() for piece in text.split(',')} - {''}

    def highlight_zeros(val):
        """CSS for a count cell: light green background when the count is 0."""
        return 'background-color: lightgreen' if val == 0 else ''

    # Plain iteration instead of DataFrame.apply(..., result_type='expand'):
    # it also works for a sheet with zero rows.
    count_columns = ['Missing Tag', 'Incorrect Tag', 'Correct Tag']
    counts = []
    for human_cell, other_cell in zip(df['human_tag'], df[clm]):
        human_tags = parse_tags(human_cell)
        other_tags = parse_tags(other_cell)
        counts.append((
            len(human_tags - other_tags),   # missing
            len(other_tags - human_tags),   # incorrect
            len(human_tags & other_tags),   # correct
        ))
    count_frame = pd.DataFrame(counts, columns=count_columns, index=df.index).astype(int)
    for name in count_columns:
        df[name] = count_frame[name]

    # Select only the desired columns
    desired_columns = ['S.No', 'first_subject', 'Missing Tag', 'Incorrect Tag', 'human_tag', clm]

    # Styler.applymap was renamed to Styler.map in pandas 2.1; prefer the new
    # name when available and fall back to the old one on earlier versions.
    styler = df[desired_columns].style
    apply_elementwise = getattr(styler, 'map', None) or styler.applymap
    styled_df = apply_elementwise(highlight_zeros, subset=['Missing Tag', 'Incorrect Tag'])

    # Calculate counts and percentages
    no_missing = df['Missing Tag'] == 0
    no_incorrect = df['Incorrect Tag'] == 0
    row_of_missing = int((~no_missing).sum())
    row_of_incorrect = int((~no_incorrect).sum())
    perfect_match_count = int((no_missing & no_incorrect).sum())
    total_rows = len(df)

    if total_rows:
        missing_zero_percentage = f"{(1-(row_of_missing / total_rows)) * 100:.2f}%"
        incorrect_zero_percentage = f"{(1-(row_of_incorrect / total_rows)) * 100:.2f}%"
        perfect_match_percentage = f"{perfect_match_count / total_rows * 100:.2f}%"
    else:
        # No rows: the ratios are undefined, so avoid dividing by zero.
        missing_zero_percentage = incorrect_zero_percentage = perfect_match_percentage = "N/A"

    return (
        styled_df,
        str(row_of_missing), missing_zero_percentage,
        str(row_of_incorrect), incorrect_zero_percentage,
        str(perfect_match_count), perfect_match_percentage,
    )
def update_dataframe(file, clm):
    """Button callback: run ``process_xlsx`` only when both inputs are usable.

    Args:
        file: The uploaded file value, or ``None`` when nothing was uploaded.
        clm: The column name typed by the user.

    Returns:
        Seven ``None`` values (one per output component) when no file was
        given or the column name is empty or whitespace-only; otherwise the
        7-tuple returned by ``process_xlsx(file, clm)``.
    """
    # A name consisting only of whitespace is as unusable as an empty one;
    # reject it here instead of letting the column lookup fail downstream.
    column_is_blank = not clm or not str(clm).strip()
    if file is None or column_is_blank:
        return None, None, None, None, None, None, None
    return process_xlsx(file, clm)
# Page layout and event wiring. Every component is created inside the Blocks
# context so that it is attached to ``demo``.
# NOTE(review): the nesting below (which components sit inside each Row, and
# the button sitting outside the first Row) is the presumed layout - confirm.
with gr.Blocks(fill_height=True) as demo:
    # Page title and short instructions.
    gr.Markdown("# XLSX File Processor")
    gr.Markdown("Upload an XLSX file to process and view selected rows.")

    # Inputs: the workbook and the name of the column to compare.
    with gr.Row():
        file_input = gr.File(label="Upload XLSX File")
        column_input = gr.Textbox(
            label="Enter Column Name",
            placeholder="Enter the column name to compare",
        )
    process_button = gr.Button("Process")

    # Read-only table that receives the first value returned by the callback.
    dataframe_output = gr.DataFrame(
        label="Processed Data",
        headers=['S.No', 'Missing Tag', 'Incorrect Tag', 'human_tag'],
        interactive=False,
    )

    # Summary figures, one row per metric: a count label next to a percentage label.
    with gr.Row():
        missing_zero_label = gr.Label(label="No of threads with missing tags")
        missing_zero_percentage = gr.Label(label="Missing to cover tags Percentage:")
    with gr.Row():
        incorrect_zero_label = gr.Label(label="No of threads with incorrect tags")
        incorrect_zero_percentage = gr.Label(label="Incorrect tags to cover Percentage:")
    with gr.Row():
        perfect_match_label = gr.Label(label="Perfect Match Count:")
        perfect_match_percentage = gr.Label(label="Perfect Match Percentage:")

    # The callback returns seven values; they are assigned to the components
    # below in this exact order.
    process_button.click(
        fn=update_dataframe,
        inputs=[file_input, column_input],
        outputs=[
            dataframe_output,
            missing_zero_label,
            missing_zero_percentage,
            incorrect_zero_label,
            incorrect_zero_percentage,
            perfect_match_label,
            perfect_match_percentage,
        ],
    )

# Start the app.
demo.launch()
##################################################################################
# import gradio as gr
# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt
#
#
# def process_xlsx(file):
# # Read the Excel file
# df = pd.read_excel(file.name)
#
# # Define a function to compare tags
# def compare_tags(row):
# htag = row['human_tag']
# ltag = row[f"{clm}"]
# htag = row['human_tag']
# ltag = row[f"{clm}"]
# if htag:
# htag = str(htag)
# if htag == 'nan':
# htag = set()
# else:
# htag = set(htag.split(', '))
# if ltag:
# ltag = str(ltag)
# if ltag == 'nan':
# ltag = set()
# else:
# ltag = set(ltag.split(', '))
#
# if pd.isna(htag) or pd.isna(ltag):
# return np.nan, np.nan, np.nan
#
# missing_tag = list(htag - ltag)
# incorrect_tags = [tag for tag in ltag if tag not in htag]
# correct_tags = list(htag & ltag)
#
# return len(missing_tag), len(incorrect_tags), len(correct_tags)
#
# # Apply the function to each row
# df[['Missing Tag', 'Incorrect Tag', 'Correct Tag']] = df.apply(compare_tags, axis=1, result_type='expand')
#
# # Calculate cumulative sums
# df['Cumulative Missing'] = df['Missing Tag'].cumsum()
# df['Cumulative Incorrect'] = df['Incorrect Tag'].cumsum()
# df['Correct Tag'] = df['Correct Tag'].cumsum()
#
# # Get the last row's missing tag and incorrect tag counts
# last_row = df.iloc[-1]
# missing_tag_count = last_row['Cumulative Missing']
# incorrect_tag_count = last_row['Cumulative Incorrect']
# correct_tag = last_row['Correct Tag']
#
# # Create pie chart
# labels = ['Missing Tags', 'Incorrect Tags', 'Correct Tags']
# sizes = [missing_tag_count, incorrect_tag_count, correct_tag]
#
# plt.figure(figsize=(8, 8))
# patches, texts, autotexts = plt.pie(sizes, labels=labels, autopct='%1.1f%%',
# startangle=90, textprops={'fontsize': 14})
#
# plt.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
# plt.title("Tag Comparison", fontsize=16)
#
# # Save the figure
# image_path = "tag_comparison_pie_chart.png"
# plt.savefig(image_path)
# plt.close() # Close the figure to free up memory
#
# return image_path
#
#
# iface = gr.Interface(
# process_xlsx,
# gr.File(label="Upload XLSX File"),
# "image",
# title="XLSX File Processor",
# description="Upload an XLSX file to process and get a pie chart comparison of tags."
# )
#
# iface.launch()