"""Gradio app that scores predicted tags against human tags in an XLSX sheet.

The uploaded workbook must contain the columns ``human_tag`` and
``first_subject`` plus a user-named column holding the predicted tags.  Each
tag cell is a comma-separated list.  For every row the app counts:

* missing tags   - present in ``human_tag`` but absent from the predicted column
* incorrect tags - present in the predicted column but absent from ``human_tag``
* correct tags   - present in both

and reports how many rows have missing/incorrect tags and how many match
perfectly.
"""

# ---------------------------------------------------------------------------
# Legacy variant 1 (disabled, kept for reference only): stand-alone script
# that reads 'your_file.xlsx' and writes cumulative counts to 'output.xlsx'.
# NOTE(review): `clm` is never defined in this variant and the NaN branch
# returns two values while three columns are assigned.
# ---------------------------------------------------------------------------
# import pandas as pd
# import numpy as np
#
#
# # Read the Excel file
# df = pd.read_excel('your_file.xlsx')
#
#
# # Define a function to compare tags
# def compare_tags(row):
#     htag = row['human_tag']
#     ltag = row[f"{clm}"]
#     if htag:
#         htag = str(htag)
#         if htag == 'nan':
#             htag = set()
#         else:
#             htag = set(htag.split(', '))
#     if ltag:
#         ltag = str(ltag)
#         if ltag == 'nan':
#             ltag = set()
#         else:
#             ltag = set(ltag.split(', '))
#
#
#     if pd.isna(htag) or pd.isna(ltag):
#         return np.nan, np.nan
#
#     missing_tag = list(htag - ltag)
#     incorrect_tags = [tag for tag in ltag if tag not in htag]
#     correct_tags = list(htag & ltag)
#
#     return len(missing_tag), len(incorrect_tags), len(correct_tags)
#
#
# # Apply the function to each row
# df[['Missing Tag', 'Incorrect Tag', 'Correct Tag']] = df.apply(compare_tags, axis=1, result_type='expand')
#
# # Calculate cumulative sums
# df['Cumulative Missing'] = df['Missing Tag'].cumsum()
# df['Cumulative Incorrect'] = df['Incorrect Tag'].cumsum()
# df['Correct Tag'] = df['Correct Tag'].cumsum()
#
# # Display the results
# print(df)
#
# # Write the results to a new Excel file
# df.to_excel('output.xlsx', index=False)

#############################################################################

import gradio as gr
import pandas as pd
import numpy as np

# Fixed column names the uploaded sheet must provide.
HUMAN_TAG_COLUMN = 'human_tag'
SUBJECT_COLUMN = 'first_subject'

# Per-row count columns added to the frame, in the order _compare_tags returns them.
RESULT_COLUMNS = ['Missing Tag', 'Incorrect Tag', 'Correct Tag']


def _parse_tags(value) -> set:
    """Convert one spreadsheet cell into a set of tag strings.

    Missing cells (None / NaN / NA) and the literal text ``nan`` yield an
    empty set.  Anything else is stringified, split on commas, and each piece
    is stripped; empty pieces are dropped.  Splitting on ``,`` and stripping
    (rather than on the exact string ``', '``) keeps ``"a,b"`` and ``"a,  b"``
    from being compared as a single, never-matching tag.
    """
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return set()
    text = str(value).strip()
    # Backward compatibility: a stringified NaN was always treated as "no tags".
    if text == 'nan':
        return set()
    return {piece.strip() for piece in text.split(',') if piece.strip()}


def _compare_tags(human_cell, predicted_cell) -> tuple:
    """Return ``(missing, incorrect, correct)`` tag counts for one row."""
    human = _parse_tags(human_cell)
    predicted = _parse_tags(predicted_cell)
    return len(human - predicted), len(predicted - human), len(human & predicted)


def _highlight_zeros(val) -> str:
    """Styler callback: paint cells whose value equals 0 light green."""
    return 'background-color: lightgreen' if val == 0 else ''


def process_xlsx(file, clm):
    """Score the predicted-tag column ``clm`` against ``human_tag``.

    Args:
        file: The uploaded file as delivered by ``gr.File`` - either an object
            exposing the path as ``.name`` or a plain path string.
        clm: Name of the sheet column holding the predicted tags.

    Returns:
        A 7-tuple matching the UI outputs:
        ``(styled_frame, rows_with_missing, pct_rows_without_missing,
        rows_with_incorrect, pct_rows_without_incorrect,
        perfect_match_rows, pct_perfect_match)``.
        Counts are strings; percentages are strings formatted like ``"87.50%"``.

    Raises:
        gr.Error: If the sheet has no data rows or a required column is absent.
    """
    # Accept both file-like objects with .name and bare path strings.
    path = getattr(file, 'name', file)
    df = pd.read_excel(path)

    # Tolerate accidental surrounding whitespace typed into the textbox, but
    # only when the exact name does not exist.
    if clm not in df.columns and clm.strip() in df.columns:
        clm = clm.strip()

    absent = [name for name in (HUMAN_TAG_COLUMN, SUBJECT_COLUMN, clm)
              if name not in df.columns]
    if absent:
        missing_names = ', '.join(str(name) for name in absent)
        raise gr.Error(f"Column(s) not found in the uploaded sheet: {missing_names}")

    total_rows = len(df)
    if total_rows == 0:
        raise gr.Error("The uploaded sheet contains no data rows.")

    # Add S.No column
    df['S.No'] = range(1, total_rows + 1)

    counts = [
        _compare_tags(human_cell, predicted_cell)
        for human_cell, predicted_cell in zip(df[HUMAN_TAG_COLUMN], df[clm])
    ]
    for position, column_name in enumerate(RESULT_COLUMNS):
        df[column_name] = [row_counts[position] for row_counts in counts]

    # Select only the desired columns; dict.fromkeys drops duplicates (e.g.
    # when clm is 'human_tag') because Styler rejects non-unique columns.
    desired_columns = list(dict.fromkeys(
        ['S.No', SUBJECT_COLUMN, 'Missing Tag', 'Incorrect Tag', HUMAN_TAG_COLUMN, clm]
    ))

    # Apply styling to highlight zeros in green.  Styler.applymap was renamed
    # to Styler.map in pandas 2.1; prefer the new name when it exists.
    styler = df[desired_columns].style
    apply_elementwise = styler.map if hasattr(styler, 'map') else styler.applymap
    styled_df = apply_elementwise(_highlight_zeros, subset=['Missing Tag', 'Incorrect Tag'])

    # Calculate counts and percentages
    has_missing = df['Missing Tag'] != 0
    has_incorrect = df['Incorrect Tag'] != 0
    row_of_missing = int(has_missing.sum())
    row_of_incorrect = int(has_incorrect.sum())
    perfect_match_count = int((~has_missing & ~has_incorrect).sum())

    missing_zero_percentage = f"{(1 - (row_of_missing / total_rows)) * 100:.2f}%"
    incorrect_zero_percentage = f"{(1 - (row_of_incorrect / total_rows)) * 100:.2f}%"
    perfect_match_percentage = f"{perfect_match_count / total_rows * 100:.2f}%"

    return (
        styled_df,
        str(row_of_missing),
        missing_zero_percentage,
        str(row_of_incorrect),
        incorrect_zero_percentage,
        str(perfect_match_count),
        perfect_match_percentage,
    )


def update_dataframe(file, clm):
    """Click handler: run ``process_xlsx`` once both inputs are provided.

    Returns seven ``None`` values (clearing every output component) when no
    file is uploaded or the column name is empty or whitespace-only.
    """
    if file is None or not clm or not clm.strip():
        return None, None, None, None, None, None, None
    return process_xlsx(file, clm)


with gr.Blocks(fill_height=True) as demo:
    gr.Markdown("# XLSX File Processor")
    gr.Markdown("Upload an XLSX file to process and view selected rows.")

    with gr.Row():
        file_input = gr.File(label="Upload XLSX File")
        column_input = gr.Textbox(label="Enter Column Name", placeholder="Enter the column name to compare")

    process_button = gr.Button("Process")

    # The headers are only a placeholder shown before the first run; the
    # returned Styler supplies its own columns.
    dataframe_output = gr.DataFrame(
        label="Processed Data",
        headers=['S.No', 'Missing Tag', 'Incorrect Tag', 'human_tag'],
        interactive=False
    )

    with gr.Row():
        missing_zero_label = gr.Label(label="No of threads with missing tags")
        missing_zero_percentage = gr.Label(label="Missing to cover tags Percentage:")

    with gr.Row():
        incorrect_zero_label = gr.Label(label="No of threads with incorrect tags")
        incorrect_zero_percentage = gr.Label(label="Incorrect tags to cover Percentage:")

    with gr.Row():
        perfect_match_label = gr.Label(label="Perfect Match Count:")
        perfect_match_percentage = gr.Label(label="Perfect Match Percentage:")

    # Output order must match the tuple returned by process_xlsx.
    process_button.click(update_dataframe, inputs=[file_input, column_input], outputs=[
        dataframe_output,
        missing_zero_label,
        missing_zero_percentage,
        incorrect_zero_label,
        incorrect_zero_percentage,
        perfect_match_label,
        perfect_match_percentage
    ])

# Launch only when run as a script so importing this module (e.g. to reuse
# `demo` or the helpers) does not start a blocking web server.
if __name__ == "__main__":
    demo.launch()

##################################################################################

# ---------------------------------------------------------------------------
# Legacy variant 2 (disabled, kept for reference only): gr.Interface version
# that renders a pie chart of cumulative missing/incorrect/correct tag counts.
# NOTE(review): `clm` is never defined in this variant.
# ---------------------------------------------------------------------------
# import gradio as gr
# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt
#
#
# def process_xlsx(file):
#     # Read the Excel file
#     df = pd.read_excel(file.name)
#
#     # Define a function to compare tags
#     def compare_tags(row):
#         htag = row['human_tag']
#         ltag = row[f"{clm}"]
#         htag = row['human_tag']
#         ltag = row[f"{clm}"]
#         if htag:
#             htag = str(htag)
#             if htag == 'nan':
#                 htag = set()
#             else:
#                 htag = set(htag.split(', '))
#         if ltag:
#             ltag = str(ltag)
#             if ltag == 'nan':
#                 ltag = set()
#             else:
#                 ltag = set(ltag.split(', '))
#
#         if pd.isna(htag) or pd.isna(ltag):
#             return np.nan, np.nan, np.nan
#
#         missing_tag = list(htag - ltag)
#         incorrect_tags = [tag for tag in ltag if tag not in htag]
#         correct_tags = list(htag & ltag)
#
#         return len(missing_tag), len(incorrect_tags), len(correct_tags)
#
#     # Apply the function to each row
#     df[['Missing Tag', 'Incorrect Tag', 'Correct Tag']] = df.apply(compare_tags, axis=1, result_type='expand')
#
#     # Calculate cumulative sums
#     df['Cumulative Missing'] = df['Missing Tag'].cumsum()
#     df['Cumulative Incorrect'] = df['Incorrect Tag'].cumsum()
#     df['Correct Tag'] = df['Correct Tag'].cumsum()
#
#     # Get the last row's missing tag and incorrect tag counts
#     last_row = df.iloc[-1]
#     missing_tag_count = last_row['Cumulative Missing']
#     incorrect_tag_count = last_row['Cumulative Incorrect']
#     correct_tag = last_row['Correct Tag']
#
#     # Create pie chart
#     labels = ['Missing Tags', 'Incorrect Tags', 'Correct Tags']
#     sizes = [missing_tag_count, incorrect_tag_count, correct_tag]
#
#     plt.figure(figsize=(8, 8))
#     patches, texts, autotexts = plt.pie(sizes, labels=labels, autopct='%1.1f%%',
#                                         startangle=90, textprops={'fontsize': 14})
#
#     plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
#     plt.title("Tag Comparison", fontsize=16)
#
#     # Save the figure
#     image_path = "tag_comparison_pie_chart.png"
#     plt.savefig(image_path)
#     plt.close()  # Close the figure to free up memory
#
#     return image_path
#
#
# iface = gr.Interface(
#     process_xlsx,
#     gr.File(label="Upload XLSX File"),
#     "image",
#     title="XLSX File Processor",
#     description="Upload an XLSX file to process and get a pie chart comparison of tags."
# )
#
# iface.launch()