Spaces:
Build error
Build error
| # import pandas as pd | |
| # import numpy as np | |
| # | |
| # # Read the Excel file | |
| # df = pd.read_excel('your_file.xlsx') | |
| # | |
| # | |
| # # Define a function to compare tags | |
| # def compare_tags(row): | |
| # htag = row['human_tag'] | |
| # ltag = row[f"{clm}"] | |
| # if htag: | |
| # htag = str(htag) | |
| # if htag == 'nan': | |
| # htag = set() | |
| # else: | |
| # htag = set(htag.split(', ')) | |
| # if ltag: | |
| # ltag = str(ltag) | |
| # if ltag == 'nan': | |
| # ltag = set() | |
| # else: | |
| # ltag = set(ltag.split(', ')) | |
| # | |
| # | |
| # if pd.isna(htag) or pd.isna(ltag): | |
| # return np.nan, np.nan | |
| # | |
| # missing_tag = list(htag - ltag) | |
| # incorrect_tags = [tag for tag in ltag if tag not in htag] | |
| # correct_tags = list(htag & ltag) | |
| # | |
| # return len(missing_tag), len(incorrect_tags), len(correct_tags) | |
| # | |
| # | |
| # # Apply the function to each row | |
| # df[['Missing Tag', 'Incorrect Tag', 'Correct Tag']] = df.apply(compare_tags, axis=1, result_type='expand') | |
| # | |
| # # Calculate cumulative sums | |
| # df['Cumulative Missing'] = df['Missing Tag'].cumsum() | |
| # df['Cumulative Incorrect'] = df['Incorrect Tag'].cumsum() | |
| # df['Correct Tag'] = df['Correct Tag'].cumsum() | |
| # | |
| # # Display the results | |
| # print(df) | |
| # | |
| # # Write the results to a new Excel file | |
| # df.to_excel('output.xlsx', index=False) | |
| ############################################################################# | |
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| def process_xlsx(file, clm): | |
| df = pd.read_excel(file.name) | |
| # Add S.No column | |
| df['S.No'] = range(1, len(df) + 1) | |
| def compare_tags(row): | |
| htag = row['human_tag'] | |
| ltag = row.get(clm, None) | |
| if htag: | |
| htag = str(htag) | |
| if htag == 'nan': | |
| htag = set() | |
| else: | |
| htag = set(htag.split(', ')) | |
| if ltag: | |
| ltag = str(ltag) | |
| if ltag == 'nan': | |
| ltag = set() | |
| else: | |
| ltag = set(ltag.split(', ')) | |
| if pd.isna(htag) or pd.isna(ltag): | |
| return np.nan, np.nan, np.nan | |
| missing_tag = list(htag - ltag) | |
| incorrect_tags = [tag for tag in ltag if tag not in htag] | |
| correct_tags = list(htag & ltag) | |
| return len(missing_tag), len(incorrect_tags), len(correct_tags) | |
| df[['Missing Tag', 'Incorrect Tag', 'Correct Tag']] = df.apply(compare_tags, axis=1, result_type='expand') | |
| # Select only the desired columns | |
| desired_columns = ['S.No', 'first_subject', 'Missing Tag', 'Incorrect Tag', 'human_tag', clm] | |
| # Apply styling to highlight zeros in green | |
| def highlight_zeros(val): | |
| color = 'background-color: lightgreen' if val == 0 else '' | |
| return color | |
| styled_df = df[desired_columns].style.applymap(highlight_zeros, subset=['Missing Tag', 'Incorrect Tag']) | |
| # Calculate counts and percentages | |
| missing_zero_count = (df['Missing Tag'] == 0).sum() | |
| incorrect_zero_count = (df['Incorrect Tag'] == 0).sum() | |
| perfect_match_count = ((df['Missing Tag'] == 0) & (df['Incorrect Tag'] == 0)).sum() | |
| row_of_incorrect = (df['Incorrect Tag'] != 0).sum() | |
| row_of_missing = (df['Missing Tag'] != 0).sum() | |
| total_rows = len(df) | |
| missing_zero_percentage = f"{(1-(row_of_missing / total_rows)) * 100:.2f}%" | |
| incorrect_zero_percentage = f"{(1-(row_of_incorrect / total_rows)) * 100:.2f}%" | |
| perfect_match_percentage = f"{perfect_match_count / total_rows * 100:.2f}%" | |
| return styled_df, str(row_of_missing), missing_zero_percentage, str( | |
| row_of_incorrect), incorrect_zero_percentage, str(perfect_match_count), perfect_match_percentage | |
def update_dataframe(file, clm):
    """Click handler: run the comparison, or clear the outputs if input is incomplete.

    Args:
        file: The uploaded file, or None when nothing has been uploaded.
        clm: Name of the column to compare, as typed by the user.

    Returns:
        A 7-tuple of None values when no file was given or the column name
        is empty or whitespace-only; otherwise whatever ``process_xlsx``
        returns for the file and the stripped column name.
    """
    # Text typed by hand often carries stray spaces; without stripping, a
    # blank-looking name would slip past the guard and fail the column lookup.
    column = clm.strip() if isinstance(clm, str) else clm
    if file is None or not column:
        return (None,) * 7
    return process_xlsx(file, column)
# Gradio UI: upload a spreadsheet, name the column to compare, and show the
# per-row table plus six summary labels.
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown("# XLSX File Processor")
    gr.Markdown("Upload an XLSX file to process and view selected rows.")

    with gr.Row():
        file_input = gr.File(label="Upload XLSX File")
        column_input = gr.Textbox(label="Enter Column Name", placeholder="Enter the column name to compare")

    process_button = gr.Button("Process")

    # Placeholder headers shown before the first run. They list the fixed
    # columns process_xlsx returns; the user-chosen comparison column is only
    # known after processing.
    dataframe_output = gr.DataFrame(
        label="Processed Data",
        headers=['S.No', 'first_subject', 'Missing Tag', 'Incorrect Tag', 'human_tag'],
        interactive=False
    )

    with gr.Row():
        missing_zero_label = gr.Label(label="No of threads with missing tags")
        missing_zero_percentage = gr.Label(label="Missing to cover tags Percentage:")

    with gr.Row():
        incorrect_zero_label = gr.Label(label="No of threads with incorrect tags")
        incorrect_zero_percentage = gr.Label(label="Incorrect tags to cover Percentage:")

    with gr.Row():
        perfect_match_label = gr.Label(label="Perfect Match Count:")
        perfect_match_percentage = gr.Label(label="Perfect Match Percentage:")

    # The outputs are listed in the same order as the seven values returned
    # by update_dataframe.
    process_button.click(
        update_dataframe,
        inputs=[file_input, column_input],
        outputs=[
            dataframe_output,
            missing_zero_label, missing_zero_percentage,
            incorrect_zero_label, incorrect_zero_percentage,
            perfect_match_label, perfect_match_percentage,
        ],
    )

demo.launch()
| ################################################################################## | |
| # import gradio as gr | |
| # import pandas as pd | |
| # import numpy as np | |
| # import matplotlib.pyplot as plt | |
| # | |
| # | |
| # def process_xlsx(file): | |
| # # Read the Excel file | |
| # df = pd.read_excel(file.name) | |
| # | |
| # # Define a function to compare tags | |
| # def compare_tags(row): | |
| # htag = row['human_tag'] | |
| # ltag = row[f"{clm}"] | |
| # htag = row['human_tag'] | |
| # ltag = row[f"{clm}"] | |
| # if htag: | |
| # htag = str(htag) | |
| # if htag == 'nan': | |
| # htag = set() | |
| # else: | |
| # htag = set(htag.split(', ')) | |
| # if ltag: | |
| # ltag = str(ltag) | |
| # if ltag == 'nan': | |
| # ltag = set() | |
| # else: | |
| # ltag = set(ltag.split(', ')) | |
| # | |
| # if pd.isna(htag) or pd.isna(ltag): | |
| # return np.nan, np.nan, np.nan | |
| # | |
| # missing_tag = list(htag - ltag) | |
| # incorrect_tags = [tag for tag in ltag if tag not in htag] | |
| # correct_tags = list(htag & ltag) | |
| # | |
| # return len(missing_tag), len(incorrect_tags), len(correct_tags) | |
| # | |
| # # Apply the function to each row | |
| # df[['Missing Tag', 'Incorrect Tag', 'Correct Tag']] = df.apply(compare_tags, axis=1, result_type='expand') | |
| # | |
| # # Calculate cumulative sums | |
| # df['Cumulative Missing'] = df['Missing Tag'].cumsum() | |
| # df['Cumulative Incorrect'] = df['Incorrect Tag'].cumsum() | |
| # df['Correct Tag'] = df['Correct Tag'].cumsum() | |
| # | |
| # # Get the last row's missing tag and incorrect tag counts | |
| # last_row = df.iloc[-1] | |
| # missing_tag_count = last_row['Cumulative Missing'] | |
| # incorrect_tag_count = last_row['Cumulative Incorrect'] | |
| # correct_tag = last_row['Correct Tag'] | |
| # | |
| # # Create pie chart | |
| # labels = ['Missing Tags', 'Incorrect Tags', 'Correct Tags'] | |
| # sizes = [missing_tag_count, incorrect_tag_count, correct_tag] | |
| # | |
| # plt.figure(figsize=(8, 8)) | |
| # patches, texts, autotexts = plt.pie(sizes, labels=labels, autopct='%1.1f%%', | |
| # startangle=90, textprops={'fontsize': 14}) | |
| # | |
| # plt.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle. | |
| # plt.title("Tag Comparison", fontsize=16) | |
| # | |
| # # Save the figure | |
| # image_path = "tag_comparison_pie_chart.png" | |
| # plt.savefig(image_path) | |
| # plt.close() # Close the figure to free up memory | |
| # | |
| # return image_path | |
| # | |
| # | |
| # iface = gr.Interface( | |
| # process_xlsx, | |
| # gr.File(label="Upload XLSX File"), | |
| # "image", | |
| # title="XLSX File Processor", | |
| # description="Upload an XLSX file to process and get a pie chart comparison of tags." | |
| # ) | |
| # | |
| # iface.launch() | |