Spaces:

MathanKarthik
/

acc

Build error

App Files Files Community

acc / app.py

MathanKarthik

Update app.py

f493e83 verified over 1 year ago

raw

history blame contribute delete

7.93 kB

	# import pandas as pd
	# import numpy as np
	#
	# # Read the Excel file
	# df = pd.read_excel('your_file.xlsx')
	#
	#
	# # Define a function to compare tags
	# def compare_tags(row):
	# htag = row['human_tag']
	# ltag = row[f"{clm}"]
	# if htag:
	# htag = str(htag)
	# if htag == 'nan':
	# htag = set()
	# else:
	# htag = set(htag.split(', '))
	# if ltag:
	# ltag = str(ltag)
	# if ltag == 'nan':
	# ltag = set()
	# else:
	# ltag = set(ltag.split(', '))
	#
	#
	# if pd.isna(htag) or pd.isna(ltag):
	# return np.nan, np.nan, np.nan
	#
	# missing_tag = list(htag - ltag)
	# incorrect_tags = [tag for tag in ltag if tag not in htag]
	# correct_tags = list(htag & ltag)
	#
	# return len(missing_tag), len(incorrect_tags), len(correct_tags)
	#
	#
	# # Apply the function to each row
	# df[['Missing Tag', 'Incorrect Tag', 'Correct Tag']] = df.apply(compare_tags, axis=1, result_type='expand')
	#
	# # Calculate cumulative sums
	# df['Cumulative Missing'] = df['Missing Tag'].cumsum()
	# df['Cumulative Incorrect'] = df['Incorrect Tag'].cumsum()
	# df['Correct Tag'] = df['Correct Tag'].cumsum()
	#
	# # Display the results
	# print(df)
	#
	# # Write the results to a new Excel file
	# df.to_excel('output.xlsx', index=False)

	#############################################################################
	import gradio as gr
	import pandas as pd
	import numpy as np

	def process_xlsx(file, clm):
	df = pd.read_excel(file.name)

	# Add S.No column
	df['S.No'] = range(1, len(df) + 1)

	def compare_tags(row):
	htag = row['human_tag']
	ltag = row.get(clm, None)
	if htag:
	htag = str(htag)
	if htag == 'nan':
	htag = set()
	else:
	htag = set(htag.split(', '))
	if ltag:
	ltag = str(ltag)
	if ltag == 'nan':
	ltag = set()
	else:
	ltag = set(ltag.split(', '))

	if pd.isna(htag) or pd.isna(ltag):
	return np.nan, np.nan, np.nan

	missing_tag = list(htag - ltag)
	incorrect_tags = [tag for tag in ltag if tag not in htag]
	correct_tags = list(htag & ltag)

	return len(missing_tag), len(incorrect_tags), len(correct_tags)

	df[['Missing Tag', 'Incorrect Tag', 'Correct Tag']] = df.apply(compare_tags, axis=1, result_type='expand')

	# Select only the desired columns
	desired_columns = ['S.No', 'first_subject', 'Missing Tag', 'Incorrect Tag', 'human_tag', clm]

	# Apply styling to highlight zeros in green
	def highlight_zeros(val):
	color = 'background-color: lightgreen' if val == 0 else ''
	return color

	styled_df = df[desired_columns].style.applymap(highlight_zeros, subset=['Missing Tag', 'Incorrect Tag'])

	# Calculate counts and percentages
	missing_zero_count = (df['Missing Tag'] == 0).sum()
	incorrect_zero_count = (df['Incorrect Tag'] == 0).sum()
	perfect_match_count = ((df['Missing Tag'] == 0) & (df['Incorrect Tag'] == 0)).sum()
	row_of_incorrect = (df['Incorrect Tag'] != 0).sum()
	row_of_missing = (df['Missing Tag'] != 0).sum()

	total_rows = len(df)

	missing_zero_percentage = f"{(1-(row_of_missing / total_rows)) * 100:.2f}%"
	incorrect_zero_percentage = f"{(1-(row_of_incorrect / total_rows)) * 100:.2f}%"
	perfect_match_percentage = f"{perfect_match_count / total_rows * 100:.2f}%"

	return styled_df, str(row_of_missing), missing_zero_percentage, str(
	row_of_incorrect), incorrect_zero_percentage, str(perfect_match_count), perfect_match_percentage

	def update_dataframe(file, clm):
	    """Click handler: run ``process_xlsx`` once both inputs are usable.

	    Args:
	        file: The uploaded file, or ``None`` when nothing was uploaded.
	        clm: The column name typed by the user.

	    Returns:
	        Seven ``None`` values (one per output component) when the file is
	        missing or the column name is empty or whitespace-only; otherwise
	        the 7-tuple produced by ``process_xlsx(file, clm)``.
	    """
	    # A whitespace-only name can never match a column; treat it as "not provided".
	    if file is None or not clm or not str(clm).strip():
	        return (None,) * 7
	    return process_xlsx(file, clm)

	# Gradio UI: a file upload and a column-name textbox feed update_dataframe;
	# its results are shown in one table and six labels.
	# NOTE(review): the indentation of this listing has been flattened, so the
	# nesting under the `with` statements below is not recoverable from the text
	# alone (e.g. whether process_button belongs inside the first Row) - confirm
	# against the deployed layout before re-indenting.
	with gr.Blocks(fill_height=True) as demo:
	gr.Markdown("# XLSX File Processor")
	gr.Markdown("Upload an XLSX file to process and view selected rows.")

	# Inputs: the workbook, the name of the column to compare, and the trigger.
	with gr.Row():
	file_input = gr.File(label="Upload XLSX File")
	column_input = gr.Textbox(label="Enter Column Name", placeholder="Enter the column name to compare")
	process_button = gr.Button("Process")

	# Read-only table that receives the first value returned by update_dataframe.
	# NOTE(review): these four headers differ from the six columns process_xlsx
	# selects ('first_subject' and the compared column are not listed) - confirm
	# whether that is intended.
	dataframe_output = gr.DataFrame(
	label="Processed Data",
	headers=['S.No', 'Missing Tag', 'Incorrect Tag', 'human_tag'],
	interactive=False
	)

	# Summary labels, declared in pairs: a count followed by a percentage.
	with gr.Row():
	missing_zero_label = gr.Label(label="No of threads with missing tags")
	missing_zero_percentage = gr.Label(label="Missing to cover tags Percentage:")

	with gr.Row():
	incorrect_zero_label = gr.Label(label="No of threads with incorrect tags")
	incorrect_zero_percentage = gr.Label(label="Incorrect tags to cover Percentage:")

	with gr.Row():
	perfect_match_label = gr.Label(label="Perfect Match Count:")
	perfect_match_percentage = gr.Label(label="Perfect Match Percentage:")

	# The order of `outputs` must mirror the 7 values returned by update_dataframe:
	# table, missing count/percentage, incorrect count/percentage, perfect count/percentage.
	process_button.click(update_dataframe, inputs=[file_input, column_input], outputs=[
	dataframe_output,
	missing_zero_label, missing_zero_percentage,
	incorrect_zero_label, incorrect_zero_percentage,
	perfect_match_label, perfect_match_percentage
	])

	# Called unconditionally at module level, so the app starts when this file is run or imported.
	demo.launch()


	##################################################################################
	# import gradio as gr
	# import pandas as pd
	# import numpy as np
	# import matplotlib.pyplot as plt
	#
	#
	# def process_xlsx(file):
	# # Read the Excel file
	# df = pd.read_excel(file.name)
	#
	# # Define a function to compare tags
	# def compare_tags(row):
	# htag = row['human_tag']
	# ltag = row[f"{clm}"]
	# htag = row['human_tag']
	# ltag = row[f"{clm}"]
	# if htag:
	# htag = str(htag)
	# if htag == 'nan':
	# htag = set()
	# else:
	# htag = set(htag.split(', '))
	# if ltag:
	# ltag = str(ltag)
	# if ltag == 'nan':
	# ltag = set()
	# else:
	# ltag = set(ltag.split(', '))
	#
	# if pd.isna(htag) or pd.isna(ltag):
	# return np.nan, np.nan, np.nan
	#
	# missing_tag = list(htag - ltag)
	# incorrect_tags = [tag for tag in ltag if tag not in htag]
	# correct_tags = list(htag & ltag)
	#
	# return len(missing_tag), len(incorrect_tags), len(correct_tags)
	#
	# # Apply the function to each row
	# df[['Missing Tag', 'Incorrect Tag', 'Correct Tag']] = df.apply(compare_tags, axis=1, result_type='expand')
	#
	# # Calculate cumulative sums
	# df['Cumulative Missing'] = df['Missing Tag'].cumsum()
	# df['Cumulative Incorrect'] = df['Incorrect Tag'].cumsum()
	# df['Correct Tag'] = df['Correct Tag'].cumsum()
	#
	# # Get the last row's missing tag and incorrect tag counts
	# last_row = df.iloc[-1]
	# missing_tag_count = last_row['Cumulative Missing']
	# incorrect_tag_count = last_row['Cumulative Incorrect']
	# correct_tag = last_row['Correct Tag']
	#
	# # Create pie chart
	# labels = ['Missing Tags', 'Incorrect Tags', 'Correct Tags']
	# sizes = [missing_tag_count, incorrect_tag_count, correct_tag]
	#
	# plt.figure(figsize=(8, 8))
	# patches, texts, autotexts = plt.pie(sizes, labels=labels, autopct='%1.1f%%',
	# startangle=90, textprops={'fontsize': 14})
	#
	# plt.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
	# plt.title("Tag Comparison", fontsize=16)
	#
	# # Save the figure
	# image_path = "tag_comparison_pie_chart.png"
	# plt.savefig(image_path)
	# plt.close() # Close the figure to free up memory
	#
	# return image_path
	#
	#
	# iface = gr.Interface(
	# process_xlsx,
	# gr.File(label="Upload XLSX File"),
	# "image",
	# title="XLSX File Processor",
	# description="Upload an XLSX file to process and get a pie chart comparison of tags."
	# )
	#
	# iface.launch()