modefinder / app.py
qwekuaryee's picture
add share
b3ec40e verified
import gradio as gr
import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
import tempfile
def process_file(file):
df = pd.read_excel(file.name)
df.replace(0, pd.NA, inplace=True)
with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
intermediate_file_path = tmp.name
df.to_excel(intermediate_file_path, index=False)
wb = load_workbook(intermediate_file_path)
for sheet_name in wb.sheetnames:
ws = wb[sheet_name]
data = ws.values
columns = next(data)[0:]
df = pd.DataFrame(data, columns=columns)
unique_states = df['Name of State'].unique()
for state in unique_states:
state_str = str(state)
state_data = df[df['Name of State'] == state]
unique_channels = state_data['Outlet Channel'].unique()
mode_data = []
for channel in unique_channels:
channel_str = str(channel)
channel_data = state_data[state_data['Outlet Channel'] == channel]
count_series = channel_data.count()
mode_series = channel_data.mode().iloc[0]
numeric_columns = channel_data.select_dtypes(include='number').columns
max_series = channel_data[numeric_columns].max()
min_series = channel_data[numeric_columns].min()
combined_series = pd.concat([
count_series.rename('COUNT'),
mode_series.rename('MODE'),
max_series.rename('MAX'),
min_series.rename('MIN')
], axis=1).T
combined_series.loc[:,'STATE'] = state_str
combined_series.loc[:,'CHANNEL'] = channel_str
mode_data.append(combined_series)
mode_df = pd.concat(mode_data)
new_ws = wb.create_sheet(title=state_str)
for r in dataframe_to_rows(mode_df, index=False, header=True):
new_ws.append(r)
output_file_path = 'state_and_channel_modes.xlsx'
wb.save(output_file_path)
return output_file_path
def process_regional(file):
df = pd.read_excel(file.name)
df.replace(0, pd.NA, inplace=True)
with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
intermediate_file_path = tmp.name
df.to_excel(intermediate_file_path, index=False)
wb = load_workbook(intermediate_file_path)
for sheet_name in wb.sheetnames:
ws = wb[sheet_name]
data = ws.values
columns = next(data)[0:]
df = pd.DataFrame(data, columns=columns)
unique_states = df['Region'].unique()
for state in unique_states:
state_str = str(state)
state_data = df[df['Region'] == state]
unique_channels = state_data['Outlet Channel'].unique()
mode_data = []
for channel in unique_channels:
channel_str = str(channel)
channel_data = state_data[state_data['Outlet Channel'] == channel]
count_series = channel_data.count()
mode_series = channel_data.mode().iloc[0]
numeric_columns = channel_data.select_dtypes(include='number').columns
max_series = channel_data[numeric_columns].max()
min_series = channel_data[numeric_columns].min()
combined_series = pd.concat([
count_series.rename('COUNT'),
mode_series.rename('MODE'),
max_series.rename('MAX'),
min_series.rename('MIN')
], axis=1).T
combined_series.loc[:,'STATE'] = state_str
combined_series.loc[:,'CHANNEL'] = channel_str
mode_data.append(combined_series)
mode_df = pd.concat(mode_data)
new_ws = wb.create_sheet(title=state_str)
for r in dataframe_to_rows(mode_df, index=False, header=True):
new_ws.append(r)
output_file_path = 'regional.xlsx'
wb.save(output_file_path)
return output_file_path
def process_national(file):
df = pd.read_excel(file.name)
df.replace(0, pd.NA, inplace=True)
with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
intermediate_file_path = tmp.name
df.to_excel(intermediate_file_path, index=False)
wb = load_workbook(intermediate_file_path)
for sheet_name in wb.sheetnames:
ws = wb[sheet_name]
data = ws.values
columns = next(data)[0:]
df = pd.DataFrame(data, columns=columns)
unique_states = df['Nation'].unique()
for state in unique_states:
state_str = str(state)
state_data = df[df['Nation'] == state]
unique_channels = state_data['Outlet Channel'].unique()
mode_data = []
for channel in unique_channels:
channel_str = str(channel)
channel_data = state_data[state_data['Outlet Channel'] == channel]
count_series = channel_data.count()
mode_series = channel_data.mode().iloc[0]
numeric_columns = channel_data.select_dtypes(include='number').columns
max_series = channel_data[numeric_columns].max()
min_series = channel_data[numeric_columns].min()
combined_series = pd.concat([
count_series.rename('COUNT'),
mode_series.rename('MODE'),
max_series.rename('MAX'),
min_series.rename('MIN')
], axis=1).T
combined_series.loc[:,'STATE'] = state_str
combined_series.loc[:,'CHANNEL'] = channel_str
mode_data.append(combined_series)
mode_df = pd.concat(mode_data)
new_ws = wb.create_sheet(title=state_str)
for r in dataframe_to_rows(mode_df, index=False, header=True):
new_ws.append(r)
output_file_path = 'national.xlsx'
wb.save(output_file_path)
return output_file_path
iface = gr.Interface(
fn=process_file,
inputs=gr.File(file_types=[".xlsx"]),
outputs=gr.File(),
title="Excel File Processor",
description="Upload an Excel file to process it and generate a new file with state and channel modes."
)
iface2 = gr.Interface(
fn=process_regional,
inputs=gr.File(file_types=[".xlsx"]),
outputs=gr.File(),
title="Excel File Processor",
description="Upload an Excel file to process it and generate a new file with regional and channel modes."
)
iface3 = gr.Interface(
fn=process_national,
inputs=gr.File(file_types=[".xlsx"]),
outputs=gr.File(),
title="Excel File Processor",
description="Upload an Excel file to process it and generate a new file with national and channel modes."
)
gr.TabbedInterface([iface3, iface2, iface], tab_names=['National', 'Region', 'State']).launch(share=True)