# app.py -- Hugging Face Space source.
# Last commit: "Update app.py" by jiehou (b581d09, verified).
import numpy as np
import gradio as gr
import pandas as pd
def _build_scaled_table(scaled_values, stat_rows):
    """Assemble the display table for one scaled dataset.

    Args:
        scaled_values: 2-D array of scaled features, one row per instance.
        stat_rows: Sequence of ``(label, per_feature_values)`` pairs that are
            appended below the instances as extra rows.

    Returns:
        A DataFrame with an "Instance" column followed by one
        "Normalized Feature k" column per feature; feature values are
        rounded to 3 decimals.
    """
    scaled_values = np.asarray(scaled_values)
    n_rows, n_features = scaled_values.shape
    # Column names are derived from the data width instead of being fixed at two.
    feature_cols = [f"Normalized Feature {k}" for k in range(1, n_features + 1)]

    table = pd.DataFrame(scaled_values, columns=feature_cols)
    table.insert(0, "Instance", list(range(n_rows)))

    # One extra row per statistic; the label goes in the "Instance" column.
    stats = pd.DataFrame(
        np.vstack([values for _, values in stat_rows]), columns=feature_cols
    )
    stats.insert(0, "Instance", [label for label, _ in stat_rows])

    table = pd.concat([table, stats], ignore_index=True)
    table[feature_cols] = table[feature_cols].round(3)
    return table


def apply_scaling(method, train_df, test_df):
    """Scale the training and test features using training-set statistics.

    The scaler is fitted on ``train_df`` only and then applied to both
    datasets, so the test data is transformed with the training statistics.

    Args:
        method: "Min-Max normalization" selects ``MinMaxScaler``; any other
            value selects ``StandardScaler``.
        train_df: Table of numeric training features, one row per instance.
        test_df: Table of numeric test features with the same columns.

    Returns:
        A ``(scaled_train, scaled_test)`` tuple of DataFrames. Each has an
        "Instance" column (row index, then the statistic labels) and one
        "Normalized Feature k" column per input feature. The last two rows of
        both tables hold the statistics learned from the training set:
        min/max for Min-Max normalization, mean/std for standardization.
    """
    if method == "Min-Max normalization":
        from sklearn.preprocessing import MinMaxScaler

        scaler = MinMaxScaler()
        scaler.fit(train_df)  # learn the per-column min and max
        stat_rows = [
            ("Min used for scaling", scaler.data_min_),
            ("Max used for scaling", scaler.data_max_),
        ]
    else:
        from sklearn.preprocessing import StandardScaler

        scaler = StandardScaler()
        scaler.fit(train_df)  # learn the per-column mean and variance
        stat_rows = [
            ("Mean used for scaling", scaler.mean_),
            # The scaler stores the variance; report its square root as the std.
            ("Std used for scaling", np.sqrt(scaler.var_)),
        ]

    scaled_train_data = _build_scaled_table(scaler.transform(train_df), stat_rows)
    scaled_test_data = _build_scaled_table(scaler.transform(test_df), stat_rows)
    return scaled_train_data, scaled_test_data
def _fixed_grid(label, headers, datatype, rows):
    """Create a gr.Dataframe whose row and column counts are both fixed."""
    return gr.Dataframe(
        headers=headers,
        datatype=datatype,
        row_count=(rows, "fixed"),
        col_count=(len(headers), "fixed"),
        label=label,
    )


# Components are created in the same order as before:
# training grid, method selector, test grid, then the two output grids.

# 4x2 numeric grid for the training instances.
input_df1 = _fixed_grid(
    "Training dataset", ["Feature 1", "Feature 2"], ["number", "number"], 4
)

# Scaling method selector, preset to Min-Max normalization.
# (CheckboxGroup and Textbox variants of this input were tried previously.)
input_method = gr.Dropdown(
    choices=["Min-Max normalization", "Standardization"],
    value="Min-Max normalization",
    label="Feature Scaling Method",
)

# 4x2 numeric grid for the test instances.
input_df2 = _fixed_grid(
    "Test dataset", ["Feature 1", "Feature 2"], ["number", "number"], 4
)

# 6x3 output grids: a text "Instance" column plus two numeric feature columns.
output_df1 = _fixed_grid(
    "Normalized Training dataset",
    ["Instance", "Feature 1", "Feature 2"],
    ["str", "number", "number"],
    6,
)
output_df2 = _fixed_grid(
    "Normalized Test dataset",
    ["Instance", "Feature 1", "Feature 2"],
    ["str", "number", "number"],
    6,
)
def _example_frame(rows):
    """Wrap a list of [feature 1, feature 2] rows in a labelled DataFrame."""
    return pd.DataFrame(np.array(rows), columns=["Feature 1", "Feature 2"])


# Example dataset 1
train_df1 = _example_frame([[3, 10], [6, 20], [3, 5], [8, 12]])
test_df1 = _example_frame([[2, 7], [1, 9], [3, 6], [4, 12]])

# Example dataset 2
train_df2 = _example_frame([[1, 10], [3, 20], [4, 5], [5, 12]])
test_df2 = _example_frame([[2, 11], [3, 12], [4, 7], [2, 18]])

# Example dataset 3
train_df3 = _example_frame([[2, 10], [4, 20], [4, 5], [5, 12]])
test_df3 = _example_frame([[3, 11], [1, 12], [4, 7], [2, 18]])
### Gradio wiring
_APP_TITLE = "CSCI4750/5750(hw01-PartI): Mathematics for KNN (Question 5: Feature Scaling)"
_APP_DESCRIPTION = "Click examples below for a quick demo, or change input values by clicking cells"

# Inputs are listed in apply_scaling's parameter order (method, train, test);
# outputs are listed in its return order (scaled train, scaled test).
interface = gr.Interface(
    fn=apply_scaling,
    inputs=[input_method, input_df1, input_df2],
    outputs=[output_df1, output_df2],
    examples=[["Min-Max normalization", train_df2, test_df2]],
    title=_APP_TITLE,
    description=_APP_DESCRIPTION,
    # NOTE(review): 'huggingface' looks like a legacy theme name -- confirm the
    # pinned Gradio version still recognizes it.
    theme="huggingface",
)

# Start the app with debug mode switched on.
interface.launch(debug=True)