Spaces:
Sleeping
Sleeping
File size: 5,836 Bytes
import numpy as np
import gradio as gr
import pandas as pd
def _build_scaled_table(scaler, raw_df, feature_cols, stat_rows):
    """Scale *raw_df* with the fitted *scaler* and format it for display.

    The result has an "Instance" column holding the row number, one column
    per entry of *feature_cols* holding the scaled values, and the rows of
    *stat_rows* appended at the bottom. Feature values are rounded to three
    decimals.
    """
    table = pd.DataFrame(scaler.transform(raw_df), columns=feature_cols)
    table.insert(0, "Instance", list(range(len(table))))
    table = pd.concat([table, stat_rows], ignore_index=True)
    table[feature_cols] = table[feature_cols].round(3)
    return table


def apply_scaling(method, train_df, test_df):
    """Scale the training and test tables with statistics from the training table.

    Args:
        method: "Min-Max normalization" selects ``MinMaxScaler``; any other
            value selects ``StandardScaler``.
        train_df: Numeric table used to fit the scaler; it is scaled as well.
        test_df: Numeric table with the same number of columns as
            *train_df*; it is scaled with the scaler fitted on *train_df*.

    Returns:
        A ``(scaled_train, scaled_test)`` pair of DataFrames. Each has an
        "Instance" column followed by "Normalized Feature 1",
        "Normalized Feature 2", ... (one per input column). After the data
        rows, both tables end with the same two rows reporting the
        statistics used for scaling: min and max for Min-Max normalization,
        otherwise mean and std (the square root of the scaler's fitted
        variance). All feature values are rounded to three decimals.
    """
    if method == "Min-Max normalization":
        from sklearn.preprocessing import MinMaxScaler

        scaler = MinMaxScaler()
        scaler.fit(train_df)  # learn the per-column min and max
        stat_labels = ["Min used for scaling", "Max used for scaling"]
        stat_values = [scaler.data_min_, scaler.data_max_]
    else:
        from sklearn.preprocessing import StandardScaler

        scaler = StandardScaler()
        scaler.fit(train_df)  # learn the per-column mean and variance
        stat_labels = ["Mean used for scaling", "Std used for scaling"]
        stat_values = [scaler.mean_, np.sqrt(scaler.var_)]

    # Name one output column per fitted feature instead of assuming two.
    feature_cols = [
        f"Normalized Feature {position}"
        for position in range(1, len(stat_values[0]) + 1)
    ]

    # The two statistic rows are shared by the training and the test table.
    stat_rows = pd.DataFrame(np.vstack(stat_values), columns=feature_cols)
    stat_rows.insert(0, "Instance", stat_labels)

    scaled_train_data = _build_scaled_table(scaler, train_df, feature_cols, stat_rows)
    scaled_test_data = _build_scaled_table(scaler, test_df, feature_cols, stat_rows)
    return scaled_train_data, scaled_test_data
# --- Input widgets -------------------------------------------------------
# Editable 4x2 numeric grid holding the training data.
input_df1 = gr.Dataframe(
    label="Training dataset",
    headers=["Feature 1", "Feature 2"],
    datatype=["number", "number"],
    row_count=(4, "fixed"),
    col_count=(2, "fixed"),
)

# Scaling-method selector, pre-set to Min-Max normalization.
# Earlier variants of this input used a CheckboxGroup or a Textbox.
input_method = gr.Dropdown(
    ["Min-Max normalization", "Standardization"],
    value="Min-Max normalization",
    label="Feature Scaling Method",
)

# Editable 4x2 numeric grid holding the test data.
input_df2 = gr.Dataframe(
    label="Test dataset",
    headers=["Feature 1", "Feature 2"],
    datatype=["number", "number"],
    row_count=(4, "fixed"),
    col_count=(2, "fixed"),
)

# --- Output widgets ------------------------------------------------------
# 6x3 grids: four scaled instances plus the two statistic rows appended by
# apply_scaling.
output_df1 = gr.Dataframe(
    label="Normalized Training dataset",
    headers=["Instance", "Feature 1", "Feature 2"],
    datatype=["str", "number", "number"],
    row_count=(6, "fixed"),
    col_count=(3, "fixed"),
)
output_df2 = gr.Dataframe(
    label="Normalized Test dataset",
    headers=["Instance", "Feature 1", "Feature 2"],
    datatype=["str", "number", "number"],
    row_count=(6, "fixed"),
    col_count=(3, "fixed"),
)
# Sample datasets: three train/test pairs, each with four instances and the
# two columns "Feature 1" and "Feature 2".
train_df1 = pd.DataFrame(
    data=np.array([(3, 10), (6, 20), (3, 5), (8, 12)]),
    columns=["Feature 1", "Feature 2"],
)
test_df1 = pd.DataFrame(
    data=np.array([(2, 7), (1, 9), (3, 6), (4, 12)]),
    columns=["Feature 1", "Feature 2"],
)

train_df2 = pd.DataFrame(
    data=np.array([(1, 10), (3, 20), (4, 5), (5, 12)]),
    columns=["Feature 1", "Feature 2"],
)
test_df2 = pd.DataFrame(
    data=np.array([(2, 11), (3, 12), (4, 7), (2, 18)]),
    columns=["Feature 1", "Feature 2"],
)

train_df3 = pd.DataFrame(
    data=np.array([(2, 10), (4, 20), (4, 5), (5, 12)]),
    columns=["Feature 1", "Feature 2"],
)
test_df3 = pd.DataFrame(
    data=np.array([(3, 11), (1, 12), (4, 7), (2, 18)]),
    columns=["Feature 1", "Feature 2"],
)
# Wire up the Gradio app: the selected method and the two input grids are
# passed to apply_scaling, and its two result tables fill the output grids.
interface = gr.Interface(
    fn=apply_scaling,
    inputs=[input_method, input_df1, input_df2],
    outputs=[output_df1, output_df2],
    examples=[["Min-Max normalization", train_df2, test_df2]],
    title="CSCI4750/5750(hw01-PartI): Mathematics for KNN (Question 5: Feature Scaling)",
    description="Click examples below for a quick demo, or change input values by clicking cells",
    theme="huggingface",
)

# Start the app with Gradio's debug flag switched on.
interface.launch(debug=True)
|