Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import gradio as gr | |
| import pandas as pd | |
def _format_scaled_table(scaled_values, stat_rows):
    """Build the display table for one scaled dataset.

    Args:
        scaled_values: 2-D array of scaled features, one row per instance
            and one column per feature (the output of ``scaler.transform``).
        stat_rows: sequence of ``(label, per_feature_values)`` pairs; each
            pair becomes one extra row appended below the instances, with
            ``label`` placed in the "Instance" column.

    Returns:
        A DataFrame with columns ``"Instance"``, ``"Normalized Feature 1"``,
        ``"Normalized Feature 2"``, ... whose feature columns are rounded to
        three decimals.
    """
    n_features = scaled_values.shape[1]
    feature_cols = [f"Normalized Feature {i}" for i in range(1, n_features + 1)]

    table = pd.DataFrame(scaled_values, columns=feature_cols)
    # Instances are numbered from 0 and shown as the first column.
    table.insert(0, "Instance", list(range(len(table))))

    # One summary row per statistic, laid out with the same columns as the
    # instance rows so the two frames can be stacked directly.
    summary = pd.DataFrame(
        {
            "Instance": [label for label, _ in stat_rows],
            **{
                col: [values[j] for _, values in stat_rows]
                for j, col in enumerate(feature_cols)
            },
        }
    )
    table = pd.concat([table, summary], ignore_index=True)

    # Rounding is applied after stacking, so the statistic rows are rounded too.
    table[feature_cols] = table[feature_cols].round(3)
    return table


def apply_scaling(method, train_df, test_df):
    """Scale a training and a test dataset with statistics from the training set.

    The scaler is fitted on ``train_df`` only and then used to transform both
    ``train_df`` and ``test_df``. Two extra rows are appended to each returned
    table showing the statistics that were used for scaling.

    Args:
        method: ``"Min-Max normalization"`` selects ``MinMaxScaler``; any
            other value selects ``StandardScaler``.
        train_df: training features, one column per feature.
        test_df: test features with the same columns as ``train_df``.

    Returns:
        A ``(scaled_train, scaled_test)`` tuple of DataFrames. Each has an
        "Instance" column followed by one "Normalized Feature k" column per
        input feature, with values rounded to three decimals. The last two
        rows hold the min/max (min-max mode) or mean/std (standardization
        mode) learned from the training data.
    """
    if method == "Min-Max normalization":
        from sklearn.preprocessing import MinMaxScaler

        scaler = MinMaxScaler()
        scaler.fit(train_df)  # learn per-column min and max from the training data
        stat_rows = [
            ("Min used for scaling", scaler.data_min_),
            ("Max used for scaling", scaler.data_max_),
        ]
    else:
        from sklearn.preprocessing import StandardScaler

        scaler = StandardScaler()
        scaler.fit(train_df)  # learn per-column mean and variance from the training data
        stat_rows = [
            ("Mean used for scaling", scaler.mean_),
            # The displayed std is the square root of the fitted variance.
            ("Std used for scaling", np.sqrt(scaler.var_)),
        ]

    # The same fitted scaler is applied to both datasets, so the test data is
    # scaled with the training statistics.
    scaled_train_data = _format_scaled_table(scaler.transform(train_df), stat_rows)
    scaled_test_data = _format_scaled_table(scaler.transform(test_df), stat_rows)
    return scaled_train_data, scaled_test_data
def _fixed_table(label, headers, datatype, n_rows):
    """Create a gr.Dataframe with a fixed number of rows and one fixed column per header."""
    return gr.Dataframe(
        headers=list(headers),
        datatype=list(datatype),
        row_count=(n_rows, "fixed"),
        col_count=(len(headers), "fixed"),
        label=label,
    )


# Column layouts shared by the tables below.
_FEATURE_HEADERS = ("Feature 1", "Feature 2")
_FEATURE_TYPES = ("number", "number")
_RESULT_HEADERS = ("Instance",) + _FEATURE_HEADERS
_RESULT_TYPES = ("str",) + _FEATURE_TYPES

# Inputs: a 4x2 training table, the scaling-method selector, and a 4x2 test table.
input_df1 = _fixed_table("Training dataset", _FEATURE_HEADERS, _FEATURE_TYPES, 4)

# Earlier drafts used a CheckboxGroup or a Textbox for the method input;
# a Dropdown with the two supported methods is used instead.
input_method = gr.Dropdown(
    ["Min-Max normalization", "Standardization"],
    value="Min-Max normalization",
    label="Feature Scaling Method",
)

input_df2 = _fixed_table("Test dataset", _FEATURE_HEADERS, _FEATURE_TYPES, 4)

# Outputs: 6x3 tables (four instances plus the two statistic rows).
output_df1 = _fixed_table("Normalized Training dataset", _RESULT_HEADERS, _RESULT_TYPES, 6)
output_df2 = _fixed_table("Normalized Test dataset", _RESULT_HEADERS, _RESULT_TYPES, 6)
def _example_frame(rows):
    """Wrap a list of [feature 1, feature 2] rows in a two-column DataFrame."""
    return pd.DataFrame(np.array(rows), columns=["Feature 1", "Feature 2"])


# Three train/test example pairs, four instances each.
train_df1 = _example_frame([[3, 10], [6, 20], [3, 5], [8, 12]])
test_df1 = _example_frame([[2, 7], [1, 9], [3, 6], [4, 12]])

train_df2 = _example_frame([[1, 10], [3, 20], [4, 5], [5, 12]])
test_df2 = _example_frame([[2, 11], [3, 12], [4, 7], [2, 18]])

train_df3 = _example_frame([[2, 10], [4, 20], [4, 5], [5, 12]])
test_df3 = _example_frame([[3, 11], [1, 12], [4, 7], [2, 18]])
# Gradio wiring: the scaling method and the two input tables are passed to
# apply_scaling, whose two returned tables fill the two output components.
_demo_examples = [["Min-Max normalization", train_df2, test_df2]]

interface = gr.Interface(
    fn=apply_scaling,
    inputs=[input_method, input_df1, input_df2],
    outputs=[output_df1, output_df2],
    title="CSCI4750/5750(hw01-PartI): Mathematics for KNN (Question 5: Feature Scaling)",
    description="Click examples below for a quick demo, or change input values by clicking cells",
    theme="huggingface",
    examples=_demo_examples,
)

# Start the app.
interface.launch(debug=True)