"""Gradio demo comparing min-max normalization and standardization.

The user enters a small training set and test set, picks a scaling method,
and gets both sets back scaled with statistics fitted on the training set.
"""

from typing import Tuple

import gradio as gr
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler

MIN_MAX_METHOD = "Min-Max normalization"
STANDARDIZATION_METHOD = "Standardization"


def _build_output_table(scaled_values, stat_rows, feature_columns):
    """Return one display table: instance rows, then the statistic rows."""
    table = pd.DataFrame(scaled_values, columns=feature_columns)
    table.insert(0, "Instance", list(range(len(table))))
    table = pd.concat([table, stat_rows], ignore_index=True)
    # Rounding is applied after the concat, so the appended statistic rows
    # are rounded to three decimals as well.
    table[feature_columns] = table[feature_columns].round(3)
    return table


def apply_scaling(
    method: str, train_df: pd.DataFrame, test_df: pd.DataFrame
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Scale the training and test sets with statistics fitted on training data.

    Args:
        method: ``"Min-Max normalization"`` selects ``MinMaxScaler``; any
            other value selects ``StandardScaler``.
        train_df: Training features; the scaler is fitted on this data only.
        test_df: Test features; transformed with the already-fitted scaler.

    Returns:
        ``(scaled_train, scaled_test)``. Each table has an ``"Instance"``
        column (row number) followed by one ``"Normalized Feature k"`` column
        per input feature, rounded to three decimals. Two extra rows are
        appended to both tables holding the statistics used for scaling:
        min and max for min-max normalization, mean and standard deviation
        otherwise.
    """
    if method == MIN_MAX_METHOD:
        scaler = MinMaxScaler()
        scaler.fit(train_df)
        # Per-column minimum and maximum seen in the training data.
        stat_labels = ["Min used for scaling", "Max used for scaling"]
        stat_values = [scaler.data_min_, scaler.data_max_]
    else:
        scaler = StandardScaler()
        scaler.fit(train_df)
        # Per-column mean and standard deviation (sqrt of the fitted variance).
        stat_labels = ["Mean used for scaling", "Std used for scaling"]
        stat_values = [scaler.mean_, np.sqrt(scaler.var_)]

    # Both sets are transformed with the scaler fitted on the training set.
    scaled_train = scaler.transform(train_df)
    scaled_test = scaler.transform(test_df)

    feature_columns = [
        f"Normalized Feature {k}" for k in range(1, scaled_train.shape[1] + 1)
    ]

    # The same two statistic rows are appended to the train and test tables.
    stat_rows = pd.DataFrame(np.vstack(stat_values), columns=feature_columns)
    stat_rows.insert(0, "Instance", stat_labels)

    scaled_train_data = _build_output_table(scaled_train, stat_rows, feature_columns)
    scaled_test_data = _build_output_table(scaled_test, stat_rows, feature_columns)
    return scaled_train_data, scaled_test_data


# --- Input components -------------------------------------------------------

input_df1 = gr.Dataframe(
    headers=["Feature 1", "Feature 2"],
    datatype=["number", "number"],
    row_count=(4, "fixed"),
    col_count=(2, "fixed"),
    label='Training dataset',
)

input_method = gr.Dropdown(
    [MIN_MAX_METHOD, STANDARDIZATION_METHOD],
    value=MIN_MAX_METHOD,
    label="Feature Scaling Method",
)

input_df2 = gr.Dataframe(
    headers=["Feature 1", "Feature 2"],
    datatype=["number", "number"],
    row_count=(4, "fixed"),
    col_count=(2, "fixed"),
    label='Test dataset',
)

# --- Output components ------------------------------------------------------
# Six rows each: four instances plus the two appended statistic rows.

output_df1 = gr.Dataframe(
    headers=["Instance", "Feature 1", "Feature 2"],
    datatype=["str", "number", "number"],
    row_count=(6, "fixed"),
    col_count=(3, "fixed"),
    label='Normalized Training dataset',
)

output_df2 = gr.Dataframe(
    headers=["Instance", "Feature 1", "Feature 2"],
    datatype=["str", "number", "number"],
    row_count=(6, "fixed"),
    col_count=(3, "fixed"),
    label='Normalized Test dataset',
)

# --- Sample datasets --------------------------------------------------------
# Only the second pair (train_df2 / test_df2) is wired into the interface
# examples below; the other pairs are defined but not referenced there.

train_df1 = pd.DataFrame(
    np.array([[3, 10], [6, 20], [3, 5], [8, 12]]),
    columns=["Feature 1", "Feature 2"],
)
test_df1 = pd.DataFrame(
    np.array([[2, 7], [1, 9], [3, 6], [4, 12]]),
    columns=["Feature 1", "Feature 2"],
)

train_df2 = pd.DataFrame(
    np.array([[1, 10], [3, 20], [4, 5], [5, 12]]),
    columns=["Feature 1", "Feature 2"],
)
test_df2 = pd.DataFrame(
    np.array([[2, 11], [3, 12], [4, 7], [2, 18]]),
    columns=["Feature 1", "Feature 2"],
)

train_df3 = pd.DataFrame(
    np.array([[2, 10], [4, 20], [4, 5], [5, 12]]),
    columns=["Feature 1", "Feature 2"],
)
test_df3 = pd.DataFrame(
    np.array([[3, 11], [1, 12], [4, 7], [2, 18]]),
    columns=["Feature 1", "Feature 2"],
)

# --- Configure and launch the Gradio interface -------------------------------

interface = gr.Interface(
    fn=apply_scaling,
    inputs=[input_method, input_df1, input_df2],
    outputs=[output_df1, output_df2],
    title="CSCI4750/5750(hw01-PartI): Mathematics for KNN (Question 5: Feature Scaling)",
    description="Click examples below for a quick demo, or change input values by clicking cells",
    theme='huggingface',
    examples=[[MIN_MAX_METHOD, train_df2, test_df2]],
)

interface.launch(debug=True)