# Source: Hugging Face Spaces - SLU-CSCI4750 / Homework01-PartA-03
# Space status at capture time: Sleeping
# File size: 5,836 Bytes

import numpy as np
import gradio as gr

import pandas as pd

def _nonzero_spread(spread):
    """Return a float copy of *spread* with (near-)zero entries replaced by 1.

    A constant training column has zero range / zero standard deviation;
    dividing by 1 instead keeps its scaled values finite.
    """
    safe = np.array(spread, dtype=float)
    safe[safe < 10 * np.finfo(float).eps] = 1.0
    return safe


def _build_scaled_table(scaled_values, feature_columns, stat_rows):
    """Assemble one output table: instance rows followed by the statistic rows."""
    table = pd.DataFrame(scaled_values, columns=feature_columns)
    table.insert(0, "Instance", list(range(len(table))))
    table = pd.concat([table, stat_rows], ignore_index=True)
    # Round after concatenation so the statistic rows are rounded as well.
    table[feature_columns] = table[feature_columns].round(3)
    return table


def apply_scaling(method, train_df, test_df):
    """Scale train and test features using statistics of the training set.

    The statistics are computed with NumPy and follow the conventions of
    scikit-learn's ``MinMaxScaler`` / ``StandardScaler``: NaN entries are
    ignored when fitting, the standard deviation is the population one
    (``ddof=0``), and a constant column is divided by 1 rather than 0.

    Args:
        method: ``"Min-Max normalization"`` selects min-max scaling to the
            training range; any other value selects standardization.
        train_df: 2-D table of numeric training features (one row per
            instance, one column per feature). Must have at least one row.
        test_df: 2-D table of numeric test features with the same number of
            columns as ``train_df``. It is scaled with the *training*
            statistics.

    Returns:
        A ``(scaled_train, scaled_test)`` tuple of DataFrames with columns
        ``"Instance"``, ``"Normalized Feature 1"``, ``"Normalized Feature 2"``,
        ... (one per input column). Each table lists the scaled instances
        (``Instance`` = 0-based row number) followed by two rows reporting the
        statistics used (min/max or mean/std). Feature values are rounded to
        three decimals.

    Raises:
        ValueError: if an input is not 2-D, the training set is empty, or the
            two inputs have a different number of feature columns.
    """
    train_values = np.asarray(train_df, dtype=float)
    test_values = np.asarray(test_df, dtype=float)

    if train_values.ndim != 2 or test_values.ndim != 2:
        raise ValueError("train_df and test_df must be 2-D tables of features")
    if train_values.shape[0] == 0:
        raise ValueError("train_df must contain at least one instance")
    if train_values.shape[1] != test_values.shape[1]:
        raise ValueError(
            "train_df and test_df must have the same number of feature columns "
            f"({train_values.shape[1]} != {test_values.shape[1]})"
        )

    if method == "Min-Max normalization":
        first_stat = np.nanmin(train_values, axis=0)   # per-column minimum
        second_stat = np.nanmax(train_values, axis=0)  # per-column maximum
        stat_labels = ["Min used for scaling", "Max used for scaling"]
        center = first_stat
        spread = second_stat - first_stat
    else:
        first_stat = np.nanmean(train_values, axis=0)  # per-column mean
        second_stat = np.nanstd(train_values, axis=0)  # population std (ddof=0)
        stat_labels = ["Mean used for scaling", "Std used for scaling"]
        center = first_stat
        spread = second_stat

    # The reported statistic stays untouched (e.g. a std of 0 is shown as 0);
    # only the divisor is guarded against zero.
    divisor = _nonzero_spread(spread)

    feature_columns = [
        f"Normalized Feature {position}"
        for position in range(1, train_values.shape[1] + 1)
    ]

    # Two trailing rows shared by both tables: the statistics used for scaling.
    stat_rows = pd.DataFrame(
        np.vstack([first_stat, second_stat]), columns=feature_columns
    )
    stat_rows.insert(0, "Instance", stat_labels)

    scaled_train_data = _build_scaled_table(
        (train_values - center) / divisor, feature_columns, stat_rows
    )
    scaled_test_data = _build_scaled_table(
        (test_values - center) / divisor, feature_columns, stat_rows
    )
    return scaled_train_data, scaled_test_data


def _feature_input_grid(label):
    """Create a fixed 4-row, 2-column numeric input grid titled *label*."""
    return gr.Dataframe(
        label=label,
        headers=["Feature 1", "Feature 2"],
        datatype=["number", "number"],
        row_count=(4, "fixed"),
        col_count=(2, "fixed"),
    )


def _scaled_output_grid(label):
    """Create a fixed 6-row, 3-column output grid titled *label*.

    The first column is text ("Instance"); the other two are numeric.
    """
    return gr.Dataframe(
        label=label,
        headers=["Instance", "Feature 1", "Feature 2"],
        datatype=["str", "number", "number"],
        row_count=(6, "fixed"),
        col_count=(3, "fixed"),
    )


# Widgets are created in the same order as before:
# training input, method selector, test input, then the two outputs.
input_df1 = _feature_input_grid('Training dataset')

# Earlier drafts used a gr.CheckboxGroup or a gr.Textbox for this input;
# the Dropdown restricts the choice to one of the two supported methods.
input_method = gr.Dropdown(
    ["Min-Max normalization", "Standardization"],
    label="Feature Scaling Method",
    value="Min-Max normalization",
)

input_df2 = _feature_input_grid('Test dataset')

output_df1 = _scaled_output_grid('Normalized Training dataset')

output_df2 = _scaled_output_grid('Normalized Test dataset')



# Column headers shared by every example dataset below.
_EXAMPLE_COLUMNS = ("Feature 1", "Feature 2")


def _example_frame(rows):
    """Build a two-feature example DataFrame from a list of [f1, f2] rows."""
    return pd.DataFrame(np.array(rows), columns=list(_EXAMPLE_COLUMNS))


# Example set 1
train_df1 = _example_frame([[3, 10], [6, 20], [3, 5], [8, 12]])
test_df1 = _example_frame([[2, 7], [1, 9], [3, 6], [4, 12]])

# Example set 2
train_df2 = _example_frame([[1, 10], [3, 20], [4, 5], [5, 12]])
test_df2 = _example_frame([[2, 11], [3, 12], [4, 7], [2, 18]])

# Example set 3
train_df3 = _example_frame([[2, 10], [4, 20], [4, 5], [5, 12]])
test_df3 = _example_frame([[3, 11], [1, 12], [4, 7], [2, 18]])

### configure Gradio
# Text shown at the top of the demo page.
_page_title = "CSCI4750/5750(hw01-PartI): Mathematics for KNN (Question 5: Feature Scaling)"
_page_description = "Click examples below for a quick demo, or change input values by clicking cells"

# One clickable example row; values are listed in the same order as `inputs`
# (method, training table, test table).
_demo_examples = [["Min-Max normalization", train_df2, test_df2]]

interface = gr.Interface(
    fn=apply_scaling,
    inputs=[input_method, input_df1, input_df2],
    outputs=[output_df1, output_df2],
    title=_page_title,
    description=_page_description,
    theme='huggingface',
    examples=_demo_examples,
)


# Start the app as soon as the script runs.
interface.launch(debug=True)