HackHPC / README.md
Seanyoon's picture
Create README.md
7b2b370
|
raw
history blame
445 Bytes

import pandas as pd

def preprocess_data(data, *, threshold=0.5):
    """Return a copy of *data* without its high-cardinality columns.

    For every column, the ratio ``distinct values / number of rows`` is
    computed (missing values count as one distinct value).  Columns whose
    ratio is strictly greater than *threshold* are dropped; the remaining
    columns keep their original order.

    Args:
        data: The ``pandas.DataFrame`` to filter.  It is not modified.
        threshold: Cut-off for the distinct-value ratio.  The default of
            ``0.5`` drops every column in which more than half of the rows
            hold a distinct value.

    Returns:
        A new ``pandas.DataFrame`` containing only the retained columns.
    """
    n_rows = len(data.index)
    if n_rows == 0:
        # With no rows the ratio is undefined (division by zero), so no
        # column can be flagged; hand back an unmodified copy.
        return data.copy()

    # Columns are inspected by position so that frames with duplicated
    # column labels are still measured one physical column at a time.
    sensitive_cols = [
        name
        for pos, name in enumerate(data.columns)
        if data.iloc[:, pos].nunique(dropna=False) / n_rows > threshold
    ]
    return data.drop(columns=sensitive_cols)