Spaces:
Sleeping
Sleeping
File size: 1,143 Bytes
# coding: utf-8
import pandas as pd
import random
# 1. Train-Test-Split
def train_test_split(df, test_size):
    """Randomly split ``df`` into a training and a test DataFrame.

    Args:
        df: The pandas DataFrame to split.
        test_size: Number of rows to place in the test set. A ``float`` is
            interpreted as a proportion of ``len(df)`` and rounded to a
            whole number of rows; any other value is passed to
            ``random.sample`` as the row count unchanged.

    Returns:
        A ``(train_df, test_df)`` tuple. ``test_df`` holds the sampled rows
        in the order they were drawn; ``train_df`` holds all remaining rows
        in their original order.

    Raises:
        ValueError: Raised by ``random.sample`` when ``test_size`` is
            negative or larger than the number of rows.
    """
    n_rows = len(df)
    if isinstance(test_size, float):
        test_size = round(test_size * n_rows)

    # Sample row *positions* instead of index labels. Selecting by label
    # breaks when the index contains duplicates: ``df.loc`` would return
    # every row sharing a sampled label and ``df.drop`` would remove all of
    # them, so the two halves would no longer partition the rows.
    test_positions = random.sample(population=range(n_rows), k=test_size)
    held_out = set(test_positions)
    train_positions = [pos for pos in range(n_rows) if pos not in held_out]

    test_df = df.iloc[test_positions]
    train_df = df.iloc[train_positions]
    return train_df, test_df
# 2. Distinguish categorical and continuous features
def determine_type_of_feature(df, n_unique_values_threshold=15):
    """Classify every feature column as "categorical" or "continuous".

    The column named "label" is skipped. A feature is categorical when it
    has at most ``n_unique_values_threshold`` distinct values or when its
    first distinct value is a string; otherwise it is continuous.

    Args:
        df: DataFrame whose columns are the features (plus, optionally, a
            "label" column).
        n_unique_values_threshold: Largest number of distinct values a
            non-string feature may have and still count as categorical.
            Defaults to 15.

    Returns:
        A list of "categorical" / "continuous" strings, one per non-label
        column, in column order.
    """
    feature_types = []
    for feature in df.columns:
        if feature == "label":
            continue

        unique_values = df[feature].unique()
        # The size check comes first so that, thanks to short-circuiting, a
        # column with no values is never indexed (which would raise
        # IndexError on an empty DataFrame).
        is_categorical = (
            len(unique_values) <= n_unique_values_threshold
            or isinstance(unique_values[0], str)
        )
        feature_types.append("categorical" if is_categorical else "continuous")

    return feature_types
# 3. Accuracy
def calculate_accuracy(predictions, labels):
    """Return the fraction of predictions that equal their labels.

    The two arguments are compared with ``==`` and ``.mean()`` is called on
    the result, so the comparison must yield an object that provides
    ``.mean()`` (for example a pandas Series of booleans).
    """
    return (predictions == labels).mean()