Spaces:
Sleeping
Sleeping
| # coding: utf-8 | |
| import pandas as pd | |
| import random | |
| # 1. Train-Test-Split | |
def train_test_split(df, test_size):
    """Randomly split ``df`` into a training and a test DataFrame.

    Args:
        df: pandas DataFrame to split.
        test_size: Number of rows for the test set (int), or the fraction of
            ``len(df)`` to hold out (float), rounded to a whole row count.

    Returns:
        A ``(train_df, test_df)`` tuple. ``test_df`` holds the sampled rows in
        sampling order; ``train_df`` holds the remaining rows in their
        original order.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``test_size`` is
            negative or larger than the number of rows.
    """
    if isinstance(test_size, float):
        test_size = round(test_size * len(df))

    n_rows = len(df)

    # Sample row positions rather than index labels: label-based selection
    # (loc/drop) picks up every row sharing a label, so a DataFrame with a
    # non-unique index would yield a test set of the wrong size and a train
    # set missing extra rows.
    test_positions = random.sample(range(n_rows), k=test_size)
    held_out = set(test_positions)
    train_positions = [pos for pos in range(n_rows) if pos not in held_out]

    test_df = df.iloc[test_positions]
    train_df = df.iloc[train_positions]
    return train_df, test_df
| # 2. Distinguish categorical and continuous features | |
def determine_type_of_feature(df, n_unique_values_threshold=15, label_column="label"):
    """Classify every non-label column of ``df`` as categorical or continuous.

    A column is "categorical" when it has at most
    ``n_unique_values_threshold`` distinct values, or when any of its distinct
    values is a string; otherwise it is "continuous".

    Args:
        df: pandas DataFrame whose columns are inspected.
        n_unique_values_threshold: Largest number of distinct values a column
            may have and still be treated as categorical.
        label_column: Name of the column to skip (the prediction target).

    Returns:
        A list of ``"categorical"`` / ``"continuous"`` strings, one per column
        other than ``label_column``, in column order.
    """
    feature_types = []
    for feature in df.columns:
        if feature == label_column:
            continue

        unique_values = df[feature].unique()

        # Check the cardinality first so an empty column never needs a sample
        # value, and scan all distinct values for strings so a leading
        # missing value cannot hide a string column.
        is_categorical = len(unique_values) <= n_unique_values_threshold or any(
            isinstance(value, str) for value in unique_values
        )
        feature_types.append("categorical" if is_categorical else "continuous")

    return feature_types
| # 3. Accuracy | |
def calculate_accuracy(predictions, labels):
    """Return the fraction of ``predictions`` that equal ``labels``.

    The two arguments are compared element-wise with ``==`` and the mean of
    the resulting boolean values is returned.
    """
    return (predictions == labels).mean()