local_copilot / data /helper_functions.py
Kash6's picture
Deploy AI Coding Assistant
04653e2
# coding: utf-8
import pandas as pd
import random
# 1. Train-Test-Split
def train_test_split(df, test_size):
    """Randomly split ``df`` into a training and a test DataFrame.

    Rows are sampled by position rather than by index label, so the split
    sizes stay correct even when ``df.index`` contains duplicate labels.

    Args:
        df: DataFrame to split.
        test_size: Number of rows to place in the test set. A ``float`` is
            treated as a proportion of ``len(df)`` and rounded to a row
            count.

    Returns:
        A ``(train_df, test_df)`` tuple. ``test_df`` holds the sampled rows
        in sampled order; ``train_df`` holds the remaining rows in their
        original order.

    Raises:
        ValueError: If the resulting row count is negative or larger than
            ``len(df)`` (raised by ``random.sample``).
    """
    n_rows = len(df)
    if isinstance(test_size, float):
        test_size = round(test_size * n_rows)

    # Sample positions, not labels: label-based lookup/drop would select or
    # remove every row sharing a duplicated label.
    test_positions = random.sample(range(n_rows), k=test_size)
    held_out = set(test_positions)
    train_positions = [pos for pos in range(n_rows) if pos not in held_out]

    test_df = df.iloc[test_positions]
    train_df = df.iloc[train_positions]
    return train_df, test_df
# 2. Distinguish categorical and continuous features
def determine_type_of_feature(df, n_unique_values_threshold=15):
    """Classify every non-label column as categorical or continuous.

    A column is "categorical" when its first unique value is a ``str`` or
    when it has at most ``n_unique_values_threshold`` unique values;
    otherwise it is "continuous". The column named "label" is skipped.

    Args:
        df: DataFrame whose columns are inspected.
        n_unique_values_threshold: Largest number of unique values a
            non-string column may have and still count as categorical.

    Returns:
        A list of "categorical" / "continuous" strings, one per column other
        than "label", in column order.
    """
    feature_types = []
    for feature in df.columns:
        if feature == "label":
            continue

        unique_values = df[feature].unique()
        # Guard the [0] lookup so a column without any rows does not raise.
        starts_with_text = len(unique_values) > 0 and isinstance(unique_values[0], str)
        has_few_values = len(unique_values) <= n_unique_values_threshold

        if starts_with_text or has_few_values:
            feature_types.append("categorical")
        else:
            feature_types.append("continuous")

    return feature_types
# 3. Accuracy
def calculate_accuracy(predictions, labels):
    """Return the mean of the element-wise comparison ``predictions == labels``.

    Both arguments must be objects whose ``==`` result exposes ``.mean()``
    (the comparison result is used directly, without conversion).
    """
    return (predictions == labels).mean()