Spaces:
Sleeping
Sleeping
Added MaxAE, MedAE and Execution Time; Updated algorithms
Browse files
- DecisionTreeRegressor.py +1 -1
- RandomForestRegressor.py +3 -5
- app.py +46 -33
DecisionTreeRegressor.py
CHANGED
|
@@ -12,7 +12,7 @@ class Node():
|
|
| 12 |
|
| 13 |
|
| 14 |
class DecisionTreeRegressor():
|
| 15 |
-
def __init__(self, min_samples_split, max_depth
|
| 16 |
self.root = None
|
| 17 |
self.min_samples_split = min_samples_split
|
| 18 |
self.max_depth = max_depth
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
class DecisionTreeRegressor():
|
| 15 |
+
def __init__(self, min_samples_split, max_depth):
|
| 16 |
self.root = None
|
| 17 |
self.min_samples_split = min_samples_split
|
| 18 |
self.max_depth = max_depth
|
RandomForestRegressor.py
CHANGED
|
@@ -3,11 +3,10 @@ from sklearn.tree import DecisionTreeRegressor as SKLearnDecisionTreeRegressor
|
|
| 3 |
import numpy as np
|
| 4 |
|
| 5 |
class RandomForestRegressor:
|
| 6 |
-
def __init__(self, n_estimators, max_depth, min_samples_split,
|
| 7 |
self.n_estimators = n_estimators
|
| 8 |
self.max_depth = max_depth
|
| 9 |
self.min_samples_split = min_samples_split
|
| 10 |
-
self.min_samples_leaf = min_samples_leaf
|
| 11 |
self.trees = []
|
| 12 |
self.custom = custom
|
| 13 |
|
|
@@ -15,8 +14,7 @@ class RandomForestRegressor:
|
|
| 15 |
if tree_params is None:
|
| 16 |
tree_params = {
|
| 17 |
'max_depth': self.max_depth,
|
| 18 |
-
'min_samples_split': self.min_samples_split
|
| 19 |
-
'min_samples_leaf': self.min_samples_leaf
|
| 20 |
}
|
| 21 |
|
| 22 |
# Convert X and y to NumPy arrays
|
|
@@ -43,4 +41,4 @@ class RandomForestRegressor:
|
|
| 43 |
for i, tree in enumerate(self.trees):
|
| 44 |
predictions[:, i] = tree.predict(X.values)
|
| 45 |
|
| 46 |
-
return np.mean(predictions, axis=1)
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
|
| 5 |
class RandomForestRegressor:
|
| 6 |
+
def __init__(self, n_estimators, max_depth, min_samples_split, custom=True):
|
| 7 |
self.n_estimators = n_estimators
|
| 8 |
self.max_depth = max_depth
|
| 9 |
self.min_samples_split = min_samples_split
|
|
|
|
| 10 |
self.trees = []
|
| 11 |
self.custom = custom
|
| 12 |
|
|
|
|
| 14 |
if tree_params is None:
|
| 15 |
tree_params = {
|
| 16 |
'max_depth': self.max_depth,
|
| 17 |
+
'min_samples_split': self.min_samples_split
|
|
|
|
| 18 |
}
|
| 19 |
|
| 20 |
# Convert X and y to NumPy arrays
|
|
|
|
| 41 |
for i, tree in enumerate(self.trees):
|
| 42 |
predictions[:, i] = tree.predict(X.values)
|
| 43 |
|
| 44 |
+
return np.mean(predictions, axis=1)
|
app.py
CHANGED
|
@@ -3,7 +3,8 @@ import gradio as gr
|
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
|
| 5 |
from sklearn.model_selection import train_test_split
|
| 6 |
-
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, explained_variance_score
|
|
|
|
| 7 |
|
| 8 |
from RandomForestRegressor import RandomForestRegressor
|
| 9 |
from LinearRegression import LinearRegression
|
|
@@ -92,8 +93,7 @@ def residual_plot_graph(x, y, color='black'):
|
|
| 92 |
# Decision Tree - Custom
|
| 93 |
def decision_tree(X_train, y_train, X_test, max_depth, min_samples_split):
|
| 94 |
Custom_Decision_Tree_Regressor = DecisionTreeRegressor(max_depth=max_depth,
|
| 95 |
-
min_samples_split=min_samples_split
|
| 96 |
-
min_samples_leaf=None)
|
| 97 |
Custom_Decision_Tree_Regressor.fit(X_train.values, y_train.values)
|
| 98 |
Custom_Decision_Tree_Regressor_Prediction = Custom_Decision_Tree_Regressor.predict(X_test.values)
|
| 99 |
return Custom_Decision_Tree_Regressor_Prediction
|
|
@@ -110,10 +110,9 @@ def decision_tree_sklearn(X_train, y_train, X_test, max_depth, min_samples_split
|
|
| 110 |
|
| 111 |
|
| 112 |
# Random Forest - Custom
|
| 113 |
-
def random_forest(X_train, y_train, X_test, n_estimators, max_depth, min_samples_split
|
| 114 |
Custom_Random_Forest_Regressor = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth,
|
| 115 |
-
min_samples_split=min_samples_split
|
| 116 |
-
min_samples_leaf=min_samples_leaf)
|
| 117 |
Custom_Random_Forest_Regressor.fit(X_train, y_train)
|
| 118 |
Custom_Random_Forest_Regressor_Prediction = Custom_Random_Forest_Regressor.predict(X_test)
|
| 119 |
return Custom_Random_Forest_Regressor_Prediction
|
|
@@ -132,7 +131,6 @@ def random_forest_sklearn_decision_trees(X_train, y_train, X_test, n_estimators,
|
|
| 132 |
SKLearn_Decision_Trees_Random_Forest_Regressor = RandomForestRegressor(n_estimators=n_estimators,
|
| 133 |
max_depth=max_depth,
|
| 134 |
min_samples_split=min_samples_split,
|
| 135 |
-
min_samples_leaf=min_samples_leaf,
|
| 136 |
custom=False)
|
| 137 |
SKLearn_Decision_Trees_Random_Forest_Regressor.fit(X_train, y_train)
|
| 138 |
SKLearn_Decision_Trees_Random_Forest_Regressor_Prediction = SKLearn_Decision_Trees_Random_Forest_Regressor.predict(
|
|
@@ -157,14 +155,20 @@ def linear_regression_sklearn(X_train, y_train, X_test):
|
|
| 157 |
|
| 158 |
|
| 159 |
def evaluate_algorithm(algorithm_function, X_train, y_train, X_test, y_test, algorithm_parameters):
    """Run one regression algorithm and score its predictions on the test set.

    Args:
        algorithm_function: Callable invoked as
            ``algorithm_function(X_train, y_train, X_test, **algorithm_parameters)``
            that returns the predictions for ``X_test``.
        X_train, y_train: Training data, forwarded to the callable untouched.
        X_test: Test features, forwarded to the callable untouched.
        y_test: True targets the predictions are scored against.
        algorithm_parameters: Dict of keyword arguments for the callable.

    Returns:
        Tuple ``(prediction_results, mae, mse, rmse, r2, variance)`` where
        ``prediction_results`` is the predictions wrapped in a DataFrame.
    """
    prediction = algorithm_function(X_train, y_train, X_test, **algorithm_parameters)

    mae = mean_absolute_error(y_test, prediction)
    mse = mean_squared_error(y_test, prediction)
    # RMSE is the square root of MSE. The earlier squared=True call simply
    # returned MSE a second time, so the "RMSE" column duplicated "MSE".
    rmse = mse ** 0.5
    r2 = r2_score(y_test, prediction)
    variance = explained_variance_score(y_test, prediction)

    prediction_results = pd.DataFrame(prediction)
    return prediction_results, mae, mse, rmse, r2, variance
|
| 168 |
|
| 169 |
|
| 170 |
# Used both for the "All" button and for the filtered data using all algorithms
|
|
@@ -172,63 +176,69 @@ def process_all_algorithms(dt_max_depth, dt_min_samples_split, dt_min_samples_le
|
|
| 172 |
lr_learning_rate, lr_num_iterations):
|
| 173 |
results = {}
|
| 174 |
# Decision Tree - Custom
|
| 175 |
-
prediction_dt, mae_dt, mse_dt, rmse_dt, r2_dt, variance_dt = evaluate_algorithm(
|
| 176 |
decision_tree, X_train, y_train, X_test, y_test,
|
| 177 |
{"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split})
|
| 178 |
|
| 179 |
results["Decision Tree - Custom"] = {"Algorithm": "Decision Tree - Custom", "MAE": mae_dt, "MSE": mse_dt,
|
| 180 |
-
"RMSE": rmse_dt, "R2": r2_dt, "Explained Variance": variance_dt
|
|
|
|
| 181 |
|
| 182 |
# Decision Tree - SKLearn
|
| 183 |
-
prediction_dts, mae_dts, mse_dts, rmse_dts, r2_dts, variance_dts = evaluate_algorithm(
|
| 184 |
decision_tree_sklearn, X_train, y_train,
|
| 185 |
X_test, y_test, {"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split,
|
| 186 |
"min_samples_leaf": dt_min_samples_leaf})
|
| 187 |
results["Decision Tree - SKLearn"] = {"Algorithm": "Decision Tree - SKLearn", "MAE": mae_dts, "MSE": mse_dts,
|
| 188 |
-
"RMSE": rmse_dts, "R2": r2_dts, "Explained Variance": variance_dts
|
|
|
|
| 189 |
|
| 190 |
# Random Forest - Custom
|
| 191 |
-
prediction_rf, mae_rf, mse_rf, rmse_rf, r2_rf, variance_rf = evaluate_algorithm(random_forest, X_train, y_train, X_test,
|
| 192 |
y_test, {"max_depth": rf_max_depth,
|
| 193 |
"n_estimators": rf_n_estimators,
|
| 194 |
-
"min_samples_split": dt_min_samples_split
|
| 195 |
-
"min_samples_leaf": dt_min_samples_leaf})
|
| 196 |
|
| 197 |
results["Random Forest - Custom"] = {"Algorithm": "Random Forest - Custom", "MAE": mae_rf, "MSE": mse_rf,
|
| 198 |
-
"RMSE": rmse_rf, "R2": r2_rf, "Explained Variance": variance_rf
|
|
|
|
| 199 |
|
| 200 |
# Random Forest - SKLearn
|
| 201 |
-
prediction_rfs, mae_rfs, mse_rfs, rmse_rfs, r2_rfs, variance_rfs = evaluate_algorithm(random_forest_sklearn,
|
| 202 |
X_train, y_train, X_test,
|
| 203 |
y_test, {})
|
| 204 |
results["Random Forest - SKLearn"] = {"Algorithm": "Random Forest - SKLearn", "MAE": mae_rfs, "MSE": mse_rfs,
|
| 205 |
-
"RMSE": rmse_rfs, "R2": r2_rfs, "Explained Variance": variance_rfs
|
|
|
|
| 206 |
|
| 207 |
# Random Forest - Custom using SKLearn Decision Trees
|
| 208 |
-
prediction_rfsdt, mae_rfsdt, mse_rfsdt, rmse_rfsdt, r2_rfsdt, variance_rfsdt = evaluate_algorithm(
|
| 209 |
random_forest_sklearn_decision_trees, X_train, y_train, X_test, y_test,
|
| 210 |
{"max_depth": rf_max_depth, "n_estimators": rf_n_estimators, "min_samples_split": dt_min_samples_split,
|
| 211 |
"min_samples_leaf": dt_min_samples_leaf})
|
| 212 |
|
| 213 |
results["Random Forest - Custom using SKLearn DT"] = {"Algorithm": "Random Forest - Custom using SKLearn DT",
|
| 214 |
"MAE": mae_rfsdt, "MSE": mse_rfsdt, "RMSE": rmse_rfsdt,
|
| 215 |
-
"R2": r2_rfsdt, "Explained Variance": variance_rfsdt
|
|
|
|
| 216 |
|
| 217 |
# Linear Regression - Custom
|
| 218 |
-
prediction_lr, mae_lr, mse_lr, rmse_lr, r2_lr, variance_lr = evaluate_algorithm(linear_regression, X_train, y_train,
|
| 219 |
X_test, y_test,
|
| 220 |
{"learning_rate": lr_learning_rate,
|
| 221 |
"num_iterations": lr_num_iterations})
|
| 222 |
results["Linear Regression - Custom"] = {"Algorithm": "Linear Regression - Custom", "MAE": mae_lr, "MSE": mse_lr,
|
| 223 |
-
"RMSE": rmse_lr, "R2": r2_lr, "Explained Variance": variance_lr
|
|
|
|
| 224 |
|
| 225 |
# Linear Regression - SKLearn
|
| 226 |
-
prediction_lrs, mae_lrs, mse_lrs, rmse_lrs, r2_lrs, variance_lrs = evaluate_algorithm(linear_regression_sklearn,
|
| 227 |
X_train, y_train, X_test,
|
| 228 |
y_test, {})
|
| 229 |
results["Linear Regression - SKLearn"] = {"Algorithm": "Linear Regression - SKLearn", "MAE": mae_lrs,
|
| 230 |
"MSE": mse_lrs, "RMSE": rmse_lrs, "R2": r2_lrs,
|
| 231 |
-
"Explained Variance": variance_lrs
|
|
|
|
| 232 |
|
| 233 |
df_results = pd.DataFrame(results).T # Convert results to DataFrame
|
| 234 |
|
|
@@ -348,29 +358,29 @@ def filter_data(records, algorithm, selected_district, selected_year, dt_max_dep
|
|
| 348 |
|
| 349 |
# Evaluate algorithm
|
| 350 |
if algorithm == "Decision Tree - Custom":
|
| 351 |
-
prediction_dt, mae, mse, rmse, r2, variance = evaluate_algorithm(
|
| 352 |
decision_tree, X_train, y_train, X_test, y_test,
|
| 353 |
{"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split})
|
| 354 |
all_predictions["Decision Tree - Custom"] = prediction_dt
|
| 355 |
elif algorithm == "Decision Tree - SKLearn":
|
| 356 |
-
prediction_dts, mae, mse, rmse, r2, variance = evaluate_algorithm(
|
| 357 |
decision_tree_sklearn, X_train, y_train,
|
| 358 |
X_test, y_test, {"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split,
|
| 359 |
"min_samples_leaf": dt_min_samples_leaf})
|
| 360 |
all_predictions["Decision Tree - SKLearn"] = prediction_dts
|
| 361 |
elif algorithm == "Random Forest - Custom":
|
| 362 |
-
prediction_rf, mae, mse, rmse, r2, variance = evaluate_algorithm(random_forest, X_train, y_train, X_test,
|
| 363 |
y_test, {"max_depth": rf_max_depth,
|
| 364 |
"n_estimators": rf_n_estimators,
|
| 365 |
"min_samples_split": dt_min_samples_split,
|
| 366 |
"min_samples_leaf": dt_min_samples_leaf})
|
| 367 |
all_predictions["Random Forest - Custom"] = prediction_rf
|
| 368 |
elif algorithm == "Random Forest - SKLearn":
|
| 369 |
-
prediction_rfs, mae, mse, rmse, r2, variance = evaluate_algorithm(random_forest_sklearn, X_train, y_train,
|
| 370 |
X_test, y_test, {})
|
| 371 |
all_predictions["Random Forest - SKLearn"] = prediction_rfs
|
| 372 |
elif algorithm == "Random Forest - Custom using SKLearn DT":
|
| 373 |
-
prediction_rfsdt, mae, mse, rmse, r2, variance = evaluate_algorithm(random_forest_sklearn_decision_trees,
|
| 374 |
X_train, y_train, X_test, y_test,
|
| 375 |
{"max_depth": rf_max_depth,
|
| 376 |
"n_estimators": rf_n_estimators,
|
|
@@ -378,22 +388,25 @@ def filter_data(records, algorithm, selected_district, selected_year, dt_max_dep
|
|
| 378 |
"min_samples_leaf": dt_min_samples_leaf})
|
| 379 |
all_predictions["Random Forest - Custom using SKLearn DT"] = prediction_rfsdt
|
| 380 |
elif algorithm == "Linear Regression - Custom":
|
| 381 |
-
prediction_lr, mae, mse, rmse, r2, variance = evaluate_algorithm(linear_regression, X_train, y_train,
|
| 382 |
X_test, y_test,
|
| 383 |
{"learning_rate": lr_learning_rate,
|
| 384 |
"num_iterations": lr_num_iterations})
|
| 385 |
all_predictions["Linear Regression - Custom"] = prediction_lr
|
| 386 |
elif algorithm == "Linear Regression - SKLearn":
|
| 387 |
-
prediction_lrs, mae, mse, rmse, r2, variance = evaluate_algorithm(linear_regression_sklearn, X_train,
|
| 388 |
y_train, X_test, y_test,
|
| 389 |
{"learning_rate": lr_learning_rate,
|
| 390 |
"num_iterations": lr_num_iterations})
|
| 391 |
all_predictions["Linear Regression - SKLearn"] = prediction_lrs
|
| 392 |
# In case of error
|
| 393 |
else:
|
| 394 |
-
mae, mse, rmse, r2, variance = None, None, None, None, None
|
| 395 |
|
| 396 |
-
results = [{
|
|
|
|
|
|
|
|
|
|
| 397 |
df_results = pd.DataFrame(results) # Convert results to DataFrame
|
| 398 |
|
| 399 |
all_predictions = pd.DataFrame(all_predictions)
|
|
|
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
|
| 5 |
from sklearn.model_selection import train_test_split
|
| 6 |
+
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, explained_variance_score, median_absolute_error, max_error
|
| 7 |
+
import time
|
| 8 |
|
| 9 |
from RandomForestRegressor import RandomForestRegressor
|
| 10 |
from LinearRegression import LinearRegression
|
|
|
|
| 93 |
# Decision Tree - Custom
|
| 94 |
def decision_tree(X_train, y_train, X_test, max_depth, min_samples_split):
    """Fit the DecisionTreeRegressor in scope and predict for the test features.

    The surrounding comment labels this as the custom (non-SKLearn) decision
    tree. The inputs are accessed through ``.values``, so the model works on
    the underlying arrays rather than on the containers themselves.

    Args:
        X_train, y_train: Training features and targets (must expose ``.values``).
        X_test: Test features (must expose ``.values``).
        max_depth: Forwarded to the regressor constructor.
        min_samples_split: Forwarded to the regressor constructor.

    Returns:
        Whatever the regressor's ``predict`` returns for ``X_test.values``.
    """
    regressor = DecisionTreeRegressor(min_samples_split=min_samples_split, max_depth=max_depth)
    regressor.fit(X_train.values, y_train.values)
    return regressor.predict(X_test.values)
|
|
|
|
| 110 |
|
| 111 |
|
| 112 |
# Random Forest - Custom
|
| 113 |
+
def random_forest(X_train, y_train, X_test, n_estimators, max_depth, min_samples_split):
    """Fit the RandomForestRegressor in scope and predict for the test features.

    The surrounding comment labels this as the custom random forest. Unlike
    ``decision_tree``, the inputs are handed to the model as-is, without
    ``.values``.

    Args:
        X_train, y_train: Training features and targets, passed to ``fit``.
        X_test: Test features, passed to ``predict``.
        n_estimators: Forwarded to the regressor constructor.
        max_depth: Forwarded to the regressor constructor.
        min_samples_split: Forwarded to the regressor constructor.

    Returns:
        Whatever the regressor's ``predict`` returns for ``X_test``.
    """
    forest = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
    )
    forest.fit(X_train, y_train)
    return forest.predict(X_test)
|
|
|
|
| 131 |
SKLearn_Decision_Trees_Random_Forest_Regressor = RandomForestRegressor(n_estimators=n_estimators,
|
| 132 |
max_depth=max_depth,
|
| 133 |
min_samples_split=min_samples_split,
|
|
|
|
| 134 |
custom=False)
|
| 135 |
SKLearn_Decision_Trees_Random_Forest_Regressor.fit(X_train, y_train)
|
| 136 |
SKLearn_Decision_Trees_Random_Forest_Regressor_Prediction = SKLearn_Decision_Trees_Random_Forest_Regressor.predict(
|
|
|
|
| 155 |
|
| 156 |
|
| 157 |
def evaluate_algorithm(algorithm_function, X_train, y_train, X_test, y_test, algorithm_parameters):
    """Run one regression algorithm, time it, and score its predictions.

    Args:
        algorithm_function: Callable invoked as
            ``algorithm_function(X_train, y_train, X_test, **algorithm_parameters)``
            that returns the predictions for ``X_test``.
        X_train, y_train: Training data, forwarded to the callable untouched.
        X_test: Test features, forwarded to the callable untouched.
        y_test: True targets the predictions are scored against.
        algorithm_parameters: Dict of keyword arguments for the callable.

    Returns:
        Tuple ``(prediction_results, mae, mse, rmse, r2, variance, medae,
        maxae, execution_time)`` where ``prediction_results`` is the
        predictions wrapped in a DataFrame and ``execution_time`` is the
        duration, in seconds, of the ``algorithm_function`` call.
    """
    # Time only the algorithm call, not the metric computation below.
    # perf_counter is monotonic, so the interval cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    prediction = algorithm_function(X_train, y_train, X_test, **algorithm_parameters)
    execution_time = time.perf_counter() - start_time

    mae = mean_absolute_error(y_test, prediction)
    mse = mean_squared_error(y_test, prediction)
    # RMSE is the square root of MSE. The earlier squared=True call simply
    # returned MSE a second time, so the "RMSE" column duplicated "MSE".
    rmse = mse ** 0.5
    r2 = r2_score(y_test, prediction)
    variance = explained_variance_score(y_test, prediction)
    medae = median_absolute_error(y_test, prediction)
    maxae = max_error(y_test, prediction)

    prediction_results = pd.DataFrame(prediction)
    return prediction_results, mae, mse, rmse, r2, variance, medae, maxae, execution_time
|
| 172 |
|
| 173 |
|
| 174 |
# Used both for the "All" button and for the filtered data using all algorithms
|
|
|
|
| 176 |
lr_learning_rate, lr_num_iterations):
|
| 177 |
results = {}
|
| 178 |
# Decision Tree - Custom
|
| 179 |
+
prediction_dt, mae_dt, mse_dt, rmse_dt, r2_dt, variance_dt, medae_dt, max_ae_dt, time_dt = evaluate_algorithm(
|
| 180 |
decision_tree, X_train, y_train, X_test, y_test,
|
| 181 |
{"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split})
|
| 182 |
|
| 183 |
results["Decision Tree - Custom"] = {"Algorithm": "Decision Tree - Custom", "MAE": mae_dt, "MSE": mse_dt,
|
| 184 |
+
"RMSE": rmse_dt, "R2": r2_dt, "Explained Variance": variance_dt,
|
| 185 |
+
"MedAE": medae_dt, "Max AE": max_ae_dt, "Execution Time": time_dt}
|
| 186 |
|
| 187 |
# Decision Tree - SKLearn
|
| 188 |
+
prediction_dts, mae_dts, mse_dts, rmse_dts, r2_dts, variance_dts, medae_dts, max_ae_dts, time_dts = evaluate_algorithm(
|
| 189 |
decision_tree_sklearn, X_train, y_train,
|
| 190 |
X_test, y_test, {"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split,
|
| 191 |
"min_samples_leaf": dt_min_samples_leaf})
|
| 192 |
results["Decision Tree - SKLearn"] = {"Algorithm": "Decision Tree - SKLearn", "MAE": mae_dts, "MSE": mse_dts,
|
| 193 |
+
"RMSE": rmse_dts, "R2": r2_dts, "Explained Variance": variance_dts,
|
| 194 |
+
"MedAE": medae_dts, "Max AE": max_ae_dts, "Execution Time": time_dts}
|
| 195 |
|
| 196 |
# Random Forest - Custom
|
| 197 |
+
prediction_rf, mae_rf, mse_rf, rmse_rf, r2_rf, variance_rf, medae_rf, max_ae_rf, time_rf = evaluate_algorithm(random_forest, X_train, y_train, X_test,
|
| 198 |
y_test, {"max_depth": rf_max_depth,
|
| 199 |
"n_estimators": rf_n_estimators,
|
| 200 |
+
"min_samples_split": dt_min_samples_split})
|
|
|
|
| 201 |
|
| 202 |
results["Random Forest - Custom"] = {"Algorithm": "Random Forest - Custom", "MAE": mae_rf, "MSE": mse_rf,
|
| 203 |
+
"RMSE": rmse_rf, "R2": r2_rf, "Explained Variance": variance_rf,
|
| 204 |
+
"MedAE": medae_rf, "Max AE": max_ae_rf, "Execution Time": time_rf}
|
| 205 |
|
| 206 |
# Random Forest - SKLearn
|
| 207 |
+
prediction_rfs, mae_rfs, mse_rfs, rmse_rfs, r2_rfs, variance_rfs, medae_rfs, max_ae_rfs, time_rfs = evaluate_algorithm(random_forest_sklearn,
|
| 208 |
X_train, y_train, X_test,
|
| 209 |
y_test, {})
|
| 210 |
results["Random Forest - SKLearn"] = {"Algorithm": "Random Forest - SKLearn", "MAE": mae_rfs, "MSE": mse_rfs,
|
| 211 |
+
"RMSE": rmse_rfs, "R2": r2_rfs, "Explained Variance": variance_rfs,
|
| 212 |
+
"MedAE": medae_rfs, "Max AE": max_ae_rfs, "Execution Time": time_rfs}
|
| 213 |
|
| 214 |
# Random Forest - Custom using SKLearn Decision Trees
|
| 215 |
+
prediction_rfsdt, mae_rfsdt, mse_rfsdt, rmse_rfsdt, r2_rfsdt, variance_rfsdt, medae_rfsdt, max_ae_rfsdt, time_rfsdt = evaluate_algorithm(
|
| 216 |
random_forest_sklearn_decision_trees, X_train, y_train, X_test, y_test,
|
| 217 |
{"max_depth": rf_max_depth, "n_estimators": rf_n_estimators, "min_samples_split": dt_min_samples_split,
|
| 218 |
"min_samples_leaf": dt_min_samples_leaf})
|
| 219 |
|
| 220 |
results["Random Forest - Custom using SKLearn DT"] = {"Algorithm": "Random Forest - Custom using SKLearn DT",
|
| 221 |
"MAE": mae_rfsdt, "MSE": mse_rfsdt, "RMSE": rmse_rfsdt,
|
| 222 |
+
"R2": r2_rfsdt, "Explained Variance": variance_rfsdt,
|
| 223 |
+
"MedAE": medae_rfsdt, "Max AE": max_ae_rfsdt, "Execution Time": time_rfsdt}
|
| 224 |
|
| 225 |
# Linear Regression - Custom
|
| 226 |
+
prediction_lr, mae_lr, mse_lr, rmse_lr, r2_lr, variance_lr, medae_lr, max_ae_lr, time_lr = evaluate_algorithm(linear_regression, X_train, y_train,
|
| 227 |
X_test, y_test,
|
| 228 |
{"learning_rate": lr_learning_rate,
|
| 229 |
"num_iterations": lr_num_iterations})
|
| 230 |
results["Linear Regression - Custom"] = {"Algorithm": "Linear Regression - Custom", "MAE": mae_lr, "MSE": mse_lr,
|
| 231 |
+
"RMSE": rmse_lr, "R2": r2_lr, "Explained Variance": variance_lr,
|
| 232 |
+
"MedAE": medae_lr, "Max AE": max_ae_lr, "Execution Time": time_lr}
|
| 233 |
|
| 234 |
# Linear Regression - SKLearn
|
| 235 |
+
prediction_lrs, mae_lrs, mse_lrs, rmse_lrs, r2_lrs, variance_lrs, medae_lrs, max_ae_lrs, time_lrs = evaluate_algorithm(linear_regression_sklearn,
|
| 236 |
X_train, y_train, X_test,
|
| 237 |
y_test, {})
|
| 238 |
results["Linear Regression - SKLearn"] = {"Algorithm": "Linear Regression - SKLearn", "MAE": mae_lrs,
|
| 239 |
"MSE": mse_lrs, "RMSE": rmse_lrs, "R2": r2_lrs,
|
| 240 |
+
"Explained Variance": variance_lrs, "MedAE": medae_lrs,
|
| 241 |
+
"Max AE": max_ae_lrs, "Execution Time": time_lrs}
|
| 242 |
|
| 243 |
df_results = pd.DataFrame(results).T # Convert results to DataFrame
|
| 244 |
|
|
|
|
| 358 |
|
| 359 |
# Evaluate algorithm
|
| 360 |
if algorithm == "Decision Tree - Custom":
|
| 361 |
+
prediction_dt, mae, mse, rmse, r2, variance, medae, max_ae, time = evaluate_algorithm(
|
| 362 |
decision_tree, X_train, y_train, X_test, y_test,
|
| 363 |
{"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split})
|
| 364 |
all_predictions["Decision Tree - Custom"] = prediction_dt
|
| 365 |
elif algorithm == "Decision Tree - SKLearn":
|
| 366 |
+
prediction_dts, mae, mse, rmse, r2, variance, medae, max_ae, time = evaluate_algorithm(
|
| 367 |
decision_tree_sklearn, X_train, y_train,
|
| 368 |
X_test, y_test, {"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split,
|
| 369 |
"min_samples_leaf": dt_min_samples_leaf})
|
| 370 |
all_predictions["Decision Tree - SKLearn"] = prediction_dts
|
| 371 |
elif algorithm == "Random Forest - Custom":
|
| 372 |
+
prediction_rf, mae, mse, rmse, r2, variance, medae, max_ae, time = evaluate_algorithm(random_forest, X_train, y_train, X_test,
|
| 373 |
y_test, {"max_depth": rf_max_depth,
|
| 374 |
"n_estimators": rf_n_estimators,
|
| 375 |
"min_samples_split": dt_min_samples_split,
|
| 376 |
"min_samples_leaf": dt_min_samples_leaf})
|
| 377 |
all_predictions["Random Forest - Custom"] = prediction_rf
|
| 378 |
elif algorithm == "Random Forest - SKLearn":
|
| 379 |
+
prediction_rfs, mae, mse, rmse, r2, variance, medae, max_ae, time = evaluate_algorithm(random_forest_sklearn, X_train, y_train,
|
| 380 |
X_test, y_test, {})
|
| 381 |
all_predictions["Random Forest - SKLearn"] = prediction_rfs
|
| 382 |
elif algorithm == "Random Forest - Custom using SKLearn DT":
|
| 383 |
+
prediction_rfsdt, mae, mse, rmse, r2, variance, medae, max_ae, time = evaluate_algorithm(random_forest_sklearn_decision_trees,
|
| 384 |
X_train, y_train, X_test, y_test,
|
| 385 |
{"max_depth": rf_max_depth,
|
| 386 |
"n_estimators": rf_n_estimators,
|
|
|
|
| 388 |
"min_samples_leaf": dt_min_samples_leaf})
|
| 389 |
all_predictions["Random Forest - Custom using SKLearn DT"] = prediction_rfsdt
|
| 390 |
elif algorithm == "Linear Regression - Custom":
|
| 391 |
+
prediction_lr, mae, mse, rmse, r2, variance, medae, max_ae, time = evaluate_algorithm(linear_regression, X_train, y_train,
|
| 392 |
X_test, y_test,
|
| 393 |
{"learning_rate": lr_learning_rate,
|
| 394 |
"num_iterations": lr_num_iterations})
|
| 395 |
all_predictions["Linear Regression - Custom"] = prediction_lr
|
| 396 |
elif algorithm == "Linear Regression - SKLearn":
|
| 397 |
+
prediction_lrs, mae, mse, rmse, r2, variance, medae, max_ae, time = evaluate_algorithm(linear_regression_sklearn, X_train,
|
| 398 |
y_train, X_test, y_test,
|
| 399 |
{"learning_rate": lr_learning_rate,
|
| 400 |
"num_iterations": lr_num_iterations})
|
| 401 |
all_predictions["Linear Regression - SKLearn"] = prediction_lrs
|
| 402 |
# In case of error
|
| 403 |
else:
|
| 404 |
+
mae, mse, rmse, r2, variance, medae, max_ae, time = None, None, None, None, None, None, None, None
|
| 405 |
|
| 406 |
+
results = [{
|
| 407 |
+
"Algorithm": algorithm, "MAE": mae, "MSE": mse, "RMSE": rmse, "R2": r2, "Explained Variance": variance,
|
| 408 |
+
"MedAE": medae, "Max AE": max_ae, "Execution Time": time
|
| 409 |
+
}]
|
| 410 |
df_results = pd.DataFrame(results) # Convert results to DataFrame
|
| 411 |
|
| 412 |
all_predictions = pd.DataFrame(all_predictions)
|