NathanPortelli commited on
Commit
94b723c
·
1 Parent(s): 0388c9b

Initial commit

Browse files
DecisionTreeRegressor.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
class Node():
    """A single node of the regression tree.

    Internal nodes carry a split (feature_index, threshold, children,
    variance reduction); leaf nodes carry only `value`, the prediction.
    """

    def __init__(self, feature_index=None, threshold=None, left=None, right=None, var_red=None, value=None):
        # Split definition (internal nodes only).
        self.feature_index = feature_index
        self.threshold = threshold
        self.left = left
        self.right = right
        # Variance reduction achieved by this split (diagnostic, used by print_tree).
        self.var_red = var_red
        # Leaf prediction; None for internal nodes.
        self.value = value


class DecisionTreeRegressor():
    """CART-style regression tree grown greedily by maximising variance reduction.

    Leaves predict the mean of the training targets that reach them.

    Parameters
    ----------
    min_samples_split : int
        Minimum number of samples a node must hold to be considered for splitting.
    max_depth : int
        Maximum depth at which splitting is still attempted.
    min_samples_leaf : int or None
        Minimum number of samples each child of a split must keep.
        ``None`` is treated as 1 (i.e. no extra constraint).
    """

    def __init__(self, min_samples_split, max_depth, min_samples_leaf):
        self.root = None
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        # Bug fix: the parameter was previously accepted but never stored,
        # so `min_samples_leaf` silently had no effect on the tree.
        self.min_samples_leaf = min_samples_leaf

    def build_tree(self, dataset, curr_depth=0):
        """Recursively grow the tree; `dataset` has the target as its last column."""
        X, Y = dataset[:, :-1], dataset[:, -1]
        num_samples, num_features = np.shape(X)

        best_split = {}

        # Only attempt a split when the node is large enough and not too deep.
        if num_samples >= self.min_samples_split and curr_depth <= self.max_depth:
            best_split = self.get_best_split(dataset, num_samples, num_features)

        # Split only if it actually reduces variance; otherwise fall through to a leaf.
        if "var_red" in best_split and best_split["var_red"] > 0:
            left_subtree = self.build_tree(best_split["dataset_left"], curr_depth + 1)
            right_subtree = self.build_tree(best_split["dataset_right"], curr_depth + 1)
            return Node(best_split["feature_index"], best_split["threshold"],
                        left_subtree, right_subtree, best_split["var_red"])

        # Leaf: predict the mean target of the samples that reached this node.
        leaf_value = self.calculate_leaf_value(Y)
        return Node(value=leaf_value)

    def variance_reduction(self, parent, l_child, r_child):
        """Return the weighted decrease in variance achieved by splitting `parent`."""
        weight_l = len(l_child) / len(parent)
        weight_r = len(r_child) / len(parent)
        return np.var(parent) - (weight_l * np.var(l_child) + weight_r * np.var(r_child))

    def calculate_leaf_value(self, Y):
        """Leaf prediction: the mean of the targets."""
        return np.mean(Y)

    def split(self, dataset, feature_index, threshold):
        """Partition rows into (<= threshold, > threshold) on `feature_index`.

        Vectorised boolean masking replaces the original per-row list
        comprehension; empty sides keep their column count, which is safe
        because callers only index them when non-empty.
        """
        mask = dataset[:, feature_index] <= threshold
        return dataset[mask], dataset[~mask]

    def get_best_split(self, dataset, num_samples, num_features):
        """Exhaustively search all features/thresholds for the best split.

        Returns an empty dict when no valid split exists.
        """
        best_split = {}
        max_var_red = -float("inf")
        # None means "no leaf-size constraint", i.e. at least one sample per side.
        min_leaf = self.min_samples_leaf if self.min_samples_leaf else 1

        for feature_index in range(num_features):
            possible_thresholds = np.unique(dataset[:, feature_index])

            for threshold in possible_thresholds:
                dataset_left, dataset_right = self.split(dataset, feature_index, threshold)

                # Bug fix: enforce the minimum leaf size on both children;
                # previously only non-emptiness was checked and
                # min_samples_leaf was ignored entirely.
                if len(dataset_left) >= min_leaf and len(dataset_right) >= min_leaf:
                    y, left_y, right_y = dataset[:, -1], dataset_left[:, -1], dataset_right[:, -1]
                    curr_var_red = self.variance_reduction(y, left_y, right_y)

                    if curr_var_red > max_var_red:
                        best_split["feature_index"] = feature_index
                        best_split["threshold"] = threshold
                        best_split["dataset_left"] = dataset_left
                        best_split["dataset_right"] = dataset_right
                        best_split["var_red"] = curr_var_red
                        max_var_red = curr_var_red

        return best_split

    def print_tree(self, tree=None, indent=" "):
        """Pretty-print the tree to stdout (root is used when `tree` is omitted)."""
        if not tree:
            tree = self.root

        if tree.value is not None:
            print(tree.value)
        else:
            print("X_" + str(tree.feature_index), "<=", tree.threshold, "?", tree.var_red)
            print("%sleft:" % (indent), end="")
            self.print_tree(tree.left, indent + indent)
            print("%sright:" % (indent), end="")
            self.print_tree(tree.right, indent + indent)

    def fit(self, X, Y, min_samples_split=None, max_depth=None):
        """Build the tree from features X and targets Y.

        The optional arguments override the constructor's hyperparameters
        for this fit only if provided.
        """
        if min_samples_split is not None:
            self.min_samples_split = min_samples_split
        if max_depth is not None:
            self.max_depth = max_depth

        # Append the target as the last column so the tree can carry both around.
        dataset = np.column_stack((X, Y))
        self.root = self.build_tree(dataset)

    def make_prediction(self, x, tree):
        """Route a single sample `x` down the tree and return the leaf value."""
        # `is not None` instead of `!= None` (identity check for the sentinel).
        if tree.value is not None:
            return tree.value

        feature_val = x[tree.feature_index]

        if feature_val <= tree.threshold:
            return self.make_prediction(x, tree.left)
        else:
            return self.make_prediction(x, tree.right)

    def predict(self, X):
        """Return a list with one prediction per row of X."""
        return [self.make_prediction(x, self.root) for x in X]
LinearRegression.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
class LinearRegression:
    """Ordinary least squares regression fitted with batch gradient descent.

    After `fit`, `weights` holds one coefficient per feature and `bias`
    holds the intercept; `predict` applies the learned linear model.
    """

    def __init__(self, learning_rate, num_iterations):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Run `num_iterations` full-batch gradient-descent steps on (X, y)."""
        n_rows, n_cols = X.shape

        # Start from the zero model.
        self.weights = np.zeros(n_cols)
        self.bias = 0

        inv_n = 1 / n_rows
        for _ in range(self.num_iterations):
            # Current model output and its error against the targets.
            residuals = (X @ self.weights + self.bias) - y

            # Gradients of the mean-squared-error cost.
            grad_w = inv_n * (X.T @ residuals)
            grad_b = inv_n * np.sum(residuals)

            # Step against the gradient.
            self.weights = self.weights - self.learning_rate * grad_w
            self.bias = self.bias - self.learning_rate * grad_b

    def predict(self, X):
        """Return the linear model's prediction for each row of X."""
        return X @ self.weights + self.bias
NSO_Population_Sex_dataset/NSO_DF_TOT_POP_BY_REG_DIST_LOC_1.5.csv ADDED
The diff for this file is too large to render. See raw diff
 
NSO_Population_Sex_dataset/NSO_DIS_SEX_YEAR_POP.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
NSO_Population_Sex_dataset/NSO_POPULATION_DATA_CLEANED.csv ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ District,Sex,Year,Population,Population_Growth_Rate,Average_Population
2
+ 1,0,0.0,0.3372772323766804,0.0,0.33836672889163455
3
+ 1,0,0.06666666666666667,0.3342958058581002,-0.008839691008998951,0.33836672889163455
4
+ 1,0,0.13333333333333333,0.332684223956165,-0.004820825968182452,0.33836672889163455
5
+ 1,0,0.2,0.3328319522971757,0.000444049733570262,0.33836672889163455
6
+ 1,0,0.26666666666666666,0.3322007493855844,-0.001896461283944606,0.33836672889163455
7
+ 1,0,0.3333333333333333,0.327930057345456,-0.012855756791720685,0.33836672889163455
8
+ 1,0,0.4,0.32661393212554224,-0.004013432713571907,0.33836672889163455
9
+ 1,0,0.4666666666666667,0.327016827601026,0.0012335526315787604,0.33836672889163455
10
+ 1,0,0.5333333333333333,0.3263856246894347,-0.0019301848049281256,0.33836672889163455
11
+ 1,0,0.6,0.33040114959509004,0.01230300785911198,0.33836672889163455
12
+ 1,0,0.6666666666666666,0.333919770080982,0.010649540687748926,0.33836672889163455
13
+ 1,0,0.7333333333333333,0.3361759847436913,0.006756756756756577,0.33836672889163455
14
+ 1,0,0.8,0.3448113777682277,0.02568712048577826,0.33836672889163455
15
+ 1,0,0.8666666666666667,0.35786519117390314,0.03785783836416745,0.33836672889163455
16
+ 1,0,0.9333333333333333,0.37900377378762035,0.05906856306526054,0.33836672889163455
17
+ 1,0,1.0,0.3783994305743946,-0.0015945572446051104,0.33836672889163455
18
+ 1,1,0.0,0.34063469467237883,-0.09980124929017598,0.33836672889163455
19
+ 1,1,0.06666666666666667,0.33621627429123974,-0.012971140198706821,0.33836672889163455
20
+ 1,1,0.13333333333333333,0.33459126254012167,-0.004833233473137688,0.33836672889163455
21
+ 1,1,0.2,0.3335705940022294,-0.00305049369832211,0.33836672889163455
22
+ 1,1,0.26666666666666666,0.3332079880742939,-0.001087044045414376,0.33836672889163455
23
+ 1,1,0.3333333333333333,0.3309786331099502,-0.006690580790778222,0.33836672889163455
24
+ 1,1,0.4,0.33013255261143415,-0.0025562994522216886,0.33836672889163455
25
+ 1,1,0.4666666666666667,0.3299042451753267,-0.0006915629322267192,0.33836672889163455
26
+ 1,1,0.5333333333333333,0.3303340003491761,0.0013026663952779849,0.33836672889163455
27
+ 1,1,0.6,0.3311666509985093,0.0025206325974711508,0.33836672889163455
28
+ 1,1,0.6666666666666666,0.33355716415304654,0.007218459791556775,0.33836672889163455
29
+ 1,1,0.7333333333333333,0.33655202052080957,0.008978540081330477,0.33836672889163455
30
+ 1,1,0.8,0.3402452290460778,0.010973663208299955,0.33836672889163455
31
+ 1,1,0.8666666666666667,0.34502625535515236,0.01405170712453141,0.33836672889163455
32
+ 1,1,0.9333333333333333,0.35257383059588243,0.021875364913783057,0.33836672889163455
33
+ 1,1,1.0,0.35123084567760304,-0.0038090884851256224,0.33836672889163455
34
+ 2,0,0.0,0.5852459676877829,0.0,0.7000187178522985
35
+ 2,0,0.06666666666666667,0.5842790185466217,-0.001652209830648621,0.7000187178522985
36
+ 2,0,0.13333333333333333,0.5855145646714387,0.0021146508527558616,0.7000187178522985
37
+ 2,0,0.2,0.5892614925934382,0.006399376118170652,0.7000187178522985
38
+ 2,0,0.26666666666666666,0.5919608922791797,0.004580987761241717,0.7000187178522985
39
+ 2,0,0.3333333333333333,0.5893017821409866,-0.004492036843776859,0.7000187178522985
40
+ 2,0,0.4,0.5907790655510938,0.0025068368277119113,0.7000187178522985
41
+ 2,0,0.4666666666666667,0.609446555915177,0.03159809047510809,0.7000187178522985
42
+ 2,0,0.5333333333333333,0.6420139401834517,0.05343763772587051,0.7000187178522985
43
+ 2,0,0.6,0.6891392809658747,0.07340236376948028,0.7000187178522985
44
+ 2,0,0.6666666666666666,0.731819341668793,0.06193241610476674,0.7000187178522985
45
+ 2,0,0.7333333333333333,0.7673547226064651,0.04855758643470609,0.7000187178522985
46
+ 2,0,0.8,0.8286485542767354,0.07987678952710975,0.7000187178522985
47
+ 2,0,0.8666666666666667,0.898456910328897,0.0842436225730121,0.7000187178522985
48
+ 2,0,0.9333333333333333,0.9849317092169055,0.09624813153961154,0.7000187178522985
49
+ 2,0,1.0,1.0,0.015298817818623078,0.7000187178522985
50
+ 2,1,0.0,0.6070963323081882,-0.39290366769181184,0.7000187178522985
51
+ 2,1,0.06666666666666667,0.6042760639798015,-0.0046455038159495254,0.7000187178522985
52
+ 2,1,0.13333333333333333,0.6057936369374572,0.002511390154461779,0.7000187178522985
53
+ 2,1,0.2,0.6081438605444461,0.0038795779019242804,0.7000187178522985
54
+ 2,1,0.26666666666666666,0.6116893407287036,0.005830002429167536,0.7000187178522985
55
+ 2,1,0.3333333333333333,0.612186245148467,0.0008123476848089783,0.7000187178522985
56
+ 2,1,0.4,0.6148319254374773,0.004321691821691731,0.7000187178522985
57
+ 2,1,0.4666666666666667,0.6312297712896684,0.026670452807933342,0.7000187178522985
58
+ 2,1,0.5333333333333333,0.6527309598313211,0.03406238032424169,0.7000187178522985
59
+ 2,1,0.6,0.6871516632868213,0.05273337036808434,0.7000187178522985
60
+ 2,1,0.6666666666666666,0.7201488027289453,0.04802016964390399,0.7000187178522985
61
+ 2,1,0.7333333333333333,0.7529176347349619,0.04550286257762526,0.7000187178522985
62
+ 2,1,0.8,0.7975987429661164,0.05934395233933265,0.7000187178522985
63
+ 2,1,0.8666666666666667,0.8420649736103464,0.0557501262838862,0.7000187178522985
64
+ 2,1,0.9333333333333333,0.8905198694618659,0.05754294189885312,0.7000187178522985
65
+ 2,1,1.0,0.8940653496461235,0.003981359996380718,0.7000187178522985
66
+ 3,0,0.0,0.19249002833698178,0.0,0.24032043620150145
67
+ 3,0,0.06666666666666667,0.19827829333476585,0.03007046675504066,0.24032043620150145
68
+ 3,0,0.13333333333333333,0.20512751641799062,0.03454348415063668,0.24032043620150145
69
+ 3,0,0.2,0.21333315426867755,0.04000261882938316,0.24032043620150145
70
+ 3,0,0.26666666666666666,0.22094787875532157,0.03569405099150136,0.24032043620150145
71
+ 3,0,0.3333333333333333,0.22567518566766495,0.02139557500607836,0.24032043620150145
72
+ 3,0,0.4,0.23263184754435207,0.030825993810997376,0.24032043620150145
73
+ 3,0,0.4666666666666667,0.24111951222787767,0.036485394296270623,0.24032043620150145
74
+ 3,0,0.5333333333333333,0.25179624232819864,0.04427982622256876,0.24032043620150145
75
+ 3,0,0.6,0.25930352802138035,0.029814923462584852,0.24032043620150145
76
+ 3,0,0.6666666666666666,0.26713313009494905,0.03019473793246319,0.24032043620150145
77
+ 3,0,0.7333333333333333,0.27388834423389424,0.025287818611432256,0.24032043620150145
78
+ 3,0,0.8,0.28554545332455916,0.04256153770716886,0.24032043620150145
79
+ 3,0,0.8666666666666667,0.3025073528424276,0.059401749600225706,0.24032043620150145
80
+ 3,0,0.9333333333333333,0.3240622607808114,0.07125416204217538,0.24032043620150145
81
+ 3,0,1.0,0.32553954419091874,0.0045586406962287995,0.24032043620150145
82
+ 3,1,0.0,0.19430305797665892,-0.4031353135313531,0.24032043620150145
83
+ 3,1,0.06666666666666667,0.19680100992465854,0.012855957976223298,0.24032043620150145
84
+ 3,1,0.13333333333333333,0.20132686909926,0.022997133888358112,0.24032043620150145
85
+ 3,1,0.2,0.20630934314607646,0.02474818224267894,0.24032043620150145
86
+ 3,1,0.26666666666666666,0.21185587085857027,0.026884520244759846,0.24032043620150145
87
+ 3,1,0.3333333333333333,0.21591168531177396,0.019144215530903397,0.24032043620150145
88
+ 3,1,0.4,0.22098816830286996,0.023511849225601678,0.24032043620150145
89
+ 3,1,0.4666666666666667,0.22595721250050363,0.022485566697052484,0.24032043620150145
90
+ 3,1,0.5333333333333333,0.23034877318327715,0.01943536404160473,0.24032043620150145
91
+ 3,1,0.6,0.23553269496783552,0.02250466417910446,0.24032043620150145
92
+ 3,1,0.6666666666666666,0.24145525845744753,0.025145398563120036,0.24032043620150145
93
+ 3,1,0.7333333333333333,0.24470528195968358,0.013460147950386414,0.24032043620150145
94
+ 3,1,0.8,0.2505472663541988,0.023873552494374728,0.24032043620150145
95
+ 3,1,0.8666666666666667,0.2589006325458965,0.033340480274442585,0.24032043620150145
96
+ 3,1,0.9333333333333333,0.26707941069821783,0.031590413943354934,0.24032043620150145
97
+ 3,1,1.0,0.26885215079034663,0.006637501885654151,0.24032043620150145
98
+ 4,0,0.0,0.17321819475967284,0.0,0.192231503740213
99
+ 4,0,0.06666666666666667,0.17387625736962975,0.003799038610637462,0.192231503740213
100
+ 4,0,0.13333333333333333,0.17555498851747894,0.0096547462732679,0.192231503740213
101
+ 4,0,0.2,0.17844240609177958,0.016447368421052655,0.192231503740213
102
+ 4,0,0.26666666666666666,0.18081948939713408,0.013321291487920606,0.192231503740213
103
+ 4,0,0.3333333333333333,0.1805777521118438,-0.0013368983957219305,0.192231503740213
104
+ 4,0,0.4,0.1822833429580586,0.00944518816004769,0.192231503740213
105
+ 4,0,0.4666666666666667,0.18451269792240232,0.012230162823251955,0.192231503740213
106
+ 4,0,0.5333333333333333,0.1869569304736708,0.01324696120532809,0.192231503740213
107
+ 4,0,0.6,0.18918628543801452,0.011924430716184098,0.192231503740213
108
+ 4,0,0.6666666666666666,0.19271833577308928,0.018669695463902958,0.192231503740213
109
+ 4,0,0.7333333333333333,0.19658613233773384,0.020069686411149812,0.192231503740213
110
+ 4,0,0.8,0.20066880648930313,0.020767864462358343,0.192231503740213
111
+ 4,0,0.8666666666666667,0.2102845785041834,0.04791861865881408,0.192231503740213
112
+ 4,0,0.9333333333333333,0.2200615087092572,0.04649380508366341,0.192231503740213
113
+ 4,0,1.0,0.22241173231624609,0.010679848651287571,0.192231503740213
114
+ 4,1,0.0,0.182256483259693,-0.18054465310065815,0.192231503740213
115
+ 4,1,0.06666666666666667,0.18132982366608022,-0.005084371085402717,0.192231503740213
116
+ 4,1,0.13333333333333333,0.18244450114825211,0.006147237446304299,0.192231503740213
117
+ 4,1,0.2,0.1839889338042734,0.00846521899153485,0.192231503740213
118
+ 4,1,0.26666666666666666,0.18601684103087524,0.011021897810219006,0.192231503740213
119
+ 4,1,0.3333333333333333,0.1866883334900149,0.0036098476644284005,0.192231503740213
120
+ 4,1,0.4,0.18825962584440176,0.00841666067189406,0.192231503740213
121
+ 4,1,0.4666666666666667,0.19087844643504653,0.013910686260522276,0.192231503740213
122
+ 4,1,0.5333333333333333,0.1926243268288097,0.009146555969886672,0.192231503740213
123
+ 4,1,0.6,0.19471938330132552,0.010876385693369572,0.192231503740213
124
+ 4,1,0.6666666666666666,0.19823800378721748,0.018070211738740793,0.192231503740213
125
+ 4,1,0.7333333333333333,0.20038677965646445,0.010839374026149873,0.192231503740213
126
+ 4,1,0.8,0.20383825089644245,0.017224046645667235,0.192231503740213
127
+ 4,1,0.8666666666666667,0.20761203851680746,0.018513638160495338,0.192231503740213
128
+ 4,1,0.9333333333333333,0.21185587085857027,0.020441166957759327,0.192231503740213
129
+ 4,1,1.0,0.21211103799304334,0.0012044374009507752,0.192231503740213
130
+ 5,1,0.0,0.17998683874780086,0.0,0.25831349632693623
131
+ 5,1,0.06666666666666667,0.18556022615866025,0.03096552753320414,0.25831349632693623
132
+ 5,1,0.13333333333333333,0.19316152079612145,0.040964029818339576,0.25831349632693623
133
+ 5,1,0.2,0.2011925706074321,0.04157686157269014,0.25831349632693623
134
+ 5,1,0.26666666666666666,0.2098145337827856,0.04285428209064812,0.25831349632693623
135
+ 5,1,0.3333333333333333,0.21694578369884907,0.03398835050886517,0.25831349632693623
136
+ 5,1,0.4,0.22505741260525644,0.03739012009409426,0.25831349632693623
137
+ 5,1,0.4666666666666667,0.2319334953868468,0.030552571905955395,0.25831349632693623
138
+ 5,1,0.5333333333333333,0.240904634640953,0.03867979154603374,0.25831349632693623
139
+ 5,1,0.6,0.2546030808074025,0.05686252648009793,0.25831349632693623
140
+ 5,1,0.6666666666666666,0.27343172936167925,0.07395294862327262,0.25831349632693623
141
+ 5,1,0.7333333333333333,0.29264984354225704,0.0702848722986249,0.25831349632693623
142
+ 5,1,0.8,0.3186500315601456,0.08884401817263998,0.25831349632693623
143
+ 5,1,0.8666666666666667,0.3428237600891742,0.07586294095334423,0.25831349632693623
144
+ 5,1,0.9333333333333333,0.36506359033588054,0.06487248795393108,0.25831349632693623
145
+ 5,1,1.0,0.3640160620996226,-0.0028694404591105283,0.25831349632693623
146
+ 5,0,0.0,0.17726057936369374,-0.5130418741929533,0.25831349632693623
147
+ 5,0,0.06666666666666667,0.1837337666698003,0.036517918024092655,0.25831349632693623
148
+ 5,0,0.13333333333333333,0.19122762251379918,0.040786492215481474,0.25831349632693623
149
+ 5,0,0.2,0.19999731403016344,0.0458599620759883,0.25831349632693623
150
+ 5,0,0.26666666666666666,0.20827010112676433,0.04136449100188022,0.25831349632693623
151
+ 5,0,0.3333333333333333,0.21374947959334417,0.02630900180551987,0.25831349632693623
152
+ 5,0,0.4,0.22137763392917098,0.03568735863282235,0.25831349632693623
153
+ 5,0,0.4666666666666667,0.22735391681551417,0.026995874787672847,0.25831349632693623
154
+ 5,0,0.5333333333333333,0.2376680409878997,0.045365940102782165,0.25831349632693623
155
+ 5,0,0.6,0.25472394945004767,0.07176357574730186,0.25831349632693623
156
+ 5,0,0.6666666666666666,0.27494930231933495,0.07940106500764488,0.25831349632693623
157
+ 5,0,0.7333333333333333,0.29509407609352545,0.07326723001025726,0.25831349632693623
158
+ 5,0,0.8,0.3256872725319295,0.10367268920948436,0.25831349632693623
159
+ 5,0,0.8666666666666667,0.3615986892467198,0.11026349428889537,0.25831349632693623
160
+ 5,0,0.9333333333333333,0.40008863700460645,0.10644382544103981,0.25831349632693623
161
+ 5,0,1.0,0.3974563865647789,-0.006579168205162689,0.25831349632693623
162
+ 6,1,0.0,0.005841984394515249,0.0,0.009613673600945461
163
+ 6,1,0.06666666666666667,0.00507648299109601,-0.13103448275862062,0.009613673600945461
164
+ 6,1,0.13333333333333333,0.005385369522300265,0.060846560846560926,0.009613673600945461
165
+ 6,1,0.2,0.005895703791246425,0.09476309226932678,0.009613673600945461
166
+ 6,1,0.26666666666666666,0.006728354440579632,0.1412300683371297,0.009613673600945461
167
+ 6,1,0.3333333333333333,0.006808933535676394,0.011976047904191711,0.009613673600945461
168
+ 6,1,0.4,0.00726554840789138,0.06706114398422103,0.009613673600945461
169
+ 6,1,0.4666666666666667,0.007184969312794618,-0.011090573012939031,0.009613673600945461
170
+ 6,1,0.5333333333333333,0.007278978257074173,0.013084112149532645,0.009613673600945461
171
+ 6,1,0.6,0.008957709404923382,0.23062730627306283,0.009613673600945461
172
+ 6,1,0.6666666666666666,0.009978377942815702,0.11394302848575721,0.009613673600945461
173
+ 6,1,0.7333333333333333,0.011576529995568149,0.16016150740242252,0.009613673600945461
174
+ 6,1,0.8,0.014772834101073044,0.2761020881670533,0.009613673600945461
175
+ 6,1,0.8666666666666667,0.01814372624595426,0.22818181818181826,0.009613673600945461
176
+ 6,1,0.9333333333333333,0.022548716777910583,0.24278312361213894,0.009613673600945461
177
+ 6,1,1.0,0.024187158378211414,0.07266229898749255,0.009613673600945461
178
+ 6,0,0.0,0.0,-1.0,0.009613673600945461
179
+ 6,0,0.06666666666666667,0.00017458803937631782,3.846153846153846,0.009613673600945461
180
+ 6,0,0.13333333333333333,0.0008460804985160016,3.846153846153846,0.009613673600945461
181
+ 6,0,0.2,0.002242784813526544,1.6507936507936511,0.009613673600945461
182
+ 6,0,0.26666666666666666,0.0033440324465156255,0.49101796407185616,0.009613673600945461
183
+ 6,0,0.3333333333333333,0.0030217160661285773,-0.09638554216867468,0.009613673600945461
184
+ 6,0,0.4,0.0038946562630101664,0.288888888888889,0.009613673600945461
185
+ 6,0,0.4666666666666667,0.004230402492580008,0.0862068965517242,0.009613673600945461
186
+ 6,0,0.5333333333333333,0.0045930084205154376,0.08571428571428563,0.009613673600945461
187
+ 6,0,0.6,0.006634345496300076,0.4444444444444444,0.009613673600945461
188
+ 6,0,0.6666666666666666,0.008192208001504143,0.23481781376518218,0.009613673600945461
189
+ 6,0,0.7333333333333333,0.010367843569116719,0.2655737704918033,0.009613673600945461
190
+ 6,0,0.8,0.014114771491116155,0.3613989637305699,0.009613673600945461
191
+ 6,0,0.8666666666666667,0.019674729052792737,0.3939105613701237,0.009613673600945461
192
+ 6,0,0.9333333333333333,0.02926364136930742,0.4873720136518771,0.009613673600945461
193
+ 6,0,1.0,0.029411369710318155,0.005048187241854185,0.009613673600945461
NSO_Population_Sex_dataset/NSO_POPULATION_DATA_PREFEATURE.csv ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ District,Sex,Year,Population
2
+ 1,0,2005,40400
3
+ 1,0,2006,40178
4
+ 1,0,2007,40058
5
+ 1,0,2008,40069
6
+ 1,0,2009,40022
7
+ 1,0,2010,39704
8
+ 1,0,2011,39606
9
+ 1,0,2012,39636
10
+ 1,0,2013,39589
11
+ 1,0,2014,39888
12
+ 1,0,2015,40150
13
+ 1,0,2016,40318
14
+ 1,0,2017,40961
15
+ 1,0,2018,41933
16
+ 1,0,2019,43507
17
+ 1,0,2020,43462
18
+ 1,1,2005,40650
19
+ 1,1,2006,40321
20
+ 1,1,2007,40200
21
+ 1,1,2008,40124
22
+ 1,1,2009,40097
23
+ 1,1,2010,39931
24
+ 1,1,2011,39868
25
+ 1,1,2012,39851
26
+ 1,1,2013,39883
27
+ 1,1,2014,39945
28
+ 1,1,2015,40123
29
+ 1,1,2016,40346
30
+ 1,1,2017,40621
31
+ 1,1,2018,40977
32
+ 1,1,2019,41539
33
+ 1,1,2020,41439
34
+ 2,0,2005,58864
35
+ 2,0,2006,58792
36
+ 2,0,2007,58884
37
+ 2,0,2008,59163
38
+ 2,0,2009,59364
39
+ 2,0,2010,59166
40
+ 2,0,2011,59276
41
+ 2,0,2012,60666
42
+ 2,0,2013,63091
43
+ 2,0,2014,66600
44
+ 2,0,2015,69778
45
+ 2,0,2016,72424
46
+ 2,0,2017,76988
47
+ 2,0,2018,82186
48
+ 2,0,2019,88625
49
+ 2,0,2020,89747
50
+ 2,1,2005,60491
51
+ 2,1,2006,60281
52
+ 2,1,2007,60394
53
+ 2,1,2008,60569
54
+ 2,1,2009,60833
55
+ 2,1,2010,60870
56
+ 2,1,2011,61067
57
+ 2,1,2012,62288
58
+ 2,1,2013,63889
59
+ 2,1,2014,66452
60
+ 2,1,2015,68909
61
+ 2,1,2016,71349
62
+ 2,1,2017,74676
63
+ 2,1,2018,77987
64
+ 2,1,2019,81595
65
+ 2,1,2020,81859
66
+ 3,0,2005,29619
67
+ 3,0,2006,30050
68
+ 3,0,2007,30560
69
+ 3,0,2008,31171
70
+ 3,0,2009,31738
71
+ 3,0,2010,32090
72
+ 3,0,2011,32608
73
+ 3,0,2012,33240
74
+ 3,0,2013,34035
75
+ 3,0,2014,34594
76
+ 3,0,2015,35177
77
+ 3,0,2016,35680
78
+ 3,0,2017,36548
79
+ 3,0,2018,37811
80
+ 3,0,2019,39416
81
+ 3,0,2020,39526
82
+ 3,1,2005,29754
83
+ 3,1,2006,29940
84
+ 3,1,2007,30277
85
+ 3,1,2008,30648
86
+ 3,1,2009,31061
87
+ 3,1,2010,31363
88
+ 3,1,2011,31741
89
+ 3,1,2012,32111
90
+ 3,1,2013,32438
91
+ 3,1,2014,32824
92
+ 3,1,2015,33265
93
+ 3,1,2016,33507
94
+ 3,1,2017,33942
95
+ 3,1,2018,34564
96
+ 3,1,2019,35173
97
+ 3,1,2020,35305
98
+ 4,0,2005,28184
99
+ 4,0,2006,28233
100
+ 4,0,2007,28358
101
+ 4,0,2008,28573
102
+ 4,0,2009,28750
103
+ 4,0,2010,28732
104
+ 4,0,2011,28859
105
+ 4,0,2012,29025
106
+ 4,0,2013,29207
107
+ 4,0,2014,29373
108
+ 4,0,2015,29636
109
+ 4,0,2016,29924
110
+ 4,0,2017,30228
111
+ 4,0,2018,30944
112
+ 4,0,2019,31672
113
+ 4,0,2020,31847
114
+ 4,1,2005,28857
115
+ 4,1,2006,28788
116
+ 4,1,2007,28871
117
+ 4,1,2008,28986
118
+ 4,1,2009,29137
119
+ 4,1,2010,29187
120
+ 4,1,2011,29304
121
+ 4,1,2012,29499
122
+ 4,1,2013,29629
123
+ 4,1,2014,29785
124
+ 4,1,2015,30047
125
+ 4,1,2016,30207
126
+ 4,1,2017,30464
127
+ 4,1,2018,30745
128
+ 4,1,2019,31061
129
+ 4,1,2020,31080
130
+ 5,1,2005,28688
131
+ 5,1,2006,29103
132
+ 5,1,2007,29669
133
+ 5,1,2008,30267
134
+ 5,1,2009,30909
135
+ 5,1,2010,31440
136
+ 5,1,2011,32044
137
+ 5,1,2012,32556
138
+ 5,1,2013,33224
139
+ 5,1,2014,34244
140
+ 5,1,2015,35646
141
+ 5,1,2016,37077
142
+ 5,1,2017,39013
143
+ 5,1,2018,40813
144
+ 5,1,2019,42469
145
+ 5,1,2020,42391
146
+ 5,0,2005,28485
147
+ 5,0,2006,28967
148
+ 5,0,2007,29525
149
+ 5,0,2008,30178
150
+ 5,0,2009,30794
151
+ 5,0,2010,31202
152
+ 5,0,2011,31770
153
+ 5,0,2012,32215
154
+ 5,0,2013,32983
155
+ 5,0,2014,34253
156
+ 5,0,2015,35759
157
+ 5,0,2016,37259
158
+ 5,0,2017,39537
159
+ 5,0,2018,42211
160
+ 5,0,2019,45077
161
+ 5,0,2020,44881
162
+ 6,1,2005,15721
163
+ 6,1,2006,15664
164
+ 6,1,2007,15687
165
+ 6,1,2008,15725
166
+ 6,1,2009,15787
167
+ 6,1,2010,15793
168
+ 6,1,2011,15827
169
+ 6,1,2012,15821
170
+ 6,1,2013,15828
171
+ 6,1,2014,15953
172
+ 6,1,2015,16029
173
+ 6,1,2016,16148
174
+ 6,1,2017,16386
175
+ 6,1,2018,16637
176
+ 6,1,2019,16965
177
+ 6,1,2020,17087
178
+ 6,0,2005,15286
179
+ 6,0,2006,15299
180
+ 6,0,2007,15349
181
+ 6,0,2008,15453
182
+ 6,0,2009,15535
183
+ 6,0,2010,15511
184
+ 6,0,2011,15576
185
+ 6,0,2012,15601
186
+ 6,0,2013,15628
187
+ 6,0,2014,15780
188
+ 6,0,2015,15896
189
+ 6,0,2016,16058
190
+ 6,0,2017,16337
191
+ 6,0,2018,16751
192
+ 6,0,2019,17465
193
+ 6,0,2020,17476
README.md CHANGED
@@ -1,12 +1,11 @@
1
  ---
2
- title: Gradio Web Tool
3
- emoji: 🌍
4
- colorFrom: blue
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 4.13.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: ICS5110 Gradio Web Tool
3
+ emoji: 🐠
4
+ colorFrom: indigo
5
+ colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 4.9.1
8
  app_file: app.py
9
  pinned: false
10
+ license: apache-2.0
11
  ---
 
 
RandomForestRegressor.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from DecisionTreeRegressor import DecisionTreeRegressor
2
+ from sklearn.tree import DecisionTreeRegressor as SKLearnDecisionTreeRegressor
3
+ import numpy as np
4
+
5
class RandomForestRegressor:
    """Bagging ensemble of regression trees.

    Each tree is trained on a bootstrap resample of the data; predictions
    are the mean of the per-tree predictions. `custom=True` uses the
    project's DecisionTreeRegressor, otherwise scikit-learn's.

    Parameters
    ----------
    n_estimators : int
        Number of trees in the ensemble.
    max_depth, min_samples_split, min_samples_leaf
        Default hyperparameters forwarded to every tree.
    custom : bool
        Backend selector (custom implementation vs scikit-learn).
    """

    def __init__(self, n_estimators, max_depth, min_samples_split, min_samples_leaf, custom=True):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.trees = []
        self.custom = custom

    def fit(self, X, y, tree_params=None):
        """Fit `n_estimators` trees on bootstrap resamples of (X, y).

        `tree_params`, when given, overrides the constructor hyperparameters
        passed to each tree.
        """
        if tree_params is None:
            tree_params = {
                'max_depth': self.max_depth,
                'min_samples_split': self.min_samples_split,
                'min_samples_leaf': self.min_samples_leaf
            }

        # Convert X and y to NumPy arrays so fancy indexing works below.
        X = np.array(X)
        y = np.array(y)

        # Bug fix: start from an empty ensemble, otherwise calling fit()
        # a second time keeps (and averages in) trees from the previous fit.
        self.trees = []

        for _ in range(self.n_estimators):
            if self.custom:
                tree = DecisionTreeRegressor(**tree_params)
            else:
                tree = SKLearnDecisionTreeRegressor(**tree_params)

            # Bootstrap sampling: draw len(X) rows with replacement.
            indices = np.random.choice(len(X), len(X), replace=True)
            X_bootstrap = X[indices]
            y_bootstrap = y[indices]

            tree.fit(X_bootstrap, y_bootstrap)
            self.trees.append(tree)

    def predict(self, X):
        """Return the mean per-row prediction across all fitted trees."""
        # Bug fix: the original called `X.values`, which requires a pandas
        # object even though fit() accepts plain arrays; normalise once here
        # so both DataFrames and ndarrays are accepted.
        X = np.asarray(X)
        predictions = np.zeros((X.shape[0], len(self.trees)))

        for i, tree in enumerate(self.trees):
            predictions[:, i] = tree.predict(X)

        return np.mean(predictions, axis=1)
app.py ADDED
@@ -0,0 +1,516 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import matplotlib.pyplot as plt
4
+
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, explained_variance_score
7
+
8
+ from RandomForestRegressor import RandomForestRegressor
9
+ from LinearRegression import LinearRegression
10
+ from DecisionTreeRegressor import DecisionTreeRegressor
11
+
12
+ from sklearn.ensemble import RandomForestRegressor as SKLearnRandomForestRegressor
13
+ from sklearn.linear_model import LinearRegression as SKLearnLinearRegression
14
+ from sklearn.tree import DecisionTreeRegressor as SKLearnDecisionTreeRegressor
15
+
16
# Dataset exported prior to feature scaling/engineering -- for user readability
df_read = pd.read_csv('NSO_Population_Sex_dataset/NSO_POPULATION_DATA_PREFEATURE.csv')
# Cleaned dataset after feature scaling/engineering -- for model training
df = pd.read_csv('NSO_Population_Sex_dataset/NSO_POPULATION_DATA_CLEANED.csv')

# Features used for training; District and Sex are one-hot encoded below,
# the remaining columns are already numeric/scaled in the cleaned dataset.
feature_cols = ['District', 'Sex', 'Year', 'Population_Growth_Rate', 'Average_Population']
X = pd.get_dummies(df[feature_cols], columns=['District', 'Sex']) # for converting to categorical variables
y = df["Population"]

# NOTE(review): no random_state is passed, so the train/test split (and any
# metrics computed from it) changes on every app start -- confirm intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Mapping for dropdowns: human-readable UI labels -> the scaled values used
# in the cleaned dataset (years 2005-2020 are min-max scaled onto [0, 1]
# in steps of 1/15; districts are their 1-6 integer codes).
mapping_display = {
    "year": {
        "2005": 0,
        "2006": 0.0666666666666666,
        "2007": 0.133333333333333,
        "2008": 0.2,
        "2009": 0.266666666666666,
        "2010": 0.333333333333333,
        "2011": 0.4,
        "2012": 0.466666666666666,
        "2013": 0.533333333333333,
        "2014": 0.6,
        "2015": 0.666666666666666,
        "2016": 0.733333333333333,
        "2017": 0.8,
        "2018": 0.866666666666666,
        "2019": 0.933333333333333,
        "2020": 1,
    },
    "district": {
        "Southern Harbour": 1,
        "Northern Harbour": 2,
        "South Eastern": 3,
        "Western": 4,
        "Northern": 5,
        "Gozo & Comino": 6,
    },
}
56
+
57
+
58
def scatter_plot_graph(x, y, legend_labels):
    """Overlay one scatter series per entry of `x` against `y`; return the figure."""
    fig, axis = plt.subplots()
    axis.set_xlabel('Actual')
    axis.set_ylabel('Predicted')
    for series in x:
        axis.scatter(series, y, alpha=0.5)
    axis.legend(legend_labels, loc='best')
    # Close so pyplot's global state does not also render the figure.
    plt.close()
    return fig
67
+
68
+
69
def line_plot_graph(x, legend_labels):
    """Draw one line per series in *x* against the sample index.

    Returns the matplotlib Figure, closed so it can be handed to Gradio.
    """
    fig, axis = plt.subplots()
    for series in x:
        axis.plot(series, alpha=0.5)
    axis.set_xlabel('Sample Index')
    axis.set_ylabel('Target Variable (Values)')
    axis.legend(legend_labels, loc='best')
    plt.close()
    return fig
78
+
79
+
80
def residual_plot_graph(x, y, color='black'):
    """Plot residuals (y - x) against the predicted values *x*.

    x, y: equal-length numpy arrays of predictions and actual targets
    (every call site passes ``.to_numpy()`` results).
    Returns the matplotlib Figure, closed so it can be handed to Gradio.
    """
    fig, ax = plt.subplots()
    # One vectorised scatter call instead of one artist per point: identical
    # output, but O(1) artists instead of O(n) -- noticeably faster for large
    # test sets. (The original looped to "avoid an x != y error"; lengths are
    # equal here, so elementwise subtraction is safe.)
    ax.scatter(x, y - x, alpha=0.5, c=color)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Residuals')
    plt.axhline(y=0, color='r', linestyle='--', label='Residuals Mean')
    plt.close()
    return fig
90
+
91
+
92
# Decision Tree - Custom
def decision_tree(X_train, y_train, X_test, max_depth, min_samples_split):
    """Fit the custom DecisionTreeRegressor and return predictions for X_test."""
    regressor = DecisionTreeRegressor(
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=None,  # the custom implementation does not use this parameter
    )
    regressor.fit(X_train.values, y_train.values)
    return regressor.predict(X_test.values)
100
+
101
+
102
# Decision Tree - SKLearn
def decision_tree_sklearn(X_train, y_train, X_test, max_depth, min_samples_split, min_samples_leaf):
    """Fit sklearn's DecisionTreeRegressor and return predictions for X_test."""
    tree = SKLearnDecisionTreeRegressor(
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
    )
    tree.fit(X_train.values, y_train.values)
    return tree.predict(X_test.values)
110
+
111
+
112
# Random Forest - Custom
def random_forest(X_train, y_train, X_test, n_estimators, max_depth, min_samples_split, min_samples_leaf):
    """Fit the custom RandomForestRegressor and return predictions for X_test."""
    forest = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
    )
    forest.fit(X_train, y_train)
    return forest.predict(X_test)
120
+
121
+
122
# Random Forest - SKLearn
def random_forest_sklearn(X_train, y_train, X_test):
    """Fit sklearn's RandomForestRegressor (default settings) and predict on X_test."""
    forest = SKLearnRandomForestRegressor()
    forest.fit(X_train, y_train)
    return forest.predict(X_test)
128
+
129
+
130
# Random Forest - Custom using SKLearn Decision Trees
def random_forest_sklearn_decision_trees(X_train, y_train, X_test, n_estimators, max_depth, min_samples_split, min_samples_leaf):
    """Fit the custom forest with sklearn trees as base estimators (custom=False)."""
    forest = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        custom=False,  # swap the custom base trees for sklearn ones
    )
    forest.fit(X_train, y_train)
    return forest.predict(X_test)
141
+
142
+
143
# Linear Regression - Custom
def linear_regression(X_train, y_train, X_test, learning_rate, num_iterations):
    """Fit the custom gradient-descent LinearRegression and predict on X_test."""
    model = LinearRegression(learning_rate=learning_rate, num_iterations=num_iterations)
    model.fit(X_train, y_train)
    return model.predict(X_test)
149
+
150
+
151
# Linear Regression - SKLearn
def linear_regression_sklearn(X_train, y_train, X_test):
    """Fit sklearn's ordinary least-squares LinearRegression and predict on X_test."""
    model = SKLearnLinearRegression()
    model.fit(X_train, y_train)
    return model.predict(X_test)
157
+
158
+
159
def evaluate_algorithm(algorithm_function, X_train, y_train, X_test, y_test, algorithm_parameters):
    """Train/predict with one algorithm wrapper and compute its metrics.

    algorithm_function: one of the wrapper functions above, called as
    ``algorithm_function(X_train, y_train, X_test, **algorithm_parameters)``.
    Returns (predictions DataFrame, MAE, MSE, RMSE, R2, explained variance).
    """
    prediction = algorithm_function(X_train, y_train, X_test, **algorithm_parameters)
    mae = mean_absolute_error(y_test, prediction)
    mse = mean_squared_error(y_test, prediction)
    # BUG FIX: the original used mean_squared_error(..., squared=True), which is
    # just the MSE again. RMSE is the square root of the MSE (this form also
    # avoids the `squared=` parameter deprecated in newer scikit-learn).
    rmse = mse ** 0.5
    r2 = r2_score(y_test, prediction)
    variance = explained_variance_score(y_test, prediction)
    prediction_results = pd.DataFrame(prediction)
    return prediction_results, mae, mse, rmse, r2, variance
168
+
169
+
170
# Used both for the "All" button and for the filtered data using all algorithms
def process_all_algorithms(dt_max_depth, dt_min_samples_split, dt_min_samples_leaf, rf_n_estimators, rf_max_depth,
                           lr_learning_rate, lr_num_iterations):
    """Run every implemented algorithm on the module-level train/test split.

    Parameters are the hyperparameter widget values from the UI. Returns the
    21-tuple consumed by the ``all_btn.click`` outputs wiring: the combined
    predictions table, the metrics table, six scatter plots, six line plots,
    and seven residual plots (in that order).
    """
    results = {}
    # Decision Tree - Custom
    prediction_dt, mae_dt, mse_dt, rmse_dt, r2_dt, variance_dt = evaluate_algorithm(
        decision_tree, X_train, y_train, X_test, y_test,
        {"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split})

    results["Decision Tree - Custom"] = {"Algorithm": "Decision Tree - Custom", "MAE": mae_dt, "MSE": mse_dt,
                                         "RMSE": rmse_dt, "R2": r2_dt, "Explained Variance": variance_dt}

    # Decision Tree - SKLearn
    prediction_dts, mae_dts, mse_dts, rmse_dts, r2_dts, variance_dts = evaluate_algorithm(
        decision_tree_sklearn, X_train, y_train,
        X_test, y_test, {"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split,
                         "min_samples_leaf": dt_min_samples_leaf})
    results["Decision Tree - SKLearn"] = {"Algorithm": "Decision Tree - SKLearn", "MAE": mae_dts, "MSE": mse_dts,
                                          "RMSE": rmse_dts, "R2": r2_dts, "Explained Variance": variance_dts}

    # Random Forest - Custom
    # NOTE(review): both random forests reuse the *decision-tree* min_samples
    # sliders (dt_min_samples_split / dt_min_samples_leaf) -- confirm intentional.
    prediction_rf, mae_rf, mse_rf, rmse_rf, r2_rf, variance_rf = evaluate_algorithm(
        random_forest, X_train, y_train, X_test,
        y_test, {"max_depth": rf_max_depth,
                 "n_estimators": rf_n_estimators,
                 "min_samples_split": dt_min_samples_split,
                 "min_samples_leaf": dt_min_samples_leaf})

    results["Random Forest - Custom"] = {"Algorithm": "Random Forest - Custom", "MAE": mae_rf, "MSE": mse_rf,
                                         "RMSE": rmse_rf, "R2": r2_rf, "Explained Variance": variance_rf}

    # Random Forest - SKLearn
    prediction_rfs, mae_rfs, mse_rfs, rmse_rfs, r2_rfs, variance_rfs = evaluate_algorithm(
        random_forest_sklearn, X_train, y_train, X_test, y_test, {})
    results["Random Forest - SKLearn"] = {"Algorithm": "Random Forest - SKLearn", "MAE": mae_rfs, "MSE": mse_rfs,
                                          "RMSE": rmse_rfs, "R2": r2_rfs, "Explained Variance": variance_rfs}

    # Random Forest - Custom using SKLearn Decision Trees
    prediction_rfsdt, mae_rfsdt, mse_rfsdt, rmse_rfsdt, r2_rfsdt, variance_rfsdt = evaluate_algorithm(
        random_forest_sklearn_decision_trees, X_train, y_train, X_test, y_test,
        {"max_depth": rf_max_depth, "n_estimators": rf_n_estimators, "min_samples_split": dt_min_samples_split,
         "min_samples_leaf": dt_min_samples_leaf})

    results["Random Forest - Custom using SKLearn DT"] = {"Algorithm": "Random Forest - Custom using SKLearn DT",
                                                          "MAE": mae_rfsdt, "MSE": mse_rfsdt, "RMSE": rmse_rfsdt,
                                                          "R2": r2_rfsdt, "Explained Variance": variance_rfsdt}

    # Linear Regression - Custom
    prediction_lr, mae_lr, mse_lr, rmse_lr, r2_lr, variance_lr = evaluate_algorithm(
        linear_regression, X_train, y_train, X_test, y_test,
        {"learning_rate": lr_learning_rate,
         "num_iterations": lr_num_iterations})
    results["Linear Regression - Custom"] = {"Algorithm": "Linear Regression - Custom", "MAE": mae_lr, "MSE": mse_lr,
                                             "RMSE": rmse_lr, "R2": r2_lr, "Explained Variance": variance_lr}

    # Linear Regression - SKLearn
    prediction_lrs, mae_lrs, mse_lrs, rmse_lrs, r2_lrs, variance_lrs = evaluate_algorithm(
        linear_regression_sklearn, X_train, y_train, X_test, y_test, {})
    results["Linear Regression - SKLearn"] = {"Algorithm": "Linear Regression - SKLearn", "MAE": mae_lrs,
                                              "MSE": mse_lrs, "RMSE": rmse_lrs, "R2": r2_lrs,
                                              "Explained Variance": variance_lrs}

    df_results = pd.DataFrame(results).T  # Convert results to DataFrame

    # Side-by-side table of actual vs predicted values for every algorithm.
    # Each prediction_* is the single-column DataFrame built by evaluate_algorithm.
    all_predictions = pd.DataFrame()  # Initialising empty dataframe to store predictions
    all_predictions["Actual"] = y_test.values
    all_predictions["Decision Tree - Custom"] = prediction_dt
    all_predictions["Decision Tree - SKLearn"] = prediction_dts
    all_predictions["Random Forest - Custom"] = prediction_rf
    all_predictions["Random Forest - SKLearn"] = prediction_rfs
    all_predictions["Random Forest - Custom using SKLearn DT"] = prediction_rfsdt
    all_predictions["Linear Regression - Custom"] = prediction_lr
    all_predictions["Linear Regression - SKLearn"] = prediction_lrs
    all_predictions = pd.DataFrame(all_predictions)

    # Scatter plots: predicted values per algorithm against the actual targets.
    scatter_plot = scatter_plot_graph(
        [prediction_dt.to_numpy(), prediction_dts.to_numpy(), prediction_rf.to_numpy(), prediction_rfsdt.to_numpy(), prediction_rfs.to_numpy(), prediction_lr.to_numpy(), prediction_lrs.to_numpy()],
        y_test.to_numpy(),
        ['Custom DT', 'SKLearn DT', 'Custom RF', 'Custom RF w/ SKLearn DT', 'SKLearn RF', 'Custom LR', 'SKLearn LR'])
    custom_scatter_plot = scatter_plot_graph(
        [prediction_dt.to_numpy(), prediction_rf.to_numpy(), prediction_rfsdt.to_numpy(), prediction_lr.to_numpy()],
        y_test.to_numpy(),
        ['Custom DT', 'Custom RF', 'Custom RF w/ SKLearn DT', 'Custom LR'])
    sklearn_scatter_plot = scatter_plot_graph(
        [prediction_dts.to_numpy(), prediction_rfs.to_numpy(), prediction_lrs.to_numpy()], y_test.to_numpy(),
        ['SKLearn DT', 'SKLearn RF', 'SKLearn LR'])
    dt_scatter_plot = scatter_plot_graph(
        [prediction_dt.to_numpy(), prediction_dts.to_numpy()], y_test.to_numpy(),
        ['Custom DT', 'SKLearn DT'])
    rf_scatter_plot = scatter_plot_graph(
        [prediction_rf.to_numpy(), prediction_rfsdt.to_numpy(), prediction_rfs.to_numpy()], y_test.to_numpy(),
        ['Custom RF', 'Custom RF w/ SKLearn DT', 'SKLearn RF'])
    lr_scatter_plot = scatter_plot_graph(
        [prediction_lr.to_numpy(), prediction_lrs.to_numpy()], y_test.to_numpy(),
        ['Custom LR', 'SKLearn LR'])

    # Line plots: actual series first, then each algorithm's predictions.
    line_plot = line_plot_graph(
        [y_test.to_numpy(), prediction_dt.to_numpy(), prediction_dts.to_numpy(), prediction_rf.to_numpy(),
         prediction_rfsdt.to_numpy(), prediction_rfs.to_numpy(), prediction_lr.to_numpy(), prediction_lrs.to_numpy()],
        ['Actual', 'Custom DT', 'SKLearn DT', 'Custom RF', 'Custom RF w/ SKLearn DT', 'SKLearn RF', 'Custom LR',
         'SKLearn LR'])
    custom_line_plot = line_plot_graph(
        [y_test.to_numpy(), prediction_dt.to_numpy(), prediction_rf.to_numpy(), prediction_rfsdt.to_numpy(), prediction_lr.to_numpy()],
        ['Actual', 'Custom DT', 'Custom RF', 'Custom RF w/ SKLearn DT', 'Custom LR'])
    sklearn_line_plot = line_plot_graph(
        [y_test.to_numpy(), prediction_dts.to_numpy(), prediction_rfs.to_numpy(), prediction_lrs.to_numpy()],
        ['Actual', 'SKLearn DT', 'SKLearn RF', 'SKLearn LR'])
    dt_line_plot = line_plot_graph(
        [y_test.to_numpy(), prediction_dt.to_numpy(), prediction_dts.to_numpy()],
        ['Actual', 'Custom DT', 'SKLearn DT'])
    rf_line_plot = line_plot_graph(
        [y_test.to_numpy(), prediction_rf.to_numpy(), prediction_rfsdt.to_numpy(), prediction_rfs.to_numpy()],
        ['Actual', 'Custom RF', 'Custom RF w/ SKLearn DT', 'SKLearn RF'])
    lr_line_plot = line_plot_graph(
        [y_test.to_numpy(), prediction_lr.to_numpy(), prediction_lrs.to_numpy()],
        ['Actual', 'Custom LR', 'SKLearn LR'])

    # Residual plots: one per algorithm (predicted vs y - predicted).
    dt_residual_plot = residual_plot_graph(prediction_dt.to_numpy(), y_test.to_numpy())
    dts_residual_plot = residual_plot_graph(prediction_dts.to_numpy(), y_test.to_numpy())
    rf_residual_plot = residual_plot_graph(prediction_rf.to_numpy(), y_test.to_numpy())
    rfs_residual_plot = residual_plot_graph(prediction_rfs.to_numpy(), y_test.to_numpy())
    rfsdt_residual_plot = residual_plot_graph(prediction_rfsdt.to_numpy(), y_test.to_numpy())
    lr_residual_plot = residual_plot_graph(prediction_lr.to_numpy(), y_test.to_numpy())
    lrs_residual_plot = residual_plot_graph(prediction_lrs.to_numpy(), y_test.to_numpy())

    return (all_predictions, df_results, scatter_plot, custom_scatter_plot, sklearn_scatter_plot, dt_scatter_plot,
            rf_scatter_plot, lr_scatter_plot, line_plot, custom_line_plot, sklearn_line_plot, dt_line_plot,
            rf_line_plot, lr_line_plot, dt_residual_plot, dts_residual_plot, rf_residual_plot, rfs_residual_plot,
            rfsdt_residual_plot, lr_residual_plot, lrs_residual_plot)
300
+
301
+
302
# When the data/algorithms are filtered & 'All' button
def filter_data(records, algorithm, selected_district, selected_year, dt_max_depth, dt_min_samples_split,
                dt_min_samples_leaf, rf_n_estimators, rf_max_depth, lr_learning_rate, lr_num_iterations):
    """Run the selected algorithm (or all) on the optionally filtered dataset.

    Always returns the 24-tuple expected by the ``submit_btn.click`` outputs:
    (filtered records, predictions, X_test, filtered X_test, metrics table)
    followed by 19 plot slots (None when a single algorithm was run).
    """
    if algorithm == "All" or algorithm is None:
        # Process all algorithms; forward the 21 outputs plus the pass-through tables.
        (df_predictions, df_results, scatter_plot, custom_scatter_plot, sklearn_scatter_plot, dt_scatter_plot,
         rf_scatter_plot, lr_scatter_plot, line_plot, custom_line_plot, sklearn_line_plot, dt_line_plot,
         rf_line_plot, lr_line_plot, dt_residual_plot, dts_residual_plot, rf_residual_plot, rfs_residual_plot,
         rfsdt_residual_plot, lr_residual_plot, lrs_residual_plot) = process_all_algorithms(
            dt_max_depth, dt_min_samples_split, dt_min_samples_leaf,
            rf_n_estimators, rf_max_depth, lr_learning_rate, lr_num_iterations)
        return (records, df_predictions, X_test, None, df_results, scatter_plot, custom_scatter_plot,
                sklearn_scatter_plot, dt_scatter_plot, rf_scatter_plot, lr_scatter_plot, line_plot,
                custom_line_plot, sklearn_line_plot, dt_line_plot, rf_line_plot, lr_line_plot,
                dt_residual_plot, dts_residual_plot, rf_residual_plot, rfs_residual_plot,
                rfsdt_residual_plot, lr_residual_plot, lrs_residual_plot)

    # Convert selected district to the corresponding value from district_mapping_display
    selected_district_value = mapping_display["district"].get(selected_district, None)
    # Convert selected year to the corresponding value from year_mapping_display
    selected_year_value = mapping_display["year"].get(selected_year, None)

    # Filter the human-readable records table by district and/or year.
    if (selected_district_value != "All" and selected_district_value is not None and selected_year != "All" and
            selected_year is not None):
        filtered_data = records[
            (pd.notna(records["District"]) & (records["District"] == int(selected_district_value))) &
            (pd.notna(records["Year"]) & (records["Year"] == int(selected_year)))]
    elif selected_district_value != "All" and selected_district_value is not None:
        filtered_data = records[pd.notna(records["District"]) & (records["District"] == int(selected_district_value))]
    elif selected_year != "All" and selected_year is not None:
        filtered_data = records[pd.notna(records["Year"]) & (records["Year"] == int(selected_year))]
    else:  # If both inputs are None, return the original records
        filtered_data = records

    # Evaluation: build the matching filter for the one-hot encoded test split.
    query_str_year = f'Year == {selected_year_value}' if (selected_year_value != "All" and
                                                          selected_year_value is not None) else None
    query_str_district = f'District_{selected_district_value} == 1' if (selected_district_value != "All" and
                                                                       selected_district_value is not None) else None

    query_str = " and ".join(filter(None, [query_str_district, query_str_year]))

    filtered_X_test = X_test.query(query_str) if query_str else X_test

    # Check if filtered dataset is empty
    if filtered_X_test.empty:
        no_results = [{"Algorithm": algorithm, "Error": "No samples for the selected filter."}]
        # BUG FIX: pad to the full 24 outputs the Gradio wiring expects;
        # the original returned only 5 values here, breaking the callback.
        return (filtered_data, None, X_test, filtered_X_test, pd.DataFrame(no_results)) + (None,) * 19

    # Initialising prediction results
    all_predictions = pd.DataFrame()  # Initialize an empty dataframe to store prediction/s
    all_predictions["Actual"] = y_test.values

    # Evaluate algorithm
    if algorithm == "Decision Tree - Custom":
        prediction_dt, mae, mse, rmse, r2, variance = evaluate_algorithm(
            decision_tree, X_train, y_train, X_test, y_test,
            {"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split})
        all_predictions["Decision Tree - Custom"] = prediction_dt
    elif algorithm == "Decision Tree - SKLearn":
        prediction_dts, mae, mse, rmse, r2, variance = evaluate_algorithm(
            decision_tree_sklearn, X_train, y_train,
            X_test, y_test, {"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split,
                             "min_samples_leaf": dt_min_samples_leaf})
        all_predictions["Decision Tree - SKLearn"] = prediction_dts
    elif algorithm == "Random Forest - Custom":
        prediction_rf, mae, mse, rmse, r2, variance = evaluate_algorithm(
            random_forest, X_train, y_train, X_test,
            y_test, {"max_depth": rf_max_depth,
                     "n_estimators": rf_n_estimators,
                     "min_samples_split": dt_min_samples_split,
                     "min_samples_leaf": dt_min_samples_leaf})
        all_predictions["Random Forest - Custom"] = prediction_rf
    elif algorithm == "Random Forest - SKLearn":
        prediction_rfs, mae, mse, rmse, r2, variance = evaluate_algorithm(
            random_forest_sklearn, X_train, y_train, X_test, y_test, {})
        all_predictions["Random Forest - SKLearn"] = prediction_rfs
    elif algorithm == "Random Forest - Custom using SKLearn DT":
        prediction_rfsdt, mae, mse, rmse, r2, variance = evaluate_algorithm(
            random_forest_sklearn_decision_trees, X_train, y_train, X_test, y_test,
            {"max_depth": rf_max_depth,
             "n_estimators": rf_n_estimators,
             "min_samples_split": dt_min_samples_split,
             "min_samples_leaf": dt_min_samples_leaf})
        all_predictions["Random Forest - Custom using SKLearn DT"] = prediction_rfsdt
    elif algorithm == "Linear Regression - Custom":
        prediction_lr, mae, mse, rmse, r2, variance = evaluate_algorithm(
            linear_regression, X_train, y_train, X_test, y_test,
            {"learning_rate": lr_learning_rate,
             "num_iterations": lr_num_iterations})
        all_predictions["Linear Regression - Custom"] = prediction_lr
    elif algorithm == "Linear Regression - SKLearn":
        # BUG FIX: sklearn's LinearRegression wrapper takes no hyperparameters;
        # the original passed learning_rate/num_iterations here, raising a
        # TypeError (the "All" path already, correctly, passes {}).
        prediction_lrs, mae, mse, rmse, r2, variance = evaluate_algorithm(
            linear_regression_sklearn, X_train, y_train, X_test, y_test, {})
        all_predictions["Linear Regression - SKLearn"] = prediction_lrs
    # In case of error
    else:
        mae, mse, rmse, r2, variance = None, None, None, None, None

    results = [{"Algorithm": algorithm, "MAE": mae, "MSE": mse, "RMSE": rmse, "R2": r2, "Explained Variance": variance}]
    df_results = pd.DataFrame(results)  # Convert results to DataFrame

    all_predictions = pd.DataFrame(all_predictions)

    # Single-algorithm runs produce no plots: fill the 19 plot slots with None.
    return (filtered_data, all_predictions, X_test, filtered_X_test, df_results) + (None,) * 19
402
+
403
+
404
# Gradio UI: inputs (dataset preview, hyperparameter tabs, filters) on the left,
# outputs (tables, metrics, plots) on the right, plus the two button wirings.
with gr.Blocks(theme='ParityError/Interstellar') as gr_output:
    alg, district, year = None, None, None  # Initialising inputs for use by all_btn

    gr.Markdown(
        """
        # Machine Learning Approaches to Ethical Analysis of Statistics
        ## January 2024
        ### Created in partial fulfillment of the requirements for the ICS5110 Applied Machine Learning project by: Nathan Camilleri, Nathan Portelli, Oleg Grech.
        Email: {nathan.camillieri, nathan.portelli, oleg.grech}@um.edu.mt
        ### Full project code available at:
        [github.com/NathanPortelli/ICS5110-Applied-ML](https://github.com/NathanPortelli/ICS5110-Applied-ML/)
        ### Instructions:
        Click 'Run all algorithms/datasets' to run all algorithms without filtering, or choose the available filters and click 'Run'. You may also edit the parameters of each algorithm type.
        """)
    with gr.Row():
        with gr.Column():
            gr.Markdown("# Inputs")
            gr.Markdown("### NSO Malta - 'Total Population by region, district and locality' Dataset")
            record = gr.Dataframe(
                value=df_read,
                headers=["District", "Sex", "Year", "Population"],
                datatype=["number", "bool", "number", "number"],
                column_widths=[60, 60, 60, 75],
                height=325,
                interactive=False,
            )
            gr.Markdown("## Parameters")
            with gr.Row():
                with gr.Tab("Decision Tree"):
                    dt_max_depth = gr.Slider(label="Max Depth", minimum=1, maximum=100, value=100, interactive=True,
                                             step=1)
                    dt_min_samples_split = gr.Slider(label="Min Samples Split", minimum=0, maximum=20, value=2,
                                                     interactive=True, step=1)
                    dt_min_samples_leaf = gr.Slider(label="Min Samples Leaf", minimum=1, maximum=20, value=5,
                                                    interactive=True, step=1)
                with gr.Tab("Random Forest"):
                    rf_n_estimators = gr.Slider(label="N Estimators", minimum=1, maximum=100, value=100,
                                                interactive=True, step=1)
                    rf_max_depth = gr.Slider(label="Max Depth", minimum=1, maximum=100, value=100,
                                             interactive=True, step=1)
                    # rf_custom = gr.Dropdown([True, False], label="Custom", value=False, interactive=True)
                with gr.Tab("Linear Regression"):
                    # BUG FIX: this slider was labelled "Max Depth" (copy-paste
                    # from the Random Forest tab) but controls the learning rate.
                    lr_learning_rate = gr.Slider(label="Learning Rate", minimum=0.001, maximum=1, value=0.01,
                                                 interactive=True, step=0.01)
                    lr_num_iterations = gr.Slider(label="Num of Iterations", minimum=50, maximum=5000, value=1000,
                                                  interactive=True, step=50)
            all_btn = gr.Button(value="Run all algorithms/dataset", variant="secondary")
            gr.Markdown("### or pick the algorithm, district or year to filter the dataset")
            with gr.Column():
                alg = gr.Dropdown(["All", "Decision Tree - Custom", "Decision Tree - SKLearn",
                                   "Random Forest - Custom", "Random Forest - SKLearn",
                                   "Random Forest - Custom using SKLearn DT", "Linear Regression - Custom",
                                   "Linear Regression - SKLearn"],
                                  label="Select Algorithm", value="All")
                district = gr.Dropdown(
                    ["Southern Harbour", "Northern Harbour", "South Eastern", "Western", "Northern",
                     "Gozo & Comino", "All"], label="Select District", value="All")
                year = gr.Dropdown(list(mapping_display["year"].keys()) + ["All"], label="Select Year", value="All")
            with gr.Row():
                submit_btn = gr.Button(value="Run", variant="primary")
        with gr.Column():
            gr.Markdown("# Outputs")
            gr.Markdown("## Filtered Inputs/Outputs")
            with gr.Tab("Filtered Dataset Records"):
                filtered_records = gr.Dataframe(label="", height=300)
            with gr.Tab("Total X_Test Output"):
                total_x_test = gr.Dataframe(label="", height=300)
            with gr.Tab("Filtered X_Test Output"):
                filtered_x_test = gr.Dataframe(label="", height=300)
            gr.Markdown("## Algorithm Evaluation")
            evaluation = gr.Dataframe(label="")
            gr.Markdown("## Prediction Results")
            predictions = gr.Dataframe(label="Predicted vs Actual", height=300)
            gr.Markdown("## Graph Plots")
            with gr.Tab("Scatter Plots"):
                scatter_plot = gr.Plot(label="All Algorithms")
                custom_scatter_plot = gr.Plot(label="Custom Implementations")
                sklearn_scatter_plot = gr.Plot(label="SKLearn Implementations")
                dt_scatter_plot = gr.Plot(label="Decision Tree Implementations")
                rf_scatter_plot = gr.Plot(label="Random Forest Implementations")
                lr_scatter_plot = gr.Plot(label="Linear Regression Implementations")
            with gr.Tab("Line Plots"):
                line_plot = gr.Plot(label="All Algorithms")
                custom_line_plot = gr.Plot(label="Custom Implementations")
                sklearn_line_plot = gr.Plot(label="SKLearn Implementations")
                dt_line_plot = gr.Plot(label="Decision Tree Implementations")
                rf_line_plot = gr.Plot(label="Random Forest Implementations")
                lr_line_plot = gr.Plot(label="Linear Regression Implementations")
            with gr.Tab("Residual Plots"):
                dt_residual_plot = gr.Plot(label="Custom Decision Tree")
                dts_residual_plot = gr.Plot(label="SKLearn Decision Tree")
                rf_residual_plot = gr.Plot(label="Custom Random Forest")
                rfs_residual_plot = gr.Plot(label="SKLearn Random Forest")
                rfsdt_residual_plot = gr.Plot(label="Custom Random Forest using SKLearn Decision Trees")
                lr_residual_plot = gr.Plot(label="Custom Linear Regression")
                lrs_residual_plot = gr.Plot(label="SKLearn Linear Regression")

    # Filtering logic: filter_data returns 24 values matching these outputs.
    submit_btn.click(filter_data, inputs=[record, alg, district, year,
                                          dt_max_depth, dt_min_samples_split, dt_min_samples_leaf,
                                          rf_n_estimators, rf_max_depth,
                                          lr_learning_rate, lr_num_iterations],
                     outputs=[filtered_records, predictions, total_x_test, filtered_x_test, evaluation, scatter_plot, custom_scatter_plot, sklearn_scatter_plot, dt_scatter_plot, rf_scatter_plot, lr_scatter_plot, line_plot, custom_line_plot, sklearn_line_plot, dt_line_plot, rf_line_plot, lr_line_plot, dt_residual_plot, dts_residual_plot, rf_residual_plot, rfs_residual_plot, rfsdt_residual_plot, lr_residual_plot, lrs_residual_plot])

    # Run all algorithms/dataset: process_all_algorithms returns 21 values matching these outputs.
    all_btn.click(process_all_algorithms, inputs=[dt_max_depth, dt_min_samples_split, dt_min_samples_leaf,
                                                  rf_n_estimators, rf_max_depth,
                                                  lr_learning_rate, lr_num_iterations],
                  outputs=[predictions, evaluation, scatter_plot, custom_scatter_plot, sklearn_scatter_plot, dt_scatter_plot, rf_scatter_plot, lr_scatter_plot, line_plot, custom_line_plot, sklearn_line_plot, dt_line_plot, rf_line_plot, lr_line_plot, dt_residual_plot, dts_residual_plot, rf_residual_plot, rfs_residual_plot, rfsdt_residual_plot, lr_residual_plot, lrs_residual_plot])

if __name__ == "__main__":
    gr_output.launch()
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio>=4.7.1
2
+ pandas>=2.1.3
3
+ numpy>=1.26.2
5
+ scikit-learn>=1.3.2