NathanPortelli commited on
Commit
94b723c
·
1 Parent(s): 0388c9b

Initial commit

Browse files
DecisionTreeRegressor.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
class Node():
    """A single node of the regression tree.

    Internal nodes carry a split (feature_index, threshold, children,
    variance reduction); leaf nodes carry only `value`, the prediction.
    """

    def __init__(self, feature_index=None, threshold=None, left=None, right=None, var_red=None, value=None):
        # Split definition (internal nodes only).
        self.feature_index = feature_index
        self.threshold = threshold
        self.left = left
        self.right = right
        # Variance reduction achieved by this split (diagnostic, used by print_tree).
        self.var_red = var_red
        # Leaf prediction; None for internal nodes.
        self.value = value


class DecisionTreeRegressor():
    """CART-style regression tree grown greedily by maximising variance reduction.

    Leaves predict the mean of the training targets that reach them.

    Parameters
    ----------
    min_samples_split : int
        Minimum number of samples a node must hold to be considered for splitting.
    max_depth : int
        Maximum depth at which splitting is still attempted.
    min_samples_leaf : int or None
        Minimum number of samples each child of a split must keep.
        ``None`` is treated as 1 (i.e. no extra constraint).
    """

    def __init__(self, min_samples_split, max_depth, min_samples_leaf):
        self.root = None
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        # Bug fix: the parameter was previously accepted but never stored,
        # so `min_samples_leaf` silently had no effect on the tree.
        self.min_samples_leaf = min_samples_leaf

    def build_tree(self, dataset, curr_depth=0):
        """Recursively grow the tree; `dataset` has the target as its last column."""
        X, Y = dataset[:, :-1], dataset[:, -1]
        num_samples, num_features = np.shape(X)

        best_split = {}

        # Only attempt a split when the node is large enough and not too deep.
        if num_samples >= self.min_samples_split and curr_depth <= self.max_depth:
            best_split = self.get_best_split(dataset, num_samples, num_features)

        # Split only if it actually reduces variance; otherwise fall through to a leaf.
        if "var_red" in best_split and best_split["var_red"] > 0:
            left_subtree = self.build_tree(best_split["dataset_left"], curr_depth + 1)
            right_subtree = self.build_tree(best_split["dataset_right"], curr_depth + 1)
            return Node(best_split["feature_index"], best_split["threshold"],
                        left_subtree, right_subtree, best_split["var_red"])

        # Leaf: predict the mean target of the samples that reached this node.
        leaf_value = self.calculate_leaf_value(Y)
        return Node(value=leaf_value)

    def variance_reduction(self, parent, l_child, r_child):
        """Return the weighted decrease in variance achieved by splitting `parent`."""
        weight_l = len(l_child) / len(parent)
        weight_r = len(r_child) / len(parent)
        return np.var(parent) - (weight_l * np.var(l_child) + weight_r * np.var(r_child))

    def calculate_leaf_value(self, Y):
        """Leaf prediction: the mean of the targets."""
        return np.mean(Y)

    def split(self, dataset, feature_index, threshold):
        """Partition rows into (<= threshold, > threshold) on `feature_index`.

        Vectorised boolean masking replaces the original per-row list
        comprehension; empty sides keep their column count, which is safe
        because callers only index them when non-empty.
        """
        mask = dataset[:, feature_index] <= threshold
        return dataset[mask], dataset[~mask]

    def get_best_split(self, dataset, num_samples, num_features):
        """Exhaustively search all features/thresholds for the best split.

        Returns an empty dict when no valid split exists.
        """
        best_split = {}
        max_var_red = -float("inf")
        # None means "no leaf-size constraint", i.e. at least one sample per side.
        min_leaf = self.min_samples_leaf if self.min_samples_leaf else 1

        for feature_index in range(num_features):
            possible_thresholds = np.unique(dataset[:, feature_index])

            for threshold in possible_thresholds:
                dataset_left, dataset_right = self.split(dataset, feature_index, threshold)

                # Bug fix: enforce the minimum leaf size on both children;
                # previously only non-emptiness was checked and
                # min_samples_leaf was ignored entirely.
                if len(dataset_left) >= min_leaf and len(dataset_right) >= min_leaf:
                    y, left_y, right_y = dataset[:, -1], dataset_left[:, -1], dataset_right[:, -1]
                    curr_var_red = self.variance_reduction(y, left_y, right_y)

                    if curr_var_red > max_var_red:
                        best_split["feature_index"] = feature_index
                        best_split["threshold"] = threshold
                        best_split["dataset_left"] = dataset_left
                        best_split["dataset_right"] = dataset_right
                        best_split["var_red"] = curr_var_red
                        max_var_red = curr_var_red

        return best_split

    def print_tree(self, tree=None, indent=" "):
        """Pretty-print the tree to stdout (root is used when `tree` is omitted)."""
        if not tree:
            tree = self.root

        if tree.value is not None:
            print(tree.value)
        else:
            print("X_" + str(tree.feature_index), "<=", tree.threshold, "?", tree.var_red)
            print("%sleft:" % (indent), end="")
            self.print_tree(tree.left, indent + indent)
            print("%sright:" % (indent), end="")
            self.print_tree(tree.right, indent + indent)

    def fit(self, X, Y, min_samples_split=None, max_depth=None):
        """Build the tree from features X and targets Y.

        The optional arguments override the constructor's hyperparameters
        for this fit only if provided.
        """
        if min_samples_split is not None:
            self.min_samples_split = min_samples_split
        if max_depth is not None:
            self.max_depth = max_depth

        # Append the target as the last column so the tree can carry both around.
        dataset = np.column_stack((X, Y))
        self.root = self.build_tree(dataset)

    def make_prediction(self, x, tree):
        """Route a single sample `x` down the tree and return the leaf value."""
        # `is not None` instead of `!= None` (identity check for the sentinel).
        if tree.value is not None:
            return tree.value

        feature_val = x[tree.feature_index]

        if feature_val <= tree.threshold:
            return self.make_prediction(x, tree.left)
        else:
            return self.make_prediction(x, tree.right)

    def predict(self, X):
        """Return a list with one prediction per row of X."""
        return [self.make_prediction(x, self.root) for x in X]
LinearRegression.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
class LinearRegression:
    """Ordinary least squares regression fitted with batch gradient descent.

    After `fit`, `weights` holds one coefficient per feature and `bias`
    holds the intercept; `predict` applies the learned linear model.
    """

    def __init__(self, learning_rate, num_iterations):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Run `num_iterations` full-batch gradient-descent steps on (X, y)."""
        n_rows, n_cols = X.shape

        # Start from the zero model.
        self.weights = np.zeros(n_cols)
        self.bias = 0

        inv_n = 1 / n_rows
        for _ in range(self.num_iterations):
            # Current model output and its error against the targets.
            residuals = (X @ self.weights + self.bias) - y

            # Gradients of the mean-squared-error cost.
            grad_w = inv_n * (X.T @ residuals)
            grad_b = inv_n * np.sum(residuals)

            # Step against the gradient.
            self.weights = self.weights - self.learning_rate * grad_w
            self.bias = self.bias - self.learning_rate * grad_b

    def predict(self, X):
        """Return the linear model's prediction for each row of X."""
        return X @ self.weights + self.bias
NSO_Population_Sex_dataset/NSO_DF_TOT_POP_BY_REG_DIST_LOC_1.5.csv ADDED
The diff for this file is too large to render. See raw diff
 
NSO_Population_Sex_dataset/NSO_DIS_SEX_YEAR_POP.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
NSO_Population_Sex_dataset/NSO_POPULATION_DATA_CLEANED.csv ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ District,Sex,Year,Population,Population_Growth_Rate,Average_Population
2
+ 1,0,0.0,0.3372772323766804,0.0,0.33836672889163455
3
+ 1,0,0.06666666666666667,0.3342958058581002,-0.008839691008998951,0.33836672889163455
4
+ 1,0,0.13333333333333333,0.332684223956165,-0.004820825968182452,0.33836672889163455
5
+ 1,0,0.2,0.3328319522971757,0.000444049733570262,0.33836672889163455
6
+ 1,0,0.26666666666666666,0.3322007493855844,-0.001896461283944606,0.33836672889163455
7
+ 1,0,0.3333333333333333,0.327930057345456,-0.012855756791720685,0.33836672889163455
8
+ 1,0,0.4,0.32661393212554224,-0.004013432713571907,0.33836672889163455
9
+ 1,0,0.4666666666666667,0.327016827601026,0.0012335526315787604,0.33836672889163455
10
+ 1,0,0.5333333333333333,0.3263856246894347,-0.0019301848049281256,0.33836672889163455
11
+ 1,0,0.6,0.33040114959509004,0.01230300785911198,0.33836672889163455
12
+ 1,0,0.6666666666666666,0.333919770080982,0.010649540687748926,0.33836672889163455
13
+ 1,0,0.7333333333333333,0.3361759847436913,0.006756756756756577,0.33836672889163455
14
+ 1,0,0.8,0.3448113777682277,0.02568712048577826,0.33836672889163455
15
+ 1,0,0.8666666666666667,0.35786519117390314,0.03785783836416745,0.33836672889163455
16
+ 1,0,0.9333333333333333,0.37900377378762035,0.05906856306526054,0.33836672889163455
17
+ 1,0,1.0,0.3783994305743946,-0.0015945572446051104,0.33836672889163455
18
+ 1,1,0.0,0.34063469467237883,-0.09980124929017598,0.33836672889163455
19
+ 1,1,0.06666666666666667,0.33621627429123974,-0.012971140198706821,0.33836672889163455
20
+ 1,1,0.13333333333333333,0.33459126254012167,-0.004833233473137688,0.33836672889163455
21
+ 1,1,0.2,0.3335705940022294,-0.00305049369832211,0.33836672889163455
22
+ 1,1,0.26666666666666666,0.3332079880742939,-0.001087044045414376,0.33836672889163455
23
+ 1,1,0.3333333333333333,0.3309786331099502,-0.006690580790778222,0.33836672889163455
24
+ 1,1,0.4,0.33013255261143415,-0.0025562994522216886,0.33836672889163455
25
+ 1,1,0.4666666666666667,0.3299042451753267,-0.0006915629322267192,0.33836672889163455
26
+ 1,1,0.5333333333333333,0.3303340003491761,0.0013026663952779849,0.33836672889163455
27
+ 1,1,0.6,0.3311666509985093,0.0025206325974711508,0.33836672889163455
28
+ 1,1,0.6666666666666666,0.33355716415304654,0.007218459791556775,0.33836672889163455
29
+ 1,1,0.7333333333333333,0.33655202052080957,0.008978540081330477,0.33836672889163455
30
+ 1,1,0.8,0.3402452290460778,0.010973663208299955,0.33836672889163455
31
+ 1,1,0.8666666666666667,0.34502625535515236,0.01405170712453141,0.33836672889163455
32
+ 1,1,0.9333333333333333,0.35257383059588243,0.021875364913783057,0.33836672889163455
33
+ 1,1,1.0,0.35123084567760304,-0.0038090884851256224,0.33836672889163455
34
+ 2,0,0.0,0.5852459676877829,0.0,0.7000187178522985
35
+ 2,0,0.06666666666666667,0.5842790185466217,-0.001652209830648621,0.7000187178522985
36
+ 2,0,0.13333333333333333,0.5855145646714387,0.0021146508527558616,0.7000187178522985
37
+ 2,0,0.2,0.5892614925934382,0.006399376118170652,0.7000187178522985
38
+ 2,0,0.26666666666666666,0.5919608922791797,0.004580987761241717,0.7000187178522985
39
+ 2,0,0.3333333333333333,0.5893017821409866,-0.004492036843776859,0.7000187178522985
40
+ 2,0,0.4,0.5907790655510938,0.0025068368277119113,0.7000187178522985
41
+ 2,0,0.4666666666666667,0.609446555915177,0.03159809047510809,0.7000187178522985
42
+ 2,0,0.5333333333333333,0.6420139401834517,0.05343763772587051,0.7000187178522985
43
+ 2,0,0.6,0.6891392809658747,0.07340236376948028,0.7000187178522985
44
+ 2,0,0.6666666666666666,0.731819341668793,0.06193241610476674,0.7000187178522985
45
+ 2,0,0.7333333333333333,0.7673547226064651,0.04855758643470609,0.7000187178522985
46
+ 2,0,0.8,0.8286485542767354,0.07987678952710975,0.7000187178522985
47
+ 2,0,0.8666666666666667,0.898456910328897,0.0842436225730121,0.7000187178522985
48
+ 2,0,0.9333333333333333,0.9849317092169055,0.09624813153961154,0.7000187178522985
49
+ 2,0,1.0,1.0,0.015298817818623078,0.7000187178522985
50
+ 2,1,0.0,0.6070963323081882,-0.39290366769181184,0.7000187178522985
51
+ 2,1,0.06666666666666667,0.6042760639798015,-0.0046455038159495254,0.7000187178522985
52
+ 2,1,0.13333333333333333,0.6057936369374572,0.002511390154461779,0.7000187178522985
53
+ 2,1,0.2,0.6081438605444461,0.0038795779019242804,0.7000187178522985
54
+ 2,1,0.26666666666666666,0.6116893407287036,0.005830002429167536,0.7000187178522985
55
+ 2,1,0.3333333333333333,0.612186245148467,0.0008123476848089783,0.7000187178522985
56
+ 2,1,0.4,0.6148319254374773,0.004321691821691731,0.7000187178522985
57
+ 2,1,0.4666666666666667,0.6312297712896684,0.026670452807933342,0.7000187178522985
58
+ 2,1,0.5333333333333333,0.6527309598313211,0.03406238032424169,0.7000187178522985
59
+ 2,1,0.6,0.6871516632868213,0.05273337036808434,0.7000187178522985
60
+ 2,1,0.6666666666666666,0.7201488027289453,0.04802016964390399,0.7000187178522985
61
+ 2,1,0.7333333333333333,0.7529176347349619,0.04550286257762526,0.7000187178522985
62
+ 2,1,0.8,0.7975987429661164,0.05934395233933265,0.7000187178522985
63
+ 2,1,0.8666666666666667,0.8420649736103464,0.0557501262838862,0.7000187178522985
64
+ 2,1,0.9333333333333333,0.8905198694618659,0.05754294189885312,0.7000187178522985
65
+ 2,1,1.0,0.8940653496461235,0.003981359996380718,0.7000187178522985
66
+ 3,0,0.0,0.19249002833698178,0.0,0.24032043620150145
67
+ 3,0,0.06666666666666667,0.19827829333476585,0.03007046675504066,0.24032043620150145
68
+ 3,0,0.13333333333333333,0.20512751641799062,0.03454348415063668,0.24032043620150145
69
+ 3,0,0.2,0.21333315426867755,0.04000261882938316,0.24032043620150145
70
+ 3,0,0.26666666666666666,0.22094787875532157,0.03569405099150136,0.24032043620150145
71
+ 3,0,0.3333333333333333,0.22567518566766495,0.02139557500607836,0.24032043620150145
72
+ 3,0,0.4,0.23263184754435207,0.030825993810997376,0.24032043620150145
73
+ 3,0,0.4666666666666667,0.24111951222787767,0.036485394296270623,0.24032043620150145
74
+ 3,0,0.5333333333333333,0.25179624232819864,0.04427982622256876,0.24032043620150145
75
+ 3,0,0.6,0.25930352802138035,0.029814923462584852,0.24032043620150145
76
+ 3,0,0.6666666666666666,0.26713313009494905,0.03019473793246319,0.24032043620150145
77
+ 3,0,0.7333333333333333,0.27388834423389424,0.025287818611432256,0.24032043620150145
78
+ 3,0,0.8,0.28554545332455916,0.04256153770716886,0.24032043620150145
79
+ 3,0,0.8666666666666667,0.3025073528424276,0.059401749600225706,0.24032043620150145
80
+ 3,0,0.9333333333333333,0.3240622607808114,0.07125416204217538,0.24032043620150145
81
+ 3,0,1.0,0.32553954419091874,0.0045586406962287995,0.24032043620150145
82
+ 3,1,0.0,0.19430305797665892,-0.4031353135313531,0.24032043620150145
83
+ 3,1,0.06666666666666667,0.19680100992465854,0.012855957976223298,0.24032043620150145
84
+ 3,1,0.13333333333333333,0.20132686909926,0.022997133888358112,0.24032043620150145
85
+ 3,1,0.2,0.20630934314607646,0.02474818224267894,0.24032043620150145
86
+ 3,1,0.26666666666666666,0.21185587085857027,0.026884520244759846,0.24032043620150145
87
+ 3,1,0.3333333333333333,0.21591168531177396,0.019144215530903397,0.24032043620150145
88
+ 3,1,0.4,0.22098816830286996,0.023511849225601678,0.24032043620150145
89
+ 3,1,0.4666666666666667,0.22595721250050363,0.022485566697052484,0.24032043620150145
90
+ 3,1,0.5333333333333333,0.23034877318327715,0.01943536404160473,0.24032043620150145
91
+ 3,1,0.6,0.23553269496783552,0.02250466417910446,0.24032043620150145
92
+ 3,1,0.6666666666666666,0.24145525845744753,0.025145398563120036,0.24032043620150145
93
+ 3,1,0.7333333333333333,0.24470528195968358,0.013460147950386414,0.24032043620150145
94
+ 3,1,0.8,0.2505472663541988,0.023873552494374728,0.24032043620150145
95
+ 3,1,0.8666666666666667,0.2589006325458965,0.033340480274442585,0.24032043620150145
96
+ 3,1,0.9333333333333333,0.26707941069821783,0.031590413943354934,0.24032043620150145
97
+ 3,1,1.0,0.26885215079034663,0.006637501885654151,0.24032043620150145
98
+ 4,0,0.0,0.17321819475967284,0.0,0.192231503740213
99
+ 4,0,0.06666666666666667,0.17387625736962975,0.003799038610637462,0.192231503740213
100
+ 4,0,0.13333333333333333,0.17555498851747894,0.0096547462732679,0.192231503740213
101
+ 4,0,0.2,0.17844240609177958,0.016447368421052655,0.192231503740213
102
+ 4,0,0.26666666666666666,0.18081948939713408,0.013321291487920606,0.192231503740213
103
+ 4,0,0.3333333333333333,0.1805777521118438,-0.0013368983957219305,0.192231503740213
104
+ 4,0,0.4,0.1822833429580586,0.00944518816004769,0.192231503740213
105
+ 4,0,0.4666666666666667,0.18451269792240232,0.012230162823251955,0.192231503740213
106
+ 4,0,0.5333333333333333,0.1869569304736708,0.01324696120532809,0.192231503740213
107
+ 4,0,0.6,0.18918628543801452,0.011924430716184098,0.192231503740213
108
+ 4,0,0.6666666666666666,0.19271833577308928,0.018669695463902958,0.192231503740213
109
+ 4,0,0.7333333333333333,0.19658613233773384,0.020069686411149812,0.192231503740213
110
+ 4,0,0.8,0.20066880648930313,0.020767864462358343,0.192231503740213
111
+ 4,0,0.8666666666666667,0.2102845785041834,0.04791861865881408,0.192231503740213
112
+ 4,0,0.9333333333333333,0.2200615087092572,0.04649380508366341,0.192231503740213
113
+ 4,0,1.0,0.22241173231624609,0.010679848651287571,0.192231503740213
114
+ 4,1,0.0,0.182256483259693,-0.18054465310065815,0.192231503740213
115
+ 4,1,0.06666666666666667,0.18132982366608022,-0.005084371085402717,0.192231503740213
116
+ 4,1,0.13333333333333333,0.18244450114825211,0.006147237446304299,0.192231503740213
117
+ 4,1,0.2,0.1839889338042734,0.00846521899153485,0.192231503740213
118
+ 4,1,0.26666666666666666,0.18601684103087524,0.011021897810219006,0.192231503740213
119
+ 4,1,0.3333333333333333,0.1866883334900149,0.0036098476644284005,0.192231503740213
120
+ 4,1,0.4,0.18825962584440176,0.00841666067189406,0.192231503740213
121
+ 4,1,0.4666666666666667,0.19087844643504653,0.013910686260522276,0.192231503740213
122
+ 4,1,0.5333333333333333,0.1926243268288097,0.009146555969886672,0.192231503740213
123
+ 4,1,0.6,0.19471938330132552,0.010876385693369572,0.192231503740213
124
+ 4,1,0.6666666666666666,0.19823800378721748,0.018070211738740793,0.192231503740213
125
+ 4,1,0.7333333333333333,0.20038677965646445,0.010839374026149873,0.192231503740213
126
+ 4,1,0.8,0.20383825089644245,0.017224046645667235,0.192231503740213
127
+ 4,1,0.8666666666666667,0.20761203851680746,0.018513638160495338,0.192231503740213
128
+ 4,1,0.9333333333333333,0.21185587085857027,0.020441166957759327,0.192231503740213
129
+ 4,1,1.0,0.21211103799304334,0.0012044374009507752,0.192231503740213
130
+ 5,1,0.0,0.17998683874780086,0.0,0.25831349632693623
131
+ 5,1,0.06666666666666667,0.18556022615866025,0.03096552753320414,0.25831349632693623
132
+ 5,1,0.13333333333333333,0.19316152079612145,0.040964029818339576,0.25831349632693623
133
+ 5,1,0.2,0.2011925706074321,0.04157686157269014,0.25831349632693623
134
+ 5,1,0.26666666666666666,0.2098145337827856,0.04285428209064812,0.25831349632693623
135
+ 5,1,0.3333333333333333,0.21694578369884907,0.03398835050886517,0.25831349632693623
136
+ 5,1,0.4,0.22505741260525644,0.03739012009409426,0.25831349632693623
137
+ 5,1,0.4666666666666667,0.2319334953868468,0.030552571905955395,0.25831349632693623
138
+ 5,1,0.5333333333333333,0.240904634640953,0.03867979154603374,0.25831349632693623
139
+ 5,1,0.6,0.2546030808074025,0.05686252648009793,0.25831349632693623
140
+ 5,1,0.6666666666666666,0.27343172936167925,0.07395294862327262,0.25831349632693623
141
+ 5,1,0.7333333333333333,0.29264984354225704,0.0702848722986249,0.25831349632693623
142
+ 5,1,0.8,0.3186500315601456,0.08884401817263998,0.25831349632693623
143
+ 5,1,0.8666666666666667,0.3428237600891742,0.07586294095334423,0.25831349632693623
144
+ 5,1,0.9333333333333333,0.36506359033588054,0.06487248795393108,0.25831349632693623
145
+ 5,1,1.0,0.3640160620996226,-0.0028694404591105283,0.25831349632693623
146
+ 5,0,0.0,0.17726057936369374,-0.5130418741929533,0.25831349632693623
147
+ 5,0,0.06666666666666667,0.1837337666698003,0.036517918024092655,0.25831349632693623
148
+ 5,0,0.13333333333333333,0.19122762251379918,0.040786492215481474,0.25831349632693623
149
+ 5,0,0.2,0.19999731403016344,0.0458599620759883,0.25831349632693623
150
+ 5,0,0.26666666666666666,0.20827010112676433,0.04136449100188022,0.25831349632693623
151
+ 5,0,0.3333333333333333,0.21374947959334417,0.02630900180551987,0.25831349632693623
152
+ 5,0,0.4,0.22137763392917098,0.03568735863282235,0.25831349632693623
153
+ 5,0,0.4666666666666667,0.22735391681551417,0.026995874787672847,0.25831349632693623
154
+ 5,0,0.5333333333333333,0.2376680409878997,0.045365940102782165,0.25831349632693623
155
+ 5,0,0.6,0.25472394945004767,0.07176357574730186,0.25831349632693623
156
+ 5,0,0.6666666666666666,0.27494930231933495,0.07940106500764488,0.25831349632693623
157
+ 5,0,0.7333333333333333,0.29509407609352545,0.07326723001025726,0.25831349632693623
158
+ 5,0,0.8,0.3256872725319295,0.10367268920948436,0.25831349632693623
159
+ 5,0,0.8666666666666667,0.3615986892467198,0.11026349428889537,0.25831349632693623
160
+ 5,0,0.9333333333333333,0.40008863700460645,0.10644382544103981,0.25831349632693623
161
+ 5,0,1.0,0.3974563865647789,-0.006579168205162689,0.25831349632693623
162
+ 6,1,0.0,0.005841984394515249,0.0,0.009613673600945461
163
+ 6,1,0.06666666666666667,0.00507648299109601,-0.13103448275862062,0.009613673600945461
164
+ 6,1,0.13333333333333333,0.005385369522300265,0.060846560846560926,0.009613673600945461
165
+ 6,1,0.2,0.005895703791246425,0.09476309226932678,0.009613673600945461
166
+ 6,1,0.26666666666666666,0.006728354440579632,0.1412300683371297,0.009613673600945461
167
+ 6,1,0.3333333333333333,0.006808933535676394,0.011976047904191711,0.009613673600945461
168
+ 6,1,0.4,0.00726554840789138,0.06706114398422103,0.009613673600945461
169
+ 6,1,0.4666666666666667,0.007184969312794618,-0.011090573012939031,0.009613673600945461
170
+ 6,1,0.5333333333333333,0.007278978257074173,0.013084112149532645,0.009613673600945461
171
+ 6,1,0.6,0.008957709404923382,0.23062730627306283,0.009613673600945461
172
+ 6,1,0.6666666666666666,0.009978377942815702,0.11394302848575721,0.009613673600945461
173
+ 6,1,0.7333333333333333,0.011576529995568149,0.16016150740242252,0.009613673600945461
174
+ 6,1,0.8,0.014772834101073044,0.2761020881670533,0.009613673600945461
175
+ 6,1,0.8666666666666667,0.01814372624595426,0.22818181818181826,0.009613673600945461
176
+ 6,1,0.9333333333333333,0.022548716777910583,0.24278312361213894,0.009613673600945461
177
+ 6,1,1.0,0.024187158378211414,0.07266229898749255,0.009613673600945461
178
+ 6,0,0.0,0.0,-1.0,0.009613673600945461
179
+ 6,0,0.06666666666666667,0.00017458803937631782,3.846153846153846,0.009613673600945461
180
+ 6,0,0.13333333333333333,0.0008460804985160016,3.846153846153846,0.009613673600945461
181
+ 6,0,0.2,0.002242784813526544,1.6507936507936511,0.009613673600945461
182
+ 6,0,0.26666666666666666,0.0033440324465156255,0.49101796407185616,0.009613673600945461
183
+ 6,0,0.3333333333333333,0.0030217160661285773,-0.09638554216867468,0.009613673600945461
184
+ 6,0,0.4,0.0038946562630101664,0.288888888888889,0.009613673600945461
185
+ 6,0,0.4666666666666667,0.004230402492580008,0.0862068965517242,0.009613673600945461
186
+ 6,0,0.5333333333333333,0.0045930084205154376,0.08571428571428563,0.009613673600945461
187
+ 6,0,0.6,0.006634345496300076,0.4444444444444444,0.009613673600945461
188
+ 6,0,0.6666666666666666,0.008192208001504143,0.23481781376518218,0.009613673600945461
189
+ 6,0,0.7333333333333333,0.010367843569116719,0.2655737704918033,0.009613673600945461
190
+ 6,0,0.8,0.014114771491116155,0.3613989637305699,0.009613673600945461
191
+ 6,0,0.8666666666666667,0.019674729052792737,0.3939105613701237,0.009613673600945461
192
+ 6,0,0.9333333333333333,0.02926364136930742,0.4873720136518771,0.009613673600945461
193
+ 6,0,1.0,0.029411369710318155,0.005048187241854185,0.009613673600945461
NSO_Population_Sex_dataset/NSO_POPULATION_DATA_PREFEATURE.csv ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ District,Sex,Year,Population
2
+ 1,0,2005,40400
3
+ 1,0,2006,40178
4
+ 1,0,2007,40058
5
+ 1,0,2008,40069
6
+ 1,0,2009,40022
7
+ 1,0,2010,39704
8
+ 1,0,2011,39606
9
+ 1,0,2012,39636
10
+ 1,0,2013,39589
11
+ 1,0,2014,39888
12
+ 1,0,2015,40150
13
+ 1,0,2016,40318
14
+ 1,0,2017,40961
15
+ 1,0,2018,41933
16
+ 1,0,2019,43507
17
+ 1,0,2020,43462
18
+ 1,1,2005,40650
19
+ 1,1,2006,40321
20
+ 1,1,2007,40200
21
+ 1,1,2008,40124
22
+ 1,1,2009,40097
23
+ 1,1,2010,39931
24
+ 1,1,2011,39868
25
+ 1,1,2012,39851
26
+ 1,1,2013,39883
27
+ 1,1,2014,39945
28
+ 1,1,2015,40123
29
+ 1,1,2016,40346
30
+ 1,1,2017,40621
31
+ 1,1,2018,40977
32
+ 1,1,2019,41539
33
+ 1,1,2020,41439
34
+ 2,0,2005,58864
35
+ 2,0,2006,58792
36
+ 2,0,2007,58884
37
+ 2,0,2008,59163
38
+ 2,0,2009,59364
39
+ 2,0,2010,59166
40
+ 2,0,2011,59276
41
+ 2,0,2012,60666
42
+ 2,0,2013,63091
43
+ 2,0,2014,66600
44
+ 2,0,2015,69778
45
+ 2,0,2016,72424
46
+ 2,0,2017,76988
47
+ 2,0,2018,82186
48
+ 2,0,2019,88625
49
+ 2,0,2020,89747
50
+ 2,1,2005,60491
51
+ 2,1,2006,60281
52
+ 2,1,2007,60394
53
+ 2,1,2008,60569
54
+ 2,1,2009,60833
55
+ 2,1,2010,60870
56
+ 2,1,2011,61067
57
+ 2,1,2012,62288
58
+ 2,1,2013,63889
59
+ 2,1,2014,66452
60
+ 2,1,2015,68909
61
+ 2,1,2016,71349
62
+ 2,1,2017,74676
63
+ 2,1,2018,77987
64
+ 2,1,2019,81595
65
+ 2,1,2020,81859
66
+ 3,0,2005,29619
67
+ 3,0,2006,30050
68
+ 3,0,2007,30560
69
+ 3,0,2008,31171
70
+ 3,0,2009,31738
71
+ 3,0,2010,32090
72
+ 3,0,2011,32608
73
+ 3,0,2012,33240
74
+ 3,0,2013,34035
75
+ 3,0,2014,34594
76
+ 3,0,2015,35177
77
+ 3,0,2016,35680
78
+ 3,0,2017,36548
79
+ 3,0,2018,37811
80
+ 3,0,2019,39416
81
+ 3,0,2020,39526
82
+ 3,1,2005,29754
83
+ 3,1,2006,29940
84
+ 3,1,2007,30277
85
+ 3,1,2008,30648
86
+ 3,1,2009,31061
87
+ 3,1,2010,31363
88
+ 3,1,2011,31741
89
+ 3,1,2012,32111
90
+ 3,1,2013,32438
91
+ 3,1,2014,32824
92
+ 3,1,2015,33265
93
+ 3,1,2016,33507
94
+ 3,1,2017,33942
95
+ 3,1,2018,34564
96
+ 3,1,2019,35173
97
+ 3,1,2020,35305
98
+ 4,0,2005,28184
99
+ 4,0,2006,28233
100
+ 4,0,2007,28358
101
+ 4,0,2008,28573
102
+ 4,0,2009,28750
103
+ 4,0,2010,28732
104
+ 4,0,2011,28859
105
+ 4,0,2012,29025
106
+ 4,0,2013,29207
107
+ 4,0,2014,29373
108
+ 4,0,2015,29636
109
+ 4,0,2016,29924
110
+ 4,0,2017,30228
111
+ 4,0,2018,30944
112
+ 4,0,2019,31672
113
+ 4,0,2020,31847
114
+ 4,1,2005,28857
115
+ 4,1,2006,28788
116
+ 4,1,2007,28871
117
+ 4,1,2008,28986
118
+ 4,1,2009,29137
119
+ 4,1,2010,29187
120
+ 4,1,2011,29304
121
+ 4,1,2012,29499
122
+ 4,1,2013,29629
123
+ 4,1,2014,29785
124
+ 4,1,2015,30047
125
+ 4,1,2016,30207
126
+ 4,1,2017,30464
127
+ 4,1,2018,30745
128
+ 4,1,2019,31061
129
+ 4,1,2020,31080
130
+ 5,1,2005,28688
131
+ 5,1,2006,29103
132
+ 5,1,2007,29669
133
+ 5,1,2008,30267
134
+ 5,1,2009,30909
135
+ 5,1,2010,31440
136
+ 5,1,2011,32044
137
+ 5,1,2012,32556
138
+ 5,1,2013,33224
139
+ 5,1,2014,34244
140
+ 5,1,2015,35646
141
+ 5,1,2016,37077
142
+ 5,1,2017,39013
143
+ 5,1,2018,40813
144
+ 5,1,2019,42469
145
+ 5,1,2020,42391
146
+ 5,0,2005,28485
147
+ 5,0,2006,28967
148
+ 5,0,2007,29525
149
+ 5,0,2008,30178
150
+ 5,0,2009,30794
151
+ 5,0,2010,31202
152
+ 5,0,2011,31770
153
+ 5,0,2012,32215
154
+ 5,0,2013,32983
155
+ 5,0,2014,34253
156
+ 5,0,2015,35759
157
+ 5,0,2016,37259
158
+ 5,0,2017,39537
159
+ 5,0,2018,42211
160
+ 5,0,2019,45077
161
+ 5,0,2020,44881
162
+ 6,1,2005,15721
163
+ 6,1,2006,15664
164
+ 6,1,2007,15687
165
+ 6,1,2008,15725
166
+ 6,1,2009,15787
167
+ 6,1,2010,15793
168
+ 6,1,2011,15827
169
+ 6,1,2012,15821
170
+ 6,1,2013,15828
171
+ 6,1,2014,15953
172
+ 6,1,2015,16029
173
+ 6,1,2016,16148
174
+ 6,1,2017,16386
175
+ 6,1,2018,16637
176
+ 6,1,2019,16965
177
+ 6,1,2020,17087
178
+ 6,0,2005,15286
179
+ 6,0,2006,15299
180
+ 6,0,2007,15349
181
+ 6,0,2008,15453
182
+ 6,0,2009,15535
183
+ 6,0,2010,15511
184
+ 6,0,2011,15576
185
+ 6,0,2012,15601
186
+ 6,0,2013,15628
187
+ 6,0,2014,15780
188
+ 6,0,2015,15896
189
+ 6,0,2016,16058
190
+ 6,0,2017,16337
191
+ 6,0,2018,16751
192
+ 6,0,2019,17465
193
+ 6,0,2020,17476
README.md CHANGED
@@ -1,12 +1,11 @@
1
  ---
2
- title: Gradio Web Tool
3
- emoji: 🌍
4
- colorFrom: blue
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 4.13.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: ICS5110 Gradio Web Tool
3
+ emoji: 🐠
4
+ colorFrom: indigo
5
+ colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 4.9.1
8
  app_file: app.py
9
  pinned: false
10
+ license: apache-2.0
11
  ---
 
 
RandomForestRegressor.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from DecisionTreeRegressor import DecisionTreeRegressor
2
+ from sklearn.tree import DecisionTreeRegressor as SKLearnDecisionTreeRegressor
3
+ import numpy as np
4
+
5
class RandomForestRegressor:
    """Bagging ensemble of regression trees.

    Each tree is trained on a bootstrap resample of the data; predictions
    are the mean of the per-tree predictions. `custom=True` uses the
    project's DecisionTreeRegressor, otherwise scikit-learn's.

    Parameters
    ----------
    n_estimators : int
        Number of trees in the ensemble.
    max_depth, min_samples_split, min_samples_leaf
        Default hyperparameters forwarded to every tree.
    custom : bool
        Backend selector (custom implementation vs scikit-learn).
    """

    def __init__(self, n_estimators, max_depth, min_samples_split, min_samples_leaf, custom=True):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.trees = []
        self.custom = custom

    def fit(self, X, y, tree_params=None):
        """Fit `n_estimators` trees on bootstrap resamples of (X, y).

        `tree_params`, when given, overrides the constructor hyperparameters
        passed to each tree.
        """
        if tree_params is None:
            tree_params = {
                'max_depth': self.max_depth,
                'min_samples_split': self.min_samples_split,
                'min_samples_leaf': self.min_samples_leaf
            }

        # Convert X and y to NumPy arrays so fancy indexing works below.
        X = np.array(X)
        y = np.array(y)

        # Bug fix: start from an empty ensemble, otherwise calling fit()
        # a second time keeps (and averages in) trees from the previous fit.
        self.trees = []

        for _ in range(self.n_estimators):
            if self.custom:
                tree = DecisionTreeRegressor(**tree_params)
            else:
                tree = SKLearnDecisionTreeRegressor(**tree_params)

            # Bootstrap sampling: draw len(X) rows with replacement.
            indices = np.random.choice(len(X), len(X), replace=True)
            X_bootstrap = X[indices]
            y_bootstrap = y[indices]

            tree.fit(X_bootstrap, y_bootstrap)
            self.trees.append(tree)

    def predict(self, X):
        """Return the mean per-row prediction across all fitted trees."""
        # Bug fix: the original called `X.values`, which requires a pandas
        # object even though fit() accepts plain arrays; normalise once here
        # so both DataFrames and ndarrays are accepted.
        X = np.asarray(X)
        predictions = np.zeros((X.shape[0], len(self.trees)))

        for i, tree in enumerate(self.trees):
            predictions[:, i] = tree.predict(X)

        return np.mean(predictions, axis=1)
app.py ADDED
@@ -0,0 +1,516 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import matplotlib.pyplot as plt
4
+
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, explained_variance_score
7
+
8
+ from RandomForestRegressor import RandomForestRegressor
9
+ from LinearRegression import LinearRegression
10
+ from DecisionTreeRegressor import DecisionTreeRegressor
11
+
12
+ from sklearn.ensemble import RandomForestRegressor as SKLearnRandomForestRegressor
13
+ from sklearn.linear_model import LinearRegression as SKLearnLinearRegression
14
+ from sklearn.tree import DecisionTreeRegressor as SKLearnDecisionTreeRegressor
15
+
16
# Dataset exported prior to feature scaling/engineering -- for user readability
df_read = pd.read_csv('NSO_Population_Sex_dataset/NSO_POPULATION_DATA_PREFEATURE.csv')
# Cleaned dataset after feature scaling/engineering -- for model training
df = pd.read_csv('NSO_Population_Sex_dataset/NSO_POPULATION_DATA_CLEANED.csv')

# Features used for training; District and Sex are one-hot encoded below,
# the remaining columns are already numeric/scaled in the cleaned dataset.
feature_cols = ['District', 'Sex', 'Year', 'Population_Growth_Rate', 'Average_Population']
X = pd.get_dummies(df[feature_cols], columns=['District', 'Sex']) # for converting to categorical variables
y = df["Population"]

# NOTE(review): no random_state is passed, so the train/test split (and any
# metrics computed from it) changes on every app start -- confirm intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Mapping for dropdowns: human-readable UI labels -> the scaled values used
# in the cleaned dataset (years 2005-2020 are min-max scaled onto [0, 1]
# in steps of 1/15; districts are their 1-6 integer codes).
mapping_display = {
    "year": {
        "2005": 0,
        "2006": 0.0666666666666666,
        "2007": 0.133333333333333,
        "2008": 0.2,
        "2009": 0.266666666666666,
        "2010": 0.333333333333333,
        "2011": 0.4,
        "2012": 0.466666666666666,
        "2013": 0.533333333333333,
        "2014": 0.6,
        "2015": 0.666666666666666,
        "2016": 0.733333333333333,
        "2017": 0.8,
        "2018": 0.866666666666666,
        "2019": 0.933333333333333,
        "2020": 1,
    },
    "district": {
        "Southern Harbour": 1,
        "Northern Harbour": 2,
        "South Eastern": 3,
        "Western": 4,
        "Northern": 5,
        "Gozo & Comino": 6,
    },
}
56
+
57
+
58
def scatter_plot_graph(x, y, legend_labels):
    """Overlay one scatter series per entry of `x` against `y`; return the figure."""
    fig, axis = plt.subplots()
    axis.set_xlabel('Actual')
    axis.set_ylabel('Predicted')
    for series in x:
        axis.scatter(series, y, alpha=0.5)
    axis.legend(legend_labels, loc='best')
    # Close so pyplot's global state does not also render the figure.
    plt.close()
    return fig
67
+
68
+
69
def line_plot_graph(x, legend_labels):
    """Draw one line per series in *x* against the sample index.

    Returns the matplotlib Figure, closed so it can be handed to Gradio.
    """
    fig, axis = plt.subplots()
    for series in x:
        axis.plot(series, alpha=0.5)
    axis.set_xlabel('Sample Index')
    axis.set_ylabel('Target Variable (Values)')
    axis.legend(legend_labels, loc='best')
    plt.close()
    return fig
78
+
79
+
80
def residual_plot_graph(x, y, color='black'):
    """Plot residuals (y - x) against the predicted values *x*.

    x, y: equal-length numpy arrays of predictions and actual targets
    (every call site passes ``.to_numpy()`` results).
    Returns the matplotlib Figure, closed so it can be handed to Gradio.
    """
    fig, ax = plt.subplots()
    # One vectorised scatter call instead of one artist per point: identical
    # output, but O(1) artists instead of O(n) -- noticeably faster for large
    # test sets. (The original looped to "avoid an x != y error"; lengths are
    # equal here, so elementwise subtraction is safe.)
    ax.scatter(x, y - x, alpha=0.5, c=color)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Residuals')
    plt.axhline(y=0, color='r', linestyle='--', label='Residuals Mean')
    plt.close()
    return fig
90
+
91
+
92
# Decision Tree - Custom
def decision_tree(X_train, y_train, X_test, max_depth, min_samples_split):
    """Fit the custom DecisionTreeRegressor and return predictions for X_test."""
    regressor = DecisionTreeRegressor(
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=None,  # the custom implementation does not use this parameter
    )
    regressor.fit(X_train.values, y_train.values)
    return regressor.predict(X_test.values)
100
+
101
+
102
# Decision Tree - SKLearn
def decision_tree_sklearn(X_train, y_train, X_test, max_depth, min_samples_split, min_samples_leaf):
    """Fit sklearn's DecisionTreeRegressor and return predictions for X_test."""
    tree = SKLearnDecisionTreeRegressor(
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
    )
    tree.fit(X_train.values, y_train.values)
    return tree.predict(X_test.values)
110
+
111
+
112
# Random Forest - Custom
def random_forest(X_train, y_train, X_test, n_estimators, max_depth, min_samples_split, min_samples_leaf):
    """Fit the custom RandomForestRegressor and return predictions for X_test."""
    forest = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
    )
    forest.fit(X_train, y_train)
    return forest.predict(X_test)
120
+
121
+
122
# Random Forest - SKLearn
def random_forest_sklearn(X_train, y_train, X_test):
    """Fit sklearn's RandomForestRegressor (default settings) and predict on X_test."""
    forest = SKLearnRandomForestRegressor()
    forest.fit(X_train, y_train)
    return forest.predict(X_test)
128
+
129
+
130
# Random Forest - Custom using SKLearn Decision Trees
def random_forest_sklearn_decision_trees(X_train, y_train, X_test, n_estimators, max_depth, min_samples_split, min_samples_leaf):
    """Fit the custom forest with sklearn trees as base estimators (custom=False)."""
    forest = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        custom=False,  # swap the custom base trees for sklearn ones
    )
    forest.fit(X_train, y_train)
    return forest.predict(X_test)
141
+
142
+
143
# Linear Regression - Custom
def linear_regression(X_train, y_train, X_test, learning_rate, num_iterations):
    """Fit the custom gradient-descent LinearRegression and predict on X_test."""
    model = LinearRegression(learning_rate=learning_rate, num_iterations=num_iterations)
    model.fit(X_train, y_train)
    return model.predict(X_test)
149
+
150
+
151
# Linear Regression - SKLearn
def linear_regression_sklearn(X_train, y_train, X_test):
    """Fit sklearn's ordinary least-squares LinearRegression and predict on X_test."""
    model = SKLearnLinearRegression()
    model.fit(X_train, y_train)
    return model.predict(X_test)
157
+
158
+
159
def evaluate_algorithm(algorithm_function, X_train, y_train, X_test, y_test, algorithm_parameters):
    """Train/predict with one algorithm wrapper and compute its metrics.

    algorithm_function: one of the wrapper functions above, called as
    ``algorithm_function(X_train, y_train, X_test, **algorithm_parameters)``.
    Returns (predictions DataFrame, MAE, MSE, RMSE, R2, explained variance).
    """
    prediction = algorithm_function(X_train, y_train, X_test, **algorithm_parameters)
    mae = mean_absolute_error(y_test, prediction)
    mse = mean_squared_error(y_test, prediction)
    # BUG FIX: the original used mean_squared_error(..., squared=True), which is
    # just the MSE again. RMSE is the square root of the MSE (this form also
    # avoids the `squared=` parameter deprecated in newer scikit-learn).
    rmse = mse ** 0.5
    r2 = r2_score(y_test, prediction)
    variance = explained_variance_score(y_test, prediction)
    prediction_results = pd.DataFrame(prediction)
    return prediction_results, mae, mse, rmse, r2, variance
168
+
169
+
170
# Used both for the "All" button and for the filtered data using all algorithms
def process_all_algorithms(dt_max_depth, dt_min_samples_split, dt_min_samples_leaf, rf_n_estimators, rf_max_depth,
                           lr_learning_rate, lr_num_iterations):
    """Run every implemented algorithm on the module-level train/test split.

    Parameters are the hyperparameter widget values from the UI. Returns the
    21-tuple consumed by the ``all_btn.click`` outputs wiring: the combined
    predictions table, the metrics table, six scatter plots, six line plots,
    and seven residual plots (in that order).
    """
    results = {}
    # Decision Tree - Custom
    prediction_dt, mae_dt, mse_dt, rmse_dt, r2_dt, variance_dt = evaluate_algorithm(
        decision_tree, X_train, y_train, X_test, y_test,
        {"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split})

    results["Decision Tree - Custom"] = {"Algorithm": "Decision Tree - Custom", "MAE": mae_dt, "MSE": mse_dt,
                                         "RMSE": rmse_dt, "R2": r2_dt, "Explained Variance": variance_dt}

    # Decision Tree - SKLearn
    prediction_dts, mae_dts, mse_dts, rmse_dts, r2_dts, variance_dts = evaluate_algorithm(
        decision_tree_sklearn, X_train, y_train,
        X_test, y_test, {"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split,
                         "min_samples_leaf": dt_min_samples_leaf})
    results["Decision Tree - SKLearn"] = {"Algorithm": "Decision Tree - SKLearn", "MAE": mae_dts, "MSE": mse_dts,
                                          "RMSE": rmse_dts, "R2": r2_dts, "Explained Variance": variance_dts}

    # Random Forest - Custom
    # NOTE(review): both random forests reuse the *decision-tree* min_samples
    # sliders (dt_min_samples_split / dt_min_samples_leaf) -- confirm intentional.
    prediction_rf, mae_rf, mse_rf, rmse_rf, r2_rf, variance_rf = evaluate_algorithm(
        random_forest, X_train, y_train, X_test,
        y_test, {"max_depth": rf_max_depth,
                 "n_estimators": rf_n_estimators,
                 "min_samples_split": dt_min_samples_split,
                 "min_samples_leaf": dt_min_samples_leaf})

    results["Random Forest - Custom"] = {"Algorithm": "Random Forest - Custom", "MAE": mae_rf, "MSE": mse_rf,
                                         "RMSE": rmse_rf, "R2": r2_rf, "Explained Variance": variance_rf}

    # Random Forest - SKLearn
    prediction_rfs, mae_rfs, mse_rfs, rmse_rfs, r2_rfs, variance_rfs = evaluate_algorithm(
        random_forest_sklearn, X_train, y_train, X_test, y_test, {})
    results["Random Forest - SKLearn"] = {"Algorithm": "Random Forest - SKLearn", "MAE": mae_rfs, "MSE": mse_rfs,
                                          "RMSE": rmse_rfs, "R2": r2_rfs, "Explained Variance": variance_rfs}

    # Random Forest - Custom using SKLearn Decision Trees
    prediction_rfsdt, mae_rfsdt, mse_rfsdt, rmse_rfsdt, r2_rfsdt, variance_rfsdt = evaluate_algorithm(
        random_forest_sklearn_decision_trees, X_train, y_train, X_test, y_test,
        {"max_depth": rf_max_depth, "n_estimators": rf_n_estimators, "min_samples_split": dt_min_samples_split,
         "min_samples_leaf": dt_min_samples_leaf})

    results["Random Forest - Custom using SKLearn DT"] = {"Algorithm": "Random Forest - Custom using SKLearn DT",
                                                          "MAE": mae_rfsdt, "MSE": mse_rfsdt, "RMSE": rmse_rfsdt,
                                                          "R2": r2_rfsdt, "Explained Variance": variance_rfsdt}

    # Linear Regression - Custom
    prediction_lr, mae_lr, mse_lr, rmse_lr, r2_lr, variance_lr = evaluate_algorithm(
        linear_regression, X_train, y_train, X_test, y_test,
        {"learning_rate": lr_learning_rate,
         "num_iterations": lr_num_iterations})
    results["Linear Regression - Custom"] = {"Algorithm": "Linear Regression - Custom", "MAE": mae_lr, "MSE": mse_lr,
                                             "RMSE": rmse_lr, "R2": r2_lr, "Explained Variance": variance_lr}

    # Linear Regression - SKLearn
    prediction_lrs, mae_lrs, mse_lrs, rmse_lrs, r2_lrs, variance_lrs = evaluate_algorithm(
        linear_regression_sklearn, X_train, y_train, X_test, y_test, {})
    results["Linear Regression - SKLearn"] = {"Algorithm": "Linear Regression - SKLearn", "MAE": mae_lrs,
                                              "MSE": mse_lrs, "RMSE": rmse_lrs, "R2": r2_lrs,
                                              "Explained Variance": variance_lrs}

    df_results = pd.DataFrame(results).T  # Convert results to DataFrame

    # Side-by-side table of actual vs predicted values for every algorithm.
    # Each prediction_* is the single-column DataFrame built by evaluate_algorithm.
    all_predictions = pd.DataFrame()  # Initialising empty dataframe to store predictions
    all_predictions["Actual"] = y_test.values
    all_predictions["Decision Tree - Custom"] = prediction_dt
    all_predictions["Decision Tree - SKLearn"] = prediction_dts
    all_predictions["Random Forest - Custom"] = prediction_rf
    all_predictions["Random Forest - SKLearn"] = prediction_rfs
    all_predictions["Random Forest - Custom using SKLearn DT"] = prediction_rfsdt
    all_predictions["Linear Regression - Custom"] = prediction_lr
    all_predictions["Linear Regression - SKLearn"] = prediction_lrs
    all_predictions = pd.DataFrame(all_predictions)

    # Scatter plots: predicted values per algorithm against the actual targets.
    scatter_plot = scatter_plot_graph(
        [prediction_dt.to_numpy(), prediction_dts.to_numpy(), prediction_rf.to_numpy(), prediction_rfsdt.to_numpy(), prediction_rfs.to_numpy(), prediction_lr.to_numpy(), prediction_lrs.to_numpy()],
        y_test.to_numpy(),
        ['Custom DT', 'SKLearn DT', 'Custom RF', 'Custom RF w/ SKLearn DT', 'SKLearn RF', 'Custom LR', 'SKLearn LR'])
    custom_scatter_plot = scatter_plot_graph(
        [prediction_dt.to_numpy(), prediction_rf.to_numpy(), prediction_rfsdt.to_numpy(), prediction_lr.to_numpy()],
        y_test.to_numpy(),
        ['Custom DT', 'Custom RF', 'Custom RF w/ SKLearn DT', 'Custom LR'])
    sklearn_scatter_plot = scatter_plot_graph(
        [prediction_dts.to_numpy(), prediction_rfs.to_numpy(), prediction_lrs.to_numpy()], y_test.to_numpy(),
        ['SKLearn DT', 'SKLearn RF', 'SKLearn LR'])
    dt_scatter_plot = scatter_plot_graph(
        [prediction_dt.to_numpy(), prediction_dts.to_numpy()], y_test.to_numpy(),
        ['Custom DT', 'SKLearn DT'])
    rf_scatter_plot = scatter_plot_graph(
        [prediction_rf.to_numpy(), prediction_rfsdt.to_numpy(), prediction_rfs.to_numpy()], y_test.to_numpy(),
        ['Custom RF', 'Custom RF w/ SKLearn DT', 'SKLearn RF'])
    lr_scatter_plot = scatter_plot_graph(
        [prediction_lr.to_numpy(), prediction_lrs.to_numpy()], y_test.to_numpy(),
        ['Custom LR', 'SKLearn LR'])

    # Line plots: actual series first, then each algorithm's predictions.
    line_plot = line_plot_graph(
        [y_test.to_numpy(), prediction_dt.to_numpy(), prediction_dts.to_numpy(), prediction_rf.to_numpy(),
         prediction_rfsdt.to_numpy(), prediction_rfs.to_numpy(), prediction_lr.to_numpy(), prediction_lrs.to_numpy()],
        ['Actual', 'Custom DT', 'SKLearn DT', 'Custom RF', 'Custom RF w/ SKLearn DT', 'SKLearn RF', 'Custom LR',
         'SKLearn LR'])
    custom_line_plot = line_plot_graph(
        [y_test.to_numpy(), prediction_dt.to_numpy(), prediction_rf.to_numpy(), prediction_rfsdt.to_numpy(), prediction_lr.to_numpy()],
        ['Actual', 'Custom DT', 'Custom RF', 'Custom RF w/ SKLearn DT', 'Custom LR'])
    sklearn_line_plot = line_plot_graph(
        [y_test.to_numpy(), prediction_dts.to_numpy(), prediction_rfs.to_numpy(), prediction_lrs.to_numpy()],
        ['Actual', 'SKLearn DT', 'SKLearn RF', 'SKLearn LR'])
    dt_line_plot = line_plot_graph(
        [y_test.to_numpy(), prediction_dt.to_numpy(), prediction_dts.to_numpy()],
        ['Actual', 'Custom DT', 'SKLearn DT'])
    rf_line_plot = line_plot_graph(
        [y_test.to_numpy(), prediction_rf.to_numpy(), prediction_rfsdt.to_numpy(), prediction_rfs.to_numpy()],
        ['Actual', 'Custom RF', 'Custom RF w/ SKLearn DT', 'SKLearn RF'])
    lr_line_plot = line_plot_graph(
        [y_test.to_numpy(), prediction_lr.to_numpy(), prediction_lrs.to_numpy()],
        ['Actual', 'Custom LR', 'SKLearn LR'])

    # Residual plots: one per algorithm (predicted vs y - predicted).
    dt_residual_plot = residual_plot_graph(prediction_dt.to_numpy(), y_test.to_numpy())
    dts_residual_plot = residual_plot_graph(prediction_dts.to_numpy(), y_test.to_numpy())
    rf_residual_plot = residual_plot_graph(prediction_rf.to_numpy(), y_test.to_numpy())
    rfs_residual_plot = residual_plot_graph(prediction_rfs.to_numpy(), y_test.to_numpy())
    rfsdt_residual_plot = residual_plot_graph(prediction_rfsdt.to_numpy(), y_test.to_numpy())
    lr_residual_plot = residual_plot_graph(prediction_lr.to_numpy(), y_test.to_numpy())
    lrs_residual_plot = residual_plot_graph(prediction_lrs.to_numpy(), y_test.to_numpy())

    return (all_predictions, df_results, scatter_plot, custom_scatter_plot, sklearn_scatter_plot, dt_scatter_plot,
            rf_scatter_plot, lr_scatter_plot, line_plot, custom_line_plot, sklearn_line_plot, dt_line_plot,
            rf_line_plot, lr_line_plot, dt_residual_plot, dts_residual_plot, rf_residual_plot, rfs_residual_plot,
            rfsdt_residual_plot, lr_residual_plot, lrs_residual_plot)
300
+
301
+
302
# When the data/algorithms are filtered & 'All' button
def filter_data(records, algorithm, selected_district, selected_year, dt_max_depth, dt_min_samples_split,
                dt_min_samples_leaf, rf_n_estimators, rf_max_depth, lr_learning_rate, lr_num_iterations):
    """Run the selected algorithm (or all) on the optionally filtered dataset.

    Always returns the 24-tuple expected by the ``submit_btn.click`` outputs:
    (filtered records, predictions, X_test, filtered X_test, metrics table)
    followed by 19 plot slots (None when a single algorithm was run).
    """
    if algorithm == "All" or algorithm is None:
        # Process all algorithms; forward the 21 outputs plus the pass-through tables.
        (df_predictions, df_results, scatter_plot, custom_scatter_plot, sklearn_scatter_plot, dt_scatter_plot,
         rf_scatter_plot, lr_scatter_plot, line_plot, custom_line_plot, sklearn_line_plot, dt_line_plot,
         rf_line_plot, lr_line_plot, dt_residual_plot, dts_residual_plot, rf_residual_plot, rfs_residual_plot,
         rfsdt_residual_plot, lr_residual_plot, lrs_residual_plot) = process_all_algorithms(
            dt_max_depth, dt_min_samples_split, dt_min_samples_leaf,
            rf_n_estimators, rf_max_depth, lr_learning_rate, lr_num_iterations)
        return (records, df_predictions, X_test, None, df_results, scatter_plot, custom_scatter_plot,
                sklearn_scatter_plot, dt_scatter_plot, rf_scatter_plot, lr_scatter_plot, line_plot,
                custom_line_plot, sklearn_line_plot, dt_line_plot, rf_line_plot, lr_line_plot,
                dt_residual_plot, dts_residual_plot, rf_residual_plot, rfs_residual_plot,
                rfsdt_residual_plot, lr_residual_plot, lrs_residual_plot)

    # Convert selected district to the corresponding value from district_mapping_display
    selected_district_value = mapping_display["district"].get(selected_district, None)
    # Convert selected year to the corresponding value from year_mapping_display
    selected_year_value = mapping_display["year"].get(selected_year, None)

    # Filter the human-readable records table by district and/or year.
    if (selected_district_value != "All" and selected_district_value is not None and selected_year != "All" and
            selected_year is not None):
        filtered_data = records[
            (pd.notna(records["District"]) & (records["District"] == int(selected_district_value))) &
            (pd.notna(records["Year"]) & (records["Year"] == int(selected_year)))]
    elif selected_district_value != "All" and selected_district_value is not None:
        filtered_data = records[pd.notna(records["District"]) & (records["District"] == int(selected_district_value))]
    elif selected_year != "All" and selected_year is not None:
        filtered_data = records[pd.notna(records["Year"]) & (records["Year"] == int(selected_year))]
    else:  # If both inputs are None, return the original records
        filtered_data = records

    # Evaluation: build the matching filter for the one-hot encoded test split.
    query_str_year = f'Year == {selected_year_value}' if (selected_year_value != "All" and
                                                          selected_year_value is not None) else None
    query_str_district = f'District_{selected_district_value} == 1' if (selected_district_value != "All" and
                                                                       selected_district_value is not None) else None

    query_str = " and ".join(filter(None, [query_str_district, query_str_year]))

    filtered_X_test = X_test.query(query_str) if query_str else X_test

    # Check if filtered dataset is empty
    if filtered_X_test.empty:
        no_results = [{"Algorithm": algorithm, "Error": "No samples for the selected filter."}]
        # BUG FIX: pad to the full 24 outputs the Gradio wiring expects;
        # the original returned only 5 values here, breaking the callback.
        return (filtered_data, None, X_test, filtered_X_test, pd.DataFrame(no_results)) + (None,) * 19

    # Initialising prediction results
    all_predictions = pd.DataFrame()  # Initialize an empty dataframe to store prediction/s
    all_predictions["Actual"] = y_test.values

    # Evaluate algorithm
    if algorithm == "Decision Tree - Custom":
        prediction_dt, mae, mse, rmse, r2, variance = evaluate_algorithm(
            decision_tree, X_train, y_train, X_test, y_test,
            {"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split})
        all_predictions["Decision Tree - Custom"] = prediction_dt
    elif algorithm == "Decision Tree - SKLearn":
        prediction_dts, mae, mse, rmse, r2, variance = evaluate_algorithm(
            decision_tree_sklearn, X_train, y_train,
            X_test, y_test, {"max_depth": dt_max_depth, "min_samples_split": dt_min_samples_split,
                             "min_samples_leaf": dt_min_samples_leaf})
        all_predictions["Decision Tree - SKLearn"] = prediction_dts
    elif algorithm == "Random Forest - Custom":
        prediction_rf, mae, mse, rmse, r2, variance = evaluate_algorithm(
            random_forest, X_train, y_train, X_test,
            y_test, {"max_depth": rf_max_depth,
                     "n_estimators": rf_n_estimators,
                     "min_samples_split": dt_min_samples_split,
                     "min_samples_leaf": dt_min_samples_leaf})
        all_predictions["Random Forest - Custom"] = prediction_rf
    elif algorithm == "Random Forest - SKLearn":
        prediction_rfs, mae, mse, rmse, r2, variance = evaluate_algorithm(
            random_forest_sklearn, X_train, y_train, X_test, y_test, {})
        all_predictions["Random Forest - SKLearn"] = prediction_rfs
    elif algorithm == "Random Forest - Custom using SKLearn DT":
        prediction_rfsdt, mae, mse, rmse, r2, variance = evaluate_algorithm(
            random_forest_sklearn_decision_trees, X_train, y_train, X_test, y_test,
            {"max_depth": rf_max_depth,
             "n_estimators": rf_n_estimators,
             "min_samples_split": dt_min_samples_split,
             "min_samples_leaf": dt_min_samples_leaf})
        all_predictions["Random Forest - Custom using SKLearn DT"] = prediction_rfsdt
    elif algorithm == "Linear Regression - Custom":
        prediction_lr, mae, mse, rmse, r2, variance = evaluate_algorithm(
            linear_regression, X_train, y_train, X_test, y_test,
            {"learning_rate": lr_learning_rate,
             "num_iterations": lr_num_iterations})
        all_predictions["Linear Regression - Custom"] = prediction_lr
    elif algorithm == "Linear Regression - SKLearn":
        # BUG FIX: sklearn's LinearRegression wrapper takes no hyperparameters;
        # the original passed learning_rate/num_iterations here, raising a
        # TypeError (the "All" path already, correctly, passes {}).
        prediction_lrs, mae, mse, rmse, r2, variance = evaluate_algorithm(
            linear_regression_sklearn, X_train, y_train, X_test, y_test, {})
        all_predictions["Linear Regression - SKLearn"] = prediction_lrs
    # In case of error
    else:
        mae, mse, rmse, r2, variance = None, None, None, None, None

    results = [{"Algorithm": algorithm, "MAE": mae, "MSE": mse, "RMSE": rmse, "R2": r2, "Explained Variance": variance}]
    df_results = pd.DataFrame(results)  # Convert results to DataFrame

    all_predictions = pd.DataFrame(all_predictions)

    # Single-algorithm runs produce no plots: fill the 19 plot slots with None.
    return (filtered_data, all_predictions, X_test, filtered_X_test, df_results) + (None,) * 19
402
+
403
+
404
# Gradio UI: inputs (dataset preview, hyperparameter tabs, filters) on the left,
# outputs (tables, metrics, plots) on the right, plus the two button wirings.
with gr.Blocks(theme='ParityError/Interstellar') as gr_output:
    alg, district, year = None, None, None  # Initialising inputs for use by all_btn

    gr.Markdown(
        """
        # Machine Learning Approaches to Ethical Analysis of Statistics
        ## January 2024
        ### Created in partial fulfillment of the requirements for the ICS5110 Applied Machine Learning project by: Nathan Camilleri, Nathan Portelli, Oleg Grech.
        Email: {nathan.camillieri, nathan.portelli, oleg.grech}@um.edu.mt
        ### Full project code available at:
        [github.com/NathanPortelli/ICS5110-Applied-ML](https://github.com/NathanPortelli/ICS5110-Applied-ML/)
        ### Instructions:
        Click 'Run all algorithms/datasets' to run all algorithms without filtering, or choose the available filters and click 'Run'. You may also edit the parameters of each algorithm type.
        """)
    with gr.Row():
        with gr.Column():
            gr.Markdown("# Inputs")
            gr.Markdown("### NSO Malta - 'Total Population by region, district and locality' Dataset")
            record = gr.Dataframe(
                value=df_read,
                headers=["District", "Sex", "Year", "Population"],
                datatype=["number", "bool", "number", "number"],
                column_widths=[60, 60, 60, 75],
                height=325,
                interactive=False,
            )
            gr.Markdown("## Parameters")
            with gr.Row():
                with gr.Tab("Decision Tree"):
                    dt_max_depth = gr.Slider(label="Max Depth", minimum=1, maximum=100, value=100, interactive=True,
                                             step=1)
                    dt_min_samples_split = gr.Slider(label="Min Samples Split", minimum=0, maximum=20, value=2,
                                                     interactive=True, step=1)
                    dt_min_samples_leaf = gr.Slider(label="Min Samples Leaf", minimum=1, maximum=20, value=5,
                                                    interactive=True, step=1)
                with gr.Tab("Random Forest"):
                    rf_n_estimators = gr.Slider(label="N Estimators", minimum=1, maximum=100, value=100,
                                                interactive=True, step=1)
                    rf_max_depth = gr.Slider(label="Max Depth", minimum=1, maximum=100, value=100,
                                             interactive=True, step=1)
                    # rf_custom = gr.Dropdown([True, False], label="Custom", value=False, interactive=True)
                with gr.Tab("Linear Regression"):
                    # BUG FIX: this slider was labelled "Max Depth" (copy-paste
                    # from the Random Forest tab) but controls the learning rate.
                    lr_learning_rate = gr.Slider(label="Learning Rate", minimum=0.001, maximum=1, value=0.01,
                                                 interactive=True, step=0.01)
                    lr_num_iterations = gr.Slider(label="Num of Iterations", minimum=50, maximum=5000, value=1000,
                                                  interactive=True, step=50)
            all_btn = gr.Button(value="Run all algorithms/dataset", variant="secondary")
            gr.Markdown("### or pick the algorithm, district or year to filter the dataset")
            with gr.Column():
                alg = gr.Dropdown(["All", "Decision Tree - Custom", "Decision Tree - SKLearn",
                                   "Random Forest - Custom", "Random Forest - SKLearn",
                                   "Random Forest - Custom using SKLearn DT", "Linear Regression - Custom",
                                   "Linear Regression - SKLearn"],
                                  label="Select Algorithm", value="All")
                district = gr.Dropdown(
                    ["Southern Harbour", "Northern Harbour", "South Eastern", "Western", "Northern",
                     "Gozo & Comino", "All"], label="Select District", value="All")
                year = gr.Dropdown(list(mapping_display["year"].keys()) + ["All"], label="Select Year", value="All")
            with gr.Row():
                submit_btn = gr.Button(value="Run", variant="primary")
        with gr.Column():
            gr.Markdown("# Outputs")
            gr.Markdown("## Filtered Inputs/Outputs")
            with gr.Tab("Filtered Dataset Records"):
                filtered_records = gr.Dataframe(label="", height=300)
            with gr.Tab("Total X_Test Output"):
                total_x_test = gr.Dataframe(label="", height=300)
            with gr.Tab("Filtered X_Test Output"):
                filtered_x_test = gr.Dataframe(label="", height=300)
            gr.Markdown("## Algorithm Evaluation")
            evaluation = gr.Dataframe(label="")
            gr.Markdown("## Prediction Results")
            predictions = gr.Dataframe(label="Predicted vs Actual", height=300)
            gr.Markdown("## Graph Plots")
            with gr.Tab("Scatter Plots"):
                scatter_plot = gr.Plot(label="All Algorithms")
                custom_scatter_plot = gr.Plot(label="Custom Implementations")
                sklearn_scatter_plot = gr.Plot(label="SKLearn Implementations")
                dt_scatter_plot = gr.Plot(label="Decision Tree Implementations")
                rf_scatter_plot = gr.Plot(label="Random Forest Implementations")
                lr_scatter_plot = gr.Plot(label="Linear Regression Implementations")
            with gr.Tab("Line Plots"):
                line_plot = gr.Plot(label="All Algorithms")
                custom_line_plot = gr.Plot(label="Custom Implementations")
                sklearn_line_plot = gr.Plot(label="SKLearn Implementations")
                dt_line_plot = gr.Plot(label="Decision Tree Implementations")
                rf_line_plot = gr.Plot(label="Random Forest Implementations")
                lr_line_plot = gr.Plot(label="Linear Regression Implementations")
            with gr.Tab("Residual Plots"):
                dt_residual_plot = gr.Plot(label="Custom Decision Tree")
                dts_residual_plot = gr.Plot(label="SKLearn Decision Tree")
                rf_residual_plot = gr.Plot(label="Custom Random Forest")
                rfs_residual_plot = gr.Plot(label="SKLearn Random Forest")
                rfsdt_residual_plot = gr.Plot(label="Custom Random Forest using SKLearn Decision Trees")
                lr_residual_plot = gr.Plot(label="Custom Linear Regression")
                lrs_residual_plot = gr.Plot(label="SKLearn Linear Regression")

    # Filtering logic: filter_data returns 24 values matching these outputs.
    submit_btn.click(filter_data, inputs=[record, alg, district, year,
                                          dt_max_depth, dt_min_samples_split, dt_min_samples_leaf,
                                          rf_n_estimators, rf_max_depth,
                                          lr_learning_rate, lr_num_iterations],
                     outputs=[filtered_records, predictions, total_x_test, filtered_x_test, evaluation, scatter_plot, custom_scatter_plot, sklearn_scatter_plot, dt_scatter_plot, rf_scatter_plot, lr_scatter_plot, line_plot, custom_line_plot, sklearn_line_plot, dt_line_plot, rf_line_plot, lr_line_plot, dt_residual_plot, dts_residual_plot, rf_residual_plot, rfs_residual_plot, rfsdt_residual_plot, lr_residual_plot, lrs_residual_plot])

    # Run all algorithms/dataset: process_all_algorithms returns 21 values matching these outputs.
    all_btn.click(process_all_algorithms, inputs=[dt_max_depth, dt_min_samples_split, dt_min_samples_leaf,
                                                  rf_n_estimators, rf_max_depth,
                                                  lr_learning_rate, lr_num_iterations],
                  outputs=[predictions, evaluation, scatter_plot, custom_scatter_plot, sklearn_scatter_plot, dt_scatter_plot, rf_scatter_plot, lr_scatter_plot, line_plot, custom_line_plot, sklearn_line_plot, dt_line_plot, rf_line_plot, lr_line_plot, dt_residual_plot, dts_residual_plot, rf_residual_plot, rfs_residual_plot, rfsdt_residual_plot, lr_residual_plot, lrs_residual_plot])

if __name__ == "__main__":
    gr_output.launch()
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio>=4.7.1
2
+ pandas>=2.1.3
3
+ numpy>=1.26.2
5
+ scikit-learn>=1.3.2