File size: 3,346 Bytes
57176e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import  GaussianNB
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split

class StHelper:

    def __init__(self,X,y):
        self.X = X
        self.y = y
        # Apply train test split
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, self.y, test_size=0.2, random_state=42)

    def create_base_estimators(self,estimators,voting_type):

        algos = []

        if 'KNN' in estimators:
            knn_clf = KNeighborsClassifier()
            algos.append(('knn', knn_clf))
        if 'Logistic Regression' in estimators:
            log_clf = LogisticRegression(solver="lbfgs", random_state=42)
            algos.append(('lr', log_clf))
        if 'Gaussian Naive Bayes' in estimators:
            gnb_clf = GaussianNB()
            algos.append(('gnb', gnb_clf))
        if 'SVM' in estimators:
            if voting_type == "hard":
                svm_clf = SVC(gamma="scale", random_state=42)
            else:
                svm_clf = SVC(gamma="scale", probability=True, random_state=42)
            algos.append(('svc', svm_clf))
        if 'Random Forest' in estimators:
            rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)
            algos.append(('rf', rnd_clf))

        return algos

    def train_voting_classifier(self,algos, voting_type):

        voting_clf = VotingClassifier(
            estimators=algos,
            voting=voting_type)

        voting_clf.fit(self.X_train, self.y_train)
        y_pred = voting_clf.predict(self.X_test)

        accuracy = accuracy_score(self.y_test, y_pred)

        return voting_clf, accuracy

    def draw_main_graph(self,voting_clf,ax):

        XX, YY, input_array = self.draw_meshgrid()
        labels = voting_clf.predict(input_array)
        ax.contourf(XX, YY, labels.reshape(XX.shape), alpha=0.5, cmap='rainbow')

    def plot_other_graphs(self,algos):

        figs = []
        XX, YY, input_array = self.draw_meshgrid()

        for estimator in algos:
            estimator[1].fit(self.X_train, self.y_train)
            labels = estimator[1].predict(input_array)
            fig1, ax1 = plt.subplots()
            ax1.contourf(XX, YY, labels.reshape(XX.shape), alpha=0.5, cmap='rainbow')
            figs.append(fig1)

        return figs

    def calculate_base_model_accuracy(self,algos):

        accuracy_scores = []

        for model in algos:
            model[1].fit(self.X_train, self.y_train)
            y_pred = model[1].predict(self.X_test)
            accuracy_scores.append(accuracy_score(self.y_test, y_pred))

        return accuracy_scores

    def draw_meshgrid(self):
        a = np.arange(start=self.X[:, 0].min() - 1, stop=self.X[:, 0].max() + 1, step=0.01)
        b = np.arange(start=self.X[:, 1].min() - 1, stop=self.X[:, 1].max() + 1, step=0.01)

        XX, YY = np.meshgrid(a, b)

        input_array = np.array([XX.ravel(), YY.ravel()]).T

        return XX, YY, input_array

        labels = voting_clf.predict(input_array)