File size: 2,448 Bytes
d7e53e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import learning_curve
from sklearn.metrics import (
    roc_curve, precision_recall_curve,
    confusion_matrix, classification_report
)

def regression_graphs(graph_type, X, y, model, pipeline, y_test, preds):
    """Build the requested diagnostic plot for a regression model.

    Args:
        graph_type: Name of the plot to build. Recognized values are
            "Actual vs Predicted", "Residual Plot", "Residual Histogram",
            "Feature Importance" and "Learning Curve".
        X: Feature data handed to ``learning_curve``; only used for
            "Learning Curve".
        y: Target data handed to ``learning_curve``; only used for
            "Learning Curve".
        model: Object inspected for a ``feature_importances_`` attribute;
            only used for "Feature Importance".
        pipeline: Estimator handed to ``learning_curve``; only used for
            "Learning Curve".
        y_test: True target values. "Actual vs Predicted" reads
            ``y_test.values``, so it must expose that attribute
            (presumably a pandas Series -- confirm against the caller).
        preds: Predicted values aligned with ``y_test``.

    Returns:
        The matplotlib figure for the requested plot, or ``None`` when
        ``graph_type`` is not recognized or when "Feature Importance" is
        requested for a model without ``feature_importances_``.
    """
    # Default to None so an unrecognized graph_type returns None instead of
    # raising UnboundLocalError at the final return.
    fig = None

    if graph_type == "Actual vs Predicted":
        fig, ax = plt.subplots()
        # Only the first 100 samples are drawn.
        ax.plot(y_test.values[:100])
        ax.plot(preds[:100])
        ax.legend(["Actual", "Predicted"])
    elif graph_type == "Residual Plot":
        fig, ax = plt.subplots()
        ax.scatter(preds, y_test - preds)
        # Horizontal reference line at zero residual.
        ax.axhline(0)
    elif graph_type == "Residual Histogram":
        fig, ax = plt.subplots()
        ax.hist(y_test - preds, bins=30)
    elif graph_type == "Feature Importance":
        # Models without feature_importances_ leave fig as None.
        if hasattr(model, "feature_importances_"):
            importances = model.feature_importances_
            fig, ax = plt.subplots()
            ax.bar(range(len(importances)), importances)
    elif graph_type == "Learning Curve":
        sizes, train_scores, test_scores = learning_curve(
            pipeline, X, y
        )
        fig, ax = plt.subplots()
        # Average the scores across the second axis for each training size.
        ax.plot(sizes, train_scores.mean(axis=1))
        ax.plot(sizes, test_scores.mean(axis=1))
        ax.legend(["Train", "Test"])

    return fig

def classification_graphs(graph_type, pipeline, X_test, y_test, preds):
    """Build the requested diagnostic output for a classification model.

    Args:
        graph_type: Name of the output to build. Recognized values are
            "Confusion Matrix", "ROC Curve", "Per-Class Metrics Table",
            "Precision-Recall Curve" and "Probability Histogram".
        pipeline: Fitted estimator; its ``predict_proba`` is called for the
            ROC, precision-recall and probability-histogram outputs.
        X_test: Feature data passed to ``pipeline.predict_proba``.
        y_test: True labels.
        preds: Predicted labels aligned with ``y_test``.

    Returns:
        A matplotlib figure for the plot types, a pandas DataFrame (the
        transposed classification report) for "Per-Class Metrics Table",
        or ``None`` when ``graph_type`` is not recognized.
    """
    # Default to None so an unrecognized graph_type returns None instead of
    # raising UnboundLocalError at the final return.
    fig = None

    if graph_type == "Confusion Matrix":
        cm = confusion_matrix(y_test, preds)
        fig, ax = plt.subplots()
        ax.imshow(cm)
        ax.set_title("Confusion Matrix")
    elif graph_type == "ROC Curve":
        # Column 1 of predict_proba is used as the score.
        # NOTE(review): this presumably assumes binary classification -- confirm.
        probs = pipeline.predict_proba(X_test)[:, 1]
        fpr, tpr, _ = roc_curve(y_test, probs)
        fig, ax = plt.subplots()
        ax.plot(fpr, tpr)
        ax.set_title("ROC Curve")
    elif graph_type == "Per-Class Metrics Table":
        # This branch returns a DataFrame, not a figure.
        report = classification_report(y_test, preds, output_dict=True)
        fig = pd.DataFrame(report).transpose()
    elif graph_type == "Precision-Recall Curve":
        probs = pipeline.predict_proba(X_test)[:, 1]
        p, r, _ = precision_recall_curve(y_test, probs)
        fig, ax = plt.subplots()
        # Recall on the x-axis, precision on the y-axis.
        ax.plot(r, p)
        ax.set_title("Precision-Recall Curve")
    elif graph_type == "Probability Histogram":
        probs = pipeline.predict_proba(X_test)[:, 1]
        fig, ax = plt.subplots()
        ax.hist(probs, bins=20)
        ax.set_title("Prediction Probability Histogram")

    return fig