import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    precision_recall_curve,
    roc_curve,
)
from sklearn.model_selection import learning_curve


def regression_graphs(graph_type, X, y, model, pipeline, y_test, preds):
    """Build the requested diagnostic plot for a regression model.

    Args:
        graph_type: One of "Actual vs Predicted", "Residual Plot",
            "Residual Histogram", "Feature Importance" or "Learning Curve".
        X: Feature data passed to ``learning_curve`` ("Learning Curve" only).
        y: Target data passed to ``learning_curve`` ("Learning Curve" only).
        model: Fitted estimator; only its ``feature_importances_`` attribute
            is read ("Feature Importance" only).
        pipeline: Estimator handed to ``learning_curve``, which is called
            with scikit-learn's default ``cv`` and ``train_sizes``.
        y_test: True target values. Must expose ``.values`` for
            "Actual vs Predicted" (e.g. a pandas Series).
        preds: Predicted values; sliced and subtracted from ``y_test``.

    Returns:
        The matplotlib Figure that was created, or ``None`` when there is
        nothing to draw: either ``graph_type`` is not recognised, or
        "Feature Importance" was requested for a model without
        ``feature_importances_``.
    """
    # Start from None so an unrecognised graph_type returns "no figure"
    # instead of raising UnboundLocalError at the final return.
    fig = None

    if graph_type == "Actual vs Predicted":
        fig, ax = plt.subplots()
        # Only the first 100 points are drawn to keep the line plot legible.
        ax.plot(y_test.values[:100])
        ax.plot(preds[:100])
        ax.legend(["Actual", "Predicted"])

    elif graph_type == "Residual Plot":
        fig, ax = plt.subplots()
        ax.scatter(preds, y_test - preds)
        ax.axhline(0)  # reference line for a zero residual

    elif graph_type == "Residual Histogram":
        fig, ax = plt.subplots()
        ax.hist(y_test - preds, bins=30)

    elif graph_type == "Feature Importance":
        if hasattr(model, "feature_importances_"):
            importances = model.feature_importances_
            fig, ax = plt.subplots()
            # Bars are positioned by feature index; no names are available here.
            ax.bar(range(len(importances)), importances)

    elif graph_type == "Learning Curve":
        sizes, train_scores, test_scores = learning_curve(pipeline, X, y)
        fig, ax = plt.subplots()
        # Average the per-fold scores (axis 1) for each training-set size.
        ax.plot(sizes, train_scores.mean(axis=1))
        ax.plot(sizes, test_scores.mean(axis=1))
        ax.legend(["Train", "Test"])

    return fig


def _second_class_probabilities(pipeline, X_test):
    """Return column 1 of ``pipeline.predict_proba(X_test)``.

    NOTE(review): for a binary classifier this is presumably the positive
    class (scikit-learn orders columns by ``classes_``) -- confirm that the
    callers only use binary targets.
    """
    return pipeline.predict_proba(X_test)[:, 1]


def classification_graphs(graph_type, pipeline, X_test, y_test, preds):
    """Build the requested diagnostic output for a classification model.

    Args:
        graph_type: One of "Confusion Matrix", "ROC Curve",
            "Per-Class Metrics Table", "Precision-Recall Curve" or
            "Probability Histogram".
        pipeline: Fitted estimator; ``predict_proba`` is called on it for
            the ROC, precision-recall and probability-histogram outputs.
        X_test: Feature data passed to ``pipeline.predict_proba``.
        y_test: True labels.
        preds: Predicted labels (used by the confusion matrix and the
            per-class metrics table).

    Returns:
        A matplotlib Figure for the plot types, a pandas DataFrame (one row
        per entry of ``classification_report``) for
        "Per-Class Metrics Table", or ``None`` when ``graph_type`` is not
        recognised.
    """
    # Start from None so an unrecognised graph_type returns "no output"
    # instead of raising UnboundLocalError at the final return.
    fig = None

    if graph_type == "Confusion Matrix":
        cm = confusion_matrix(y_test, preds)
        fig, ax = plt.subplots()
        ax.imshow(cm)
        ax.set_title("Confusion Matrix")

    elif graph_type == "ROC Curve":
        probs = _second_class_probabilities(pipeline, X_test)
        fpr, tpr, _ = roc_curve(y_test, probs)
        fig, ax = plt.subplots()
        ax.plot(fpr, tpr)
        ax.set_title("ROC Curve")

    elif graph_type == "Per-Class Metrics Table":
        report = classification_report(y_test, preds, output_dict=True)
        # Transpose so each class/average becomes a row and metrics are columns.
        fig = pd.DataFrame(report).transpose()

    elif graph_type == "Precision-Recall Curve":
        probs = _second_class_probabilities(pipeline, X_test)
        p, r, _ = precision_recall_curve(y_test, probs)
        fig, ax = plt.subplots()
        ax.plot(r, p)  # recall on x, precision on y
        ax.set_title("Precision-Recall Curve")

    elif graph_type == "Probability Histogram":
        probs = _second_class_probabilities(pipeline, X_test)
        fig, ax = plt.subplots()
        ax.hist(probs, bins=20)
        ax.set_title("Prediction Probability Histogram")

    return fig