| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| from sklearn.model_selection import learning_curve | |
| from sklearn.metrics import ( | |
| roc_curve, precision_recall_curve, | |
| confusion_matrix, classification_report | |
| ) | |
def regression_graphs(graph_type, X, y, model, pipeline, y_test, preds):
    """Build the requested diagnostic plot for a regression model.

    Args:
        graph_type: Name of the plot to draw. Recognised values are
            "Actual vs Predicted", "Residual Plot", "Residual Histogram",
            "Feature Importance" and "Learning Curve".
        X: Feature data forwarded to ``learning_curve`` (used only for
            "Learning Curve").
        y: Target data forwarded to ``learning_curve`` (used only for
            "Learning Curve").
        model: Estimator object; only inspected for a
            ``feature_importances_`` attribute ("Feature Importance").
        pipeline: Estimator forwarded to ``learning_curve``.
        y_test: True target values. "Actual vs Predicted" reads
            ``y_test.values``, so it presumably is a pandas Series --
            verify against the caller.
        preds: Predicted values, subtracted element-wise from ``y_test``
            to obtain residuals.

    Returns:
        The matplotlib figure that was drawn, or ``None`` when
        ``graph_type`` is not recognised or when "Feature Importance" is
        requested for a model without ``feature_importances_``.
    """
    # Default result: keeps the final return well-defined for unknown graph
    # types instead of raising UnboundLocalError.
    fig = None

    if graph_type == "Actual vs Predicted":
        fig, ax = plt.subplots()
        # Only the first 100 points are drawn to keep the line plot readable.
        ax.plot(y_test.values[:100])
        ax.plot(preds[:100])
        ax.legend(["Actual", "Predicted"])
    elif graph_type == "Residual Plot":
        residuals = y_test - preds
        fig, ax = plt.subplots()
        ax.scatter(preds, residuals)
        # Horizontal reference line at zero residual.
        ax.axhline(0)
    elif graph_type == "Residual Histogram":
        residuals = y_test - preds
        fig, ax = plt.subplots()
        ax.hist(residuals, bins=30)
    elif graph_type == "Feature Importance":
        # Not every estimator exposes importances; fig stays None then.
        if hasattr(model, "feature_importances_"):
            importances = model.feature_importances_
            fig, ax = plt.subplots()
            ax.bar(range(len(importances)), importances)
    elif graph_type == "Learning Curve":
        sizes, train_scores, test_scores = learning_curve(pipeline, X, y)
        fig, ax = plt.subplots()
        # Scores are averaged along axis 1 to get one value per train size.
        ax.plot(sizes, train_scores.mean(axis=1))
        ax.plot(sizes, test_scores.mean(axis=1))
        ax.legend(["Train", "Test"])

    return fig
def classification_graphs(graph_type, pipeline, X_test, y_test, preds):
    """Build the requested diagnostic output for a classification model.

    Args:
        graph_type: Name of the output to produce. Recognised values are
            "Confusion Matrix", "ROC Curve", "Per-Class Metrics Table",
            "Precision-Recall Curve" and "Probability Histogram".
        pipeline: Estimator whose ``predict_proba`` is called on ``X_test``
            for the probability-based plots.
        X_test: Feature data passed to ``pipeline.predict_proba``.
        y_test: True labels.
        preds: Predicted labels, used for the confusion matrix and the
            metrics table.

    Returns:
        A matplotlib figure for the plot types, a pandas DataFrame for
        "Per-Class Metrics Table", or ``None`` when ``graph_type`` is not
        recognised.
    """
    # Default result: keeps the final return well-defined for unknown graph
    # types instead of raising UnboundLocalError.
    fig = None

    if graph_type == "Confusion Matrix":
        cm = confusion_matrix(y_test, preds)
        fig, ax = plt.subplots()
        ax.imshow(cm)
        ax.set_title("Confusion Matrix")
    elif graph_type == "ROC Curve":
        # Column 1 of predict_proba -- assumes a binary classifier whose
        # second class is the positive one; TODO confirm with callers.
        probs = pipeline.predict_proba(X_test)[:, 1]
        fpr, tpr, _ = roc_curve(y_test, probs)
        fig, ax = plt.subplots()
        ax.plot(fpr, tpr)
        ax.set_title("ROC Curve")
    elif graph_type == "Per-Class Metrics Table":
        # This branch returns a table rather than a figure: the report dict
        # is turned into a DataFrame and transposed.
        report = classification_report(y_test, preds, output_dict=True)
        fig = pd.DataFrame(report).transpose()
    elif graph_type == "Precision-Recall Curve":
        probs = pipeline.predict_proba(X_test)[:, 1]
        precision, recall, _ = precision_recall_curve(y_test, probs)
        fig, ax = plt.subplots()
        # Recall on the x-axis, precision on the y-axis.
        ax.plot(recall, precision)
        ax.set_title("Precision-Recall Curve")
    elif graph_type == "Probability Histogram":
        probs = pipeline.predict_proba(X_test)[:, 1]
        fig, ax = plt.subplots()
        ax.hist(probs, bins=20)
        ax.set_title("Prediction Probability Histogram")

    return fig