import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.metrics import roc_curve, auc, confusion_matrix

# Shared display conventions: the "Churn" target is encoded as 0 / 1 throughout
# this module, and both classes keep the same colour in every chart.
_CHURN_LABELS = {0: "Retained", 1: "Churned"}
_CHURN_COLORS = {"Retained": "#636EFA", "Churned": "#EF553B"}


def plot_churn_distribution(df: pd.DataFrame) -> go.Figure:
    """Return a donut chart of how many rows fall into each churn class.

    Args:
        df: Frame with a ``"Churn"`` column encoded as 0 (retained) / 1 (churned).

    Returns:
        A pie figure with a 0.45 hole, showing percent, label and raw count.
    """
    counts = df["Churn"].value_counts().reset_index()
    # Rename positionally: the column names produced by
    # value_counts().reset_index() differ between pandas versions.
    counts.columns = ["Churn", "Count"]
    counts["Label"] = counts["Churn"].map(_CHURN_LABELS)

    fig = px.pie(
        counts,
        values="Count",
        names="Label",
        hole=0.45,
        color="Label",
        color_discrete_map=_CHURN_COLORS,
    )
    fig.update_traces(textinfo="percent+label+value")
    fig.update_layout(title="Customer Churn Distribution", showlegend=False)
    return fig


def plot_feature_histogram(df: pd.DataFrame, feature: str) -> go.Figure:
    """Return overlaid histograms of ``feature``, one per churn class.

    Args:
        df: Frame containing ``feature`` and a 0/1 ``"Churn"`` column. It is
            not modified; the label column is added to a derived frame.
        feature: Name of the column to plot on the x axis.

    Returns:
        A histogram figure coloured by churn status.
    """
    labelled = df.assign(Churn_Label=df["Churn"].map(_CHURN_LABELS))

    fig = px.histogram(
        labelled,
        x=feature,
        color="Churn_Label",
        barmode="overlay",
        color_discrete_map=_CHURN_COLORS,
        opacity=0.7,
    )
    fig.update_layout(title=f"Distribution of {feature} by Churn Status")
    return fig


def plot_categorical_churn_rate(df: pd.DataFrame, feature: str) -> go.Figure:
    """Return a bar chart of the churn rate (in percent) per value of ``feature``.

    Args:
        df: Frame containing ``feature`` and a numeric ``"Churn"`` column.
        feature: Column to group by.

    Returns:
        A bar figure whose bars are labelled with the rate rounded to one
        decimal place.
    """
    grouped = df.groupby(feature)["Churn"].mean().reset_index()
    grouped.columns = [feature, "Churn Rate"]
    # Mean of the 0/1 target is the churn fraction; express it as a percentage.
    grouped["Churn Rate"] = (grouped["Churn Rate"] * 100).round(1)

    fig = px.bar(
        grouped,
        x=feature,
        y="Churn Rate",
        text="Churn Rate",
        color="Churn Rate",
        color_continuous_scale="RdYlGn_r",
    )
    fig.update_traces(texttemplate="%{text:.1f}%", textposition="outside")
    fig.update_layout(
        title=f"Churn Rate by {feature}",
        yaxis_title="Churn Rate (%)",
        coloraxis_showscale=False,
    )
    return fig


def plot_correlation_heatmap(df: pd.DataFrame, numeric_cols: list[str]) -> go.Figure:
    """Return a heatmap of the pairwise correlations between ``numeric_cols``.

    Args:
        df: Source frame.
        numeric_cols: Columns of ``df`` to correlate.

    Returns:
        A heatmap figure annotated with two-decimal coefficients, with the
        colour scale fixed to the range [-1, 1].
    """
    corr = df[numeric_cols].corr()
    fig = px.imshow(
        corr,
        text_auto=".2f",
        color_continuous_scale="RdBu_r",
        aspect="auto",
        zmin=-1,
        zmax=1,
    )
    fig.update_layout(title="Feature Correlation Heatmap")
    return fig


def plot_roc_curves(model_entries: list, y_test) -> go.Figure:
    """Return ROC curves for several models drawn on one set of axes.

    Accepts a list of ``(name, model, X_test)`` tuples so each model can use
    its own test data.

    Args:
        model_entries: ``(name, model, X_test)`` tuples. Each model must
            expose ``predict_proba`` or, failing that, ``decision_function``.
        y_test: True labels, scored against every model's predictions.

    Returns:
        A figure with one line per model (AUC shown in the legend) plus a
        dashed diagonal reference line.
    """
    fig = go.Figure()

    for name, model, X_test in model_entries:
        if hasattr(model, "predict_proba"):
            # Column 1 holds the score of the second (positive) class.
            y_score = model.predict_proba(X_test)[:, 1]
        else:
            y_score = model.decision_function(X_test)

        fpr, tpr, _ = roc_curve(y_test, y_score)
        roc_auc = auc(fpr, tpr)
        fig.add_trace(go.Scatter(
            x=fpr,
            y=tpr,
            mode="lines",
            name=f"{name} (AUC={roc_auc:.3f})",
        ))

    # Diagonal reference line.
    fig.add_trace(go.Scatter(
        x=[0, 1],
        y=[0, 1],
        mode="lines",
        line=dict(dash="dash", color="gray"),
        name="Random",
    ))
    fig.update_layout(
        title="ROC Curves — Model Comparison",
        xaxis_title="False Positive Rate",
        yaxis_title="True Positive Rate",
        legend=dict(x=0.55, y=0.05),
    )
    return fig


def plot_confusion_matrix(y_true, y_pred, title: str = "Confusion Matrix") -> go.Figure:
    """Return an annotated 2x2 confusion-matrix heatmap.

    Args:
        y_true: True labels, encoded as 0 (retained) / 1 (churned).
        y_pred: Predicted labels in the same encoding.
        title: Figure title.

    Returns:
        A heatmap figure with actual classes on the y axis and predicted
        classes on the x axis.
    """
    class_names = ["Retained", "Churned"]
    # Pin the label set: without it the matrix shrinks to 1x1 whenever only
    # one class occurs in the data, which no longer matches the two tick
    # labels passed to imshow below.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    fig = px.imshow(
        cm,
        text_auto=True,
        color_continuous_scale="Blues",
        x=class_names,
        y=class_names,
        aspect="equal",
    )
    fig.update_layout(
        title=title,
        xaxis_title="Predicted",
        yaxis_title="Actual",
        coloraxis_showscale=False,
    )
    return fig


def plot_gauge(probability: float) -> go.Figure:
    """Return a gauge showing ``probability`` as a percentage.

    The bar is green below 0.3, amber below 0.6 and red otherwise; the
    background bands use the same 30 / 60 breakpoints.

    Args:
        probability: Value to display; it is multiplied by 100 for the gauge,
            whose axis runs from 0 to 100.

    Returns:
        An indicator figure 300 px high.
    """
    if probability < 0.3:
        bar_color = "#2ecc71"
    elif probability < 0.6:
        bar_color = "#f39c12"
    else:
        bar_color = "#e74c3c"

    fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=probability * 100,
        number={"suffix": "%"},
        gauge={
            "axis": {"range": [0, 100]},
            "bar": {"color": bar_color},
            "steps": [
                {"range": [0, 30], "color": "#d5f5e3"},
                {"range": [30, 60], "color": "#fdebd0"},
                {"range": [60, 100], "color": "#fadbd8"},
            ],
        },
        title={"text": "Churn Probability"},
    ))
    fig.update_layout(height=300)
    return fig


def plot_segments(X_2d: np.ndarray, cluster_labels: np.ndarray, churn: np.ndarray) -> go.Figure:
    """Return a scatter plot of a 2-D projection, coloured by cluster.

    Args:
        X_2d: Array whose first two columns are used as the x and y
            coordinates (the axes are labelled as UMAP dimensions).
        cluster_labels: Cluster id per point; converted to strings so the
            clusters get discrete colours.
        churn: Per-point churn flag; values equal to 1 are drawn with an "x"
            marker, everything else with a circle.

    Returns:
        A scatter figure with colour encoding the cluster and marker symbol
        encoding churn status.
    """
    seg_df = pd.DataFrame({
        "UMAP_1": X_2d[:, 0],
        "UMAP_2": X_2d[:, 1],
        "Cluster": cluster_labels.astype(str),
        "Churn": np.where(churn == 1, "Churned", "Retained"),
    })

    fig = px.scatter(
        seg_df,
        x="UMAP_1",
        y="UMAP_2",
        color="Cluster",
        symbol="Churn",
        opacity=0.6,
        symbol_map={"Churned": "x", "Retained": "circle"},
    )
    fig.update_layout(
        title="Customer Segments (UMAP Projection)",
        xaxis_title="UMAP Dimension 1",
        yaxis_title="UMAP Dimension 2",
    )
    return fig


def plot_metric_history(history: dict[str, list[float]], batch_labels: list[str]) -> go.Figure:
    """Return a line chart with one series per metric across batches.

    Args:
        history: Mapping of metric name to its values, one per batch.
        batch_labels: X-axis labels; each series uses only as many labels as
            it has values.

    Returns:
        A figure with a lines+markers trace per metric and the y axis fixed
        to [0, 1].
    """
    fig = go.Figure()
    for metric_name, values in history.items():
        fig.add_trace(go.Scatter(
            # Truncate so a shorter series still lines up with its labels.
            x=batch_labels[:len(values)],
            y=values,
            mode="lines+markers",
            name=metric_name,
        ))
    fig.update_layout(
        title="Model Performance Over Streaming Batches",
        xaxis_title="Batch",
        yaxis_title="Score",
        yaxis=dict(range=[0, 1]),
    )
    return fig