File size: 3,213 Bytes
94d2494
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"""
src.visualisations.regression – Regression-specific charts.
"""

from __future__ import annotations

from typing import Dict

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.pipeline import Pipeline

from src import config
from src.visualisations.common import _base_layout


def actual_vs_predicted_chart(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    model_name: str = "Model",
) -> go.Figure:
    """Build a scatter plot of actual vs predicted values.

    Each sample is drawn as a marker at (actual, predicted). A dashed
    diagonal spanning the combined value range marks where a perfect
    prediction would fall.

    Args:
        y_true: Observed target values. Any array-like is accepted and
            flattened to one dimension.
        y_pred: Predicted target values, flattened the same way.
        model_name: Label interpolated into the chart title.

    Returns:
        A figure with a "Predictions" marker trace and, when both inputs
        are non-empty, a "Perfect Prediction" reference line.
    """
    # Flatten so column vectors of shape (n, 1) and pandas objects plot
    # the same way as plain 1-D arrays.
    actual = np.asarray(y_true).ravel()
    predicted = np.asarray(y_pred).ravel()

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=actual, y=predicted, mode="markers",
        marker=dict(color="#6C63FF", size=6, opacity=0.6, line=dict(width=0.5, color="#EAEAF5")),
        name="Predictions",
    ))

    # np.min/np.max raise on empty input, so the diagonal is only drawn
    # when there is data to span.
    if actual.size and predicted.size:
        # NaN-aware reductions: a single missing value must not turn the
        # diagonal's endpoints into NaN and make the line vanish.
        min_val = min(float(np.nanmin(actual)), float(np.nanmin(predicted)))
        max_val = max(float(np.nanmax(actual)), float(np.nanmax(predicted)))
        fig.add_trace(go.Scatter(
            x=[min_val, max_val], y=[min_val, max_val],
            mode="lines", line=dict(dash="dash", color="#FF6584", width=2),
            name="Perfect Prediction",
        ))

    fig.update_layout(
        **_base_layout(title=f"🎯 Actual vs Predicted — {model_name}"),
        xaxis=dict(title="Actual Values", gridcolor="#2A2E3F"),
        yaxis=dict(title="Predicted Values", gridcolor="#2A2E3F"),
    )
    return fig


def residual_plot(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    model_name: str = "Model",
) -> go.Figure:
    """Build a residual plot for a regression model.

    Residuals are computed as ``actual - predicted`` and plotted against
    the predicted values, with a dashed horizontal line at zero.

    Args:
        y_true: Observed target values. Any array-like is accepted and
            flattened to one dimension.
        y_pred: Predicted target values, flattened the same way.
        model_name: Label interpolated into the chart title.

    Returns:
        A figure with a "Residuals" marker trace and a "Zero Error"
        reference line.
    """
    # Convert to flat arrays before subtracting. Otherwise (n,) minus
    # (n, 1) broadcasts to an (n, n) matrix, and two pandas Series with
    # different indexes are aligned by label instead of by position.
    actual = np.asarray(y_true).ravel()
    predicted = np.asarray(y_pred).ravel()
    residuals = actual - predicted

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=predicted, y=residuals, mode="markers",
        marker=dict(color="#43D8C9", size=6, opacity=0.6, line=dict(width=0.5, color="#EAEAF5")),
        name="Residuals",
    ))
    fig.add_hline(y=0, line_dash="dash", line_color="#FF6584", line_width=2,
                  annotation_text="Zero Error", annotation_position="top right")

    fig.update_layout(
        **_base_layout(title=f"📉 Residual Plot — {model_name}"),
        xaxis=dict(title="Predicted Values", gridcolor="#2A2E3F"),
        yaxis=dict(title="Residuals (Actual − Predicted)", gridcolor="#2A2E3F"),
    )
    return fig


def regression_comparison_chart(
    fitted_models: Dict[str, Pipeline],
    X_test: pd.DataFrame,
    y_test: pd.Series,
) -> go.Figure:
    """Overlay the predictions of several regression models on the actuals.

    Samples are reordered by ascending actual target value. The sorted
    actuals are drawn as one line, and each model's predictions, put in
    the same order, are drawn as an additional line.

    Args:
        fitted_models: Mapping of display name to an object exposing
            ``predict(X_test)``. Iteration order determines both the
            trace order and the colour taken from ``config.COLOR_PALETTE``.
        X_test: Feature matrix passed unchanged to each model's
            ``predict``.
        y_test: Actual target values. Any array-like is accepted and
            flattened to one dimension.

    Returns:
        A figure with one "Actual" line plus one line per model.
    """
    # np.asarray accepts Series, ndarray and list targets alike;
    # ``.values`` exists only on pandas objects.
    actual = np.asarray(y_test).ravel()
    sort_idx = np.argsort(actual)
    x_axis = np.arange(actual.size)

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x_axis, y=actual[sort_idx],
        mode="lines", line=dict(color="#EAEAF5", width=2), name="Actual",
    ))

    palette = config.COLOR_PALETTE
    for i, (name, pipeline) in enumerate(fitted_models.items()):
        # Flatten in case predict returns a column vector of shape (n, 1),
        # which would otherwise stay 2-D after the reordering below.
        y_pred = np.asarray(pipeline.predict(X_test)).ravel()
        fig.add_trace(go.Scatter(
            x=x_axis, y=y_pred[sort_idx], mode="lines",
            # Cycle through the palette when there are more models than colours.
            line=dict(color=palette[i % len(palette)], width=1.5),
            name=name, opacity=0.8,
        ))

    fig.update_layout(
        **_base_layout(title="📈 Predictions Overlay (sorted by actual)"),
        xaxis=dict(title="Sample Index (sorted)", gridcolor="#2A2E3F"),
        yaxis=dict(title="Target Value", gridcolor="#2A2E3F"),
    )
    return fig