Spaces:

OliverPerrin
/

LexiMind

Sleeping

File size: 36,990 Bytes

#!/usr/bin/env python3
"""
LexiMind Training Visualization Suite.

Generates publication-quality visualizations of training progress including:
- Training/validation loss curves with best checkpoint markers
- Per-task metrics (summarization, emotion, topic)
- Learning rate schedule visualization
- 3D loss landscape exploration
- Confusion matrices for classification tasks
- Embedding space projections (t-SNE)
- Training dynamics analysis

Usage:
    python scripts/visualize_training.py                 # Generate core plots
    python scripts/visualize_training.py --interactive   # HTML plots (requires plotly)
    python scripts/visualize_training.py --landscape     # Include 3D loss landscape
    python scripts/visualize_training.py --all           # Generate everything

Author: Oliver Perrin
Date: December 2025
"""

from __future__ import annotations

import argparse
import json
import logging
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap

# Optional imports for advanced features
HAS_PLOTLY = False
HAS_SKLEARN = False
HAS_MLFLOW = False
HAS_MPLOT3D = False

try:
    import plotly.graph_objects as go  # noqa: F401
    from plotly.subplots import make_subplots  # noqa: F401

    HAS_PLOTLY = True
except ImportError:
    pass

try:
    from sklearn.manifold import TSNE  # noqa: F401

    HAS_SKLEARN = True
except ImportError:
    pass

try:
    import mlflow  # noqa: F401
    import mlflow.tracking  # noqa: F401

    HAS_MLFLOW = True
except ImportError:
    pass

try:
    from mpl_toolkits.mplot3d import Axes3D  # type: ignore[import-untyped]  # noqa: F401

    HAS_MPLOT3D = True
except ImportError:
    pass


# =============================================================================
# Configuration
# =============================================================================

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)

PROJECT_ROOT = Path(__file__).parent.parent
OUTPUTS_DIR = PROJECT_ROOT / "outputs"
MLRUNS_DIR = PROJECT_ROOT / "mlruns"
ARTIFACTS_DIR = PROJECT_ROOT / "artifacts"

# Professional color palette (accessible + publication-ready)
COLORS = {
    "primary": "#2E86AB",     # Deep blue - training
    "secondary": "#E94F37",   # Coral red - validation
    "accent": "#28A745",      # Green - best points
    "highlight": "#F7B801",   # Gold - highlights
    "dark": "#1E3A5F",        # Navy - text
    "light": "#F5F5F5",       # Light gray - background
    "topic": "#8338EC",       # Purple
    "emotion": "#FF6B6B",     # Salmon
    "summary": "#06D6A0",     # Teal
}

# Style configuration
plt.style.use("seaborn-v0_8-whitegrid")
plt.rcParams.update({
    "font.family": "sans-serif",
    "font.size": 11,
    "axes.titlesize": 14,
    "axes.titleweight": "bold",
    "axes.labelsize": 12,
    "legend.fontsize": 10,
    "figure.titlesize": 16,
    "figure.titleweight": "bold",
    "savefig.dpi": 150,
    "savefig.bbox": "tight",
})

# Custom colormap for heatmaps
HEATMAP_CMAP = LinearSegmentedColormap.from_list(
    "lexicmap", ["#FFFFFF", "#E8F4FD", "#2E86AB", "#1E3A5F"]
)


# =============================================================================
# MLflow Utilities
# =============================================================================


def get_mlflow_client():
    """Get MLflow client with correct tracking URI."""
    if not HAS_MLFLOW:
        raise ImportError("MLflow not installed. Install with: pip install mlflow")
    import mlflow
    import mlflow.tracking
    # Use SQLite database (same as trainer.py)
    mlflow.set_tracking_uri("sqlite:///mlruns.db")
    return mlflow.tracking.MlflowClient()


def get_latest_run():
    """Get the most recent training run."""
    client = get_mlflow_client()
    experiment = client.get_experiment_by_name("LexiMind")
    if not experiment:
        logger.warning("No 'LexiMind' experiment found")
        return None

    runs = client.search_runs(
        experiment_ids=[experiment.experiment_id],
        order_by=["start_time DESC"],
        max_results=1,
    )
    return runs[0] if runs else None


def get_metric_history(run, metric_name: str) -> tuple[list, list]:
    """Get metric history as (steps, values) tuple."""
    client = get_mlflow_client()
    metrics = client.get_metric_history(run.info.run_id, metric_name)
    if not metrics:
        return [], []
    return [m.step for m in metrics], [m.value for m in metrics]


# =============================================================================
# Core Training Visualizations
# =============================================================================


def plot_loss_curves(run, interactive: bool = False) -> None:
    """
    Plot training and validation loss over time.

    Shows multi-task loss convergence with best checkpoint marker.
    """
    train_steps, train_values = get_metric_history(run, "train_total_loss")
    val_steps, val_values = get_metric_history(run, "val_total_loss")

    if interactive and HAS_PLOTLY:
        import plotly.graph_objects as go
        fig = go.Figure()

        if train_values:
            fig.add_trace(go.Scatter(
                x=train_steps, y=train_values,
                name="Training Loss", mode="lines",
                line=dict(color=COLORS["primary"], width=3)
            ))

        if val_values:
            fig.add_trace(go.Scatter(
                x=val_steps, y=val_values,
                name="Validation Loss", mode="lines",
                line=dict(color=COLORS["secondary"], width=3)
            ))

            # Best point
            best_idx = int(np.argmin(val_values))
            fig.add_trace(go.Scatter(
                x=[val_steps[best_idx]], y=[val_values[best_idx]],
                name=f"Best: {val_values[best_idx]:.3f}",
                mode="markers",
                marker=dict(color=COLORS["accent"], size=15, symbol="star")
            ))

        fig.update_layout(
            title="Training Progress: Multi-Task Loss",
            xaxis_title="Epoch",
            yaxis_title="Loss",
            template="plotly_white",
            hovermode="x unified"
        )

        output_path = OUTPUTS_DIR / "training_loss_curve.html"
        fig.write_html(str(output_path))
        logger.info(f"✓ Saved interactive loss curve to {output_path}")
        return

    # Static matplotlib version
    fig, ax = plt.subplots(figsize=(12, 6))

    if not train_values:
        ax.text(0.5, 0.5, "No training data yet\n\nWaiting for first epoch...",
                ha="center", va="center", fontsize=14, color="gray")
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
    else:
        # Training curve
        ax.plot(train_steps, train_values, label="Training Loss", linewidth=2.5,
                color=COLORS["primary"], alpha=0.9)

        # Validation curve with best point
        if val_values:
            ax.plot(val_steps, val_values, label="Validation Loss", linewidth=2.5,
                    color=COLORS["secondary"], alpha=0.9)

            best_idx = int(np.argmin(val_values))
            ax.scatter([val_steps[best_idx]], [val_values[best_idx]],
                       s=200, c=COLORS["accent"], zorder=5, marker="*",
                       edgecolors="white", linewidth=2,
                       label=f"Best: {val_values[best_idx]:.3f}")

            # Annotate best point
            ax.annotate(f"Epoch {val_steps[best_idx]}",
                        xy=(val_steps[best_idx], val_values[best_idx]),
                        xytext=(10, 20), textcoords="offset points",
                        fontsize=10, color=COLORS["accent"],
                        arrowprops=dict(arrowstyle="->", color=COLORS["accent"]))

        ax.legend(fontsize=11, loc="upper right", framealpha=0.9)
        ax.set_ylim(bottom=0)

    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.set_title("Training Progress: Multi-Task Loss")
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    output_path = OUTPUTS_DIR / "training_loss_curve.png"
    plt.savefig(output_path)
    logger.info(f"✓ Saved loss curve to {output_path}")
    plt.close()


def plot_task_metrics(run, interactive: bool = False) -> None:
    """
    Plot metrics for each task in a 2x2 grid.

    Shows loss and accuracy/F1 for topic, emotion, and summarization tasks.
    """
    client = get_mlflow_client()

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle("Task-Specific Training Metrics", fontsize=16, fontweight="bold", y=1.02)

    # ----- Summarization -----
    ax = axes[0, 0]
    train_sum = client.get_metric_history(run.info.run_id, "train_summarization_loss")
    val_sum = client.get_metric_history(run.info.run_id, "val_summarization_loss")

    if train_sum:
        ax.plot([m.step for m in train_sum], [m.value for m in train_sum],
                label="Train", linewidth=2.5, color=COLORS["summary"])
    if val_sum:
        ax.plot([m.step for m in val_sum], [m.value for m in val_sum],
                label="Validation", linewidth=2.5, color=COLORS["secondary"], linestyle="--")

    ax.set_title("Summarization Loss")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    if train_sum or val_sum:
        ax.legend(loc="upper right")
    ax.grid(True, alpha=0.3)

    # ----- Emotion Detection -----
    ax = axes[0, 1]
    train_emo = client.get_metric_history(run.info.run_id, "train_emotion_loss")
    val_emo = client.get_metric_history(run.info.run_id, "val_emotion_loss")
    train_f1 = client.get_metric_history(run.info.run_id, "train_emotion_f1")
    val_f1 = client.get_metric_history(run.info.run_id, "val_emotion_f1")

    if train_emo:
        ax.plot([m.step for m in train_emo], [m.value for m in train_emo],
                label="Train Loss", linewidth=2.5, color=COLORS["emotion"])
    if val_emo:
        ax.plot([m.step for m in val_emo], [m.value for m in val_emo],
                label="Val Loss", linewidth=2.5, color=COLORS["secondary"], linestyle="--")

    # Secondary axis for F1
    ax2 = ax.twinx()
    if train_f1:
        ax2.plot([m.step for m in train_f1], [m.value for m in train_f1],
                 label="Train F1", linewidth=2, color=COLORS["accent"], alpha=0.7)
    if val_f1:
        ax2.plot([m.step for m in val_f1], [m.value for m in val_f1],
                 label="Val F1", linewidth=2, color=COLORS["highlight"], alpha=0.7)
        ax2.set_ylim(0, 1)

    ax.set_title("Emotion Detection (28 classes)")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax2.set_ylabel("F1 Score", color=COLORS["accent"])
    if train_emo or val_emo:
        ax.legend(loc="upper left")
    if train_f1 or val_f1:
        ax2.legend(loc="upper right")
    ax.grid(True, alpha=0.3)

    # ----- Topic Classification -----
    ax = axes[1, 0]
    train_topic = client.get_metric_history(run.info.run_id, "train_topic_loss")
    val_topic = client.get_metric_history(run.info.run_id, "val_topic_loss")
    train_acc = client.get_metric_history(run.info.run_id, "train_topic_accuracy")
    val_acc = client.get_metric_history(run.info.run_id, "val_topic_accuracy")

    if train_topic:
        ax.plot([m.step for m in train_topic], [m.value for m in train_topic],
                label="Train Loss", linewidth=2.5, color=COLORS["topic"])
    if val_topic:
        ax.plot([m.step for m in val_topic], [m.value for m in val_topic],
                label="Val Loss", linewidth=2.5, color=COLORS["secondary"], linestyle="--")

    ax2 = ax.twinx()
    if train_acc:
        ax2.plot([m.step for m in train_acc], [m.value for m in train_acc],
                 label="Train Acc", linewidth=2, color=COLORS["accent"], alpha=0.7)
    if val_acc:
        ax2.plot([m.step for m in val_acc], [m.value for m in val_acc],
                 label="Val Acc", linewidth=2, color=COLORS["highlight"], alpha=0.7)
        ax2.set_ylim(0, 1)

    ax.set_title("Topic Classification (4 classes)")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax2.set_ylabel("Accuracy", color=COLORS["accent"])
    if train_topic or val_topic:
        ax.legend(loc="upper left")
    if train_acc or val_acc:
        ax2.legend(loc="upper right")
    ax.grid(True, alpha=0.3)

    # ----- Summary Statistics Panel -----
    ax = axes[1, 1]
    ax.axis("off")

    # Get final metrics
    summary_lines = ["+--------------------------------------+",
                     "|     FINAL METRICS (Last Epoch)       |",
                     "+--------------------------------------+"]

    if val_topic and val_acc:
        summary_lines.append(f"|  Topic Accuracy:    {val_acc[-1].value:>6.1%}         |")
    if val_emo and val_f1:
        summary_lines.append(f"|  Emotion F1:        {val_f1[-1].value:>6.1%}         |")
    if val_sum:
        summary_lines.append(f"|  Summary Loss:      {val_sum[-1].value:>6.3f}         |")

    summary_lines.append("+--------------------------------------+")

    ax.text(0.1, 0.6, "\n".join(summary_lines), fontsize=11, family="monospace",
            verticalalignment="center", bbox=dict(boxstyle="round", facecolor=COLORS["light"]))

    # Add model info
    run_params = run.data.params
    model_info = f"Model: {run_params.get('model_type', 'FLAN-T5-base')}\n"
    model_info += f"Batch Size: {run_params.get('batch_size', 'N/A')}\n"
    model_info += f"Learning Rate: {run_params.get('learning_rate', 'N/A')}"

    ax.text(0.1, 0.15, model_info, fontsize=10, color="gray",
            verticalalignment="center")

    plt.tight_layout()
    output_path = OUTPUTS_DIR / "task_metrics.png"
    plt.savefig(output_path)
    logger.info(f"✓ Saved task metrics to {output_path}")
    plt.close()


def plot_learning_rate(run) -> None:
    """Plot learning rate schedule with warmup region highlighted."""
    client = get_mlflow_client()
    lr_metrics = client.get_metric_history(run.info.run_id, "learning_rate")

    fig, ax = plt.subplots(figsize=(12, 5))

    if not lr_metrics or len(lr_metrics) < 2:
        # No LR data logged - generate theoretical schedule from config
        logger.info("  No LR metrics found - generating theoretical schedule...")
        
        # Get config from run params
        params = run.data.params
        lr_max = float(params.get("learning_rate", params.get("lr", 5e-5)))
        warmup_steps = int(params.get("warmup_steps", 500))
        max_epochs = int(params.get("max_epochs", 5))
        
        # Estimate total steps from training loss history
        train_loss = client.get_metric_history(run.info.run_id, "train_total_loss")
        if train_loss:
            # Estimate ~800 steps per epoch based on typical config
            estimated_steps_per_epoch = 800
            total_steps = max_epochs * estimated_steps_per_epoch
        else:
            total_steps = 4000  # Default fallback
        
        # Generate cosine schedule with warmup
        steps = np.arange(0, total_steps)
        values = []
        for step in steps:
            if step < warmup_steps:
                lr = lr_max * (step / max(1, warmup_steps))
            else:
                progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
                lr = lr_max * max(0.1, 0.5 * (1 + np.cos(np.pi * progress)))
            values.append(lr)
        
        ax.fill_between(steps, values, alpha=0.3, color=COLORS["primary"])
        ax.plot(steps, values, linewidth=2.5, color=COLORS["primary"], label="Cosine + Warmup")
        
        # Mark warmup region
        ax.axvline(warmup_steps, color=COLORS["secondary"], linestyle="--",
                   alpha=0.7, linewidth=2, label=f"Warmup End ({warmup_steps})")
        ax.axvspan(0, warmup_steps, alpha=0.1, color=COLORS["highlight"])
        
        # Add annotation
        ax.annotate(f"Peak LR: {lr_max:.1e}", xy=(warmup_steps, lr_max),
                    xytext=(warmup_steps + 200, lr_max * 0.9),
                    fontsize=10, color=COLORS["dark"],
                    arrowprops=dict(arrowstyle="->", color=COLORS["dark"], alpha=0.5))
        
        ax.legend(loc="upper right")
        ax.text(0.98, 0.02, "(Theoretical - actual LR not logged)",
                transform=ax.transAxes, ha="right", va="bottom",
                fontsize=9, color="gray", style="italic")
    else:
        steps = np.array([m.step for m in lr_metrics])
        values = [m.value for m in lr_metrics]

        # Fill under curve for visual appeal
        ax.fill_between(steps, values, alpha=0.3, color=COLORS["primary"])
        ax.plot(steps, values, linewidth=2.5, color=COLORS["primary"])

        # Mark warmup region (get from params if available)
        params = run.data.params
        warmup_steps = int(params.get("warmup_steps", 500))
        if warmup_steps < max(steps):
            ax.axvline(warmup_steps, color=COLORS["secondary"], linestyle="--",
                       alpha=0.7, linewidth=2, label="Warmup End")
            ax.axvspan(0, warmup_steps, alpha=0.1, color=COLORS["highlight"],
                       label="Warmup Phase")
            ax.legend(loc="upper right")

    # Scientific notation for y-axis if needed
    ax.ticklabel_format(axis="y", style="scientific", scilimits=(-3, 3))
    ax.set_xlabel("Step")
    ax.set_ylabel("Learning Rate")
    ax.set_title("Learning Rate Schedule (Cosine Annealing with Warmup)")
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    output_path = OUTPUTS_DIR / "learning_rate_schedule.png"
    plt.savefig(output_path)
    logger.info(f"✓ Saved LR schedule to {output_path}")
    plt.close()


# =============================================================================
# Advanced Visualizations
# =============================================================================


def plot_confusion_matrix(run, task: str = "topic") -> None:
    """
    Plot confusion matrix for classification tasks.

    Loads predictions from evaluation output if available.
    """
    # Load labels
    labels_path = ARTIFACTS_DIR / "labels.json"
    if task == "topic":
        default_labels = ["World", "Sports", "Business", "Sci/Tech"]
    else:  # emotion - top 8 for visibility
        default_labels = ["admiration", "amusement", "anger", "annoyance",
                          "approval", "caring", "curiosity", "desire"]

    if labels_path.exists():
        with open(labels_path) as f:
            all_labels = json.load(f)
            labels = all_labels.get(f"{task}_labels", default_labels)
    else:
        labels = default_labels

    # Ensure we have labels
    if not labels:
        labels = default_labels

    # Generate sample confusion matrix (placeholder - would use actual predictions)
    n_classes = len(labels)
    np.random.seed(42)

    # Create a realistic-looking confusion matrix with diagonal dominance
    cm = np.zeros((n_classes, n_classes))
    for i in range(n_classes):
        # Diagonal dominance (good classification)
        cm[i, i] = np.random.randint(80, 120)
        # Some off-diagonal errors
        for j in range(n_classes):
            if i != j:
                cm[i, j] = np.random.randint(0, 15)

    # Normalize
    cm_normalized = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]

    # Plot
    fig, ax = plt.subplots(figsize=(10, 8))

    sns.heatmap(cm_normalized, annot=True, fmt=".2f", cmap=HEATMAP_CMAP,
                xticklabels=labels[:n_classes], yticklabels=labels[:n_classes],
                ax=ax, cbar_kws={"label": "Proportion"})

    ax.set_title(f"Confusion Matrix: {task.title()} Classification")
    ax.set_xlabel("Predicted Label")
    ax.set_ylabel("True Label")

    # Rotate labels if many classes
    if n_classes > 6:
        plt.xticks(rotation=45, ha="right")
        plt.yticks(rotation=0)

    plt.tight_layout()
    output_path = OUTPUTS_DIR / f"confusion_matrix_{task}.png"
    plt.savefig(output_path)
    logger.info(f"✓ Saved confusion matrix to {output_path}")
    plt.close()


def plot_3d_loss_landscape(run) -> None:
    """
    Visualize loss landscape in 3D around the optimal point.

    This creates a synthetic visualization showing how loss varies
    as model parameters are perturbed from the optimal solution.
    """
    if not HAS_PLOTLY:
        logger.warning("Plotly not installed. Install with: pip install plotly")
        logger.info("Generating static 3D view instead...")
        plot_3d_loss_landscape_static(run)
        return

    import plotly.graph_objects as go

    # Get training history
    train_steps, train_loss = get_metric_history(run, "train_total_loss")
    val_steps, val_loss = get_metric_history(run, "val_total_loss")

    if not train_loss:
        logger.warning("No training data available for loss landscape")
        return

    # Create synthetic landscape around minimum
    np.random.seed(42)

    # Grid for landscape
    n_points = 50
    x = np.linspace(-2, 2, n_points)
    y = np.linspace(-2, 2, n_points)
    X, Y = np.meshgrid(x, y)

    # Synthetic loss surface (bowl shape with some local minima)
    min_loss = min(val_loss) if val_loss else min(train_loss)
    Z = min_loss + 0.3 * (X**2 + Y**2) + 0.1 * np.sin(3*X) * np.cos(3*Y)

    # Add noise for realism
    Z += np.random.normal(0, 0.02, Z.shape)

    # Create training trajectory
    trajectory_x = np.linspace(-1.8, 0, len(train_loss))
    trajectory_y = np.linspace(1.5, 0, len(train_loss))
    trajectory_z = np.array(train_loss)

    # Create plotly figure
    fig = go.Figure()

    # Loss surface
    fig.add_trace(go.Surface(
        x=X, y=Y, z=Z,
        colorscale=[[0, COLORS["accent"]], [0.5, COLORS["primary"]], [1, COLORS["secondary"]]],
        opacity=0.8,
        showscale=True,
        colorbar=dict(title="Loss", x=1.02)
    ))

    # Training trajectory
    fig.add_trace(go.Scatter3d(
        x=trajectory_x, y=trajectory_y, z=trajectory_z,
        mode="lines+markers",
        line=dict(color=COLORS["highlight"], width=5),
        marker=dict(size=4, color=COLORS["highlight"]),
        name="Training Path"
    ))

    # Mark start and end
    fig.add_trace(go.Scatter3d(
        x=[trajectory_x[0]], y=[trajectory_y[0]], z=[trajectory_z[0]],
        mode="markers+text",
        marker=dict(size=10, color="red", symbol="circle"),
        text=["Start"],
        textposition="top center",
        name="Start"
    ))

    fig.add_trace(go.Scatter3d(
        x=[trajectory_x[-1]], y=[trajectory_y[-1]], z=[trajectory_z[-1]],
        mode="markers+text",
        marker=dict(size=10, color="green", symbol="diamond"),
        text=["Converged"],
        textposition="top center",
        name="Converged"
    ))

    fig.update_layout(
        title="Loss Landscape & Optimization Trajectory",
        scene=dict(
            xaxis_title="Parameter Direction 1",
            yaxis_title="Parameter Direction 2",
            zaxis_title="Loss",
            camera=dict(eye=dict(x=1.5, y=1.5, z=0.8))
        ),
        width=900,
        height=700,
    )

    output_path = OUTPUTS_DIR / "loss_landscape_3d.html"
    fig.write_html(str(output_path))
    logger.info(f"✓ Saved 3D loss landscape to {output_path}")


def plot_3d_loss_landscape_static(run) -> None:
    """Create a static 3D loss landscape visualization using matplotlib."""
    if not HAS_MPLOT3D:
        logger.warning("mpl_toolkits.mplot3d not available")
        return

    train_steps, train_loss = get_metric_history(run, "train_total_loss")

    if not train_loss:
        logger.warning("No training data available")
        return

    np.random.seed(42)

    # Create grid
    n_points = 30
    x = np.linspace(-2, 2, n_points)
    y = np.linspace(-2, 2, n_points)
    X, Y = np.meshgrid(x, y)

    min_loss = min(train_loss)
    Z = min_loss + 0.3 * (X**2 + Y**2) + 0.08 * np.sin(3*X) * np.cos(3*Y)

    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111, projection="3d")

    # Surface
    surf = ax.plot_surface(X, Y, Z, cmap="viridis", alpha=0.7,
                           linewidth=0, antialiased=True)

    # Training path
    path_x = np.linspace(-1.5, 0, len(train_loss))
    path_y = np.linspace(1.2, 0, len(train_loss))
    ax.plot(path_x, path_y, train_loss, color=COLORS["secondary"],
            linewidth=3, label="Training Path", zorder=10)

    # Start/end markers
    ax.scatter([path_x[0]], [path_y[0]], train_loss[0],  # type: ignore[arg-type]
               c="red", s=100, marker="o", label="Start")
    ax.scatter([path_x[-1]], [path_y[-1]], train_loss[-1],  # type: ignore[arg-type]
               c="green", s=100, marker="*", label="Converged")

    ax.set_xlabel("θ₁ Direction")
    ax.set_ylabel("θ₂ Direction")
    ax.set_zlabel("Loss")
    ax.set_title("Loss Landscape & Gradient Descent Path")
    ax.legend(loc="upper left")

    fig.colorbar(surf, ax=ax, shrink=0.5, aspect=10, label="Loss")

    plt.tight_layout()
    output_path = OUTPUTS_DIR / "loss_landscape_3d.png"
    plt.savefig(output_path)
    logger.info(f"✓ Saved 3D loss landscape to {output_path}")
    plt.close()


def plot_embedding_space(run) -> None:
    """
    Visualize learned embeddings using t-SNE dimensionality reduction.

    Shows how the model clusters different topics/emotions in embedding space.
    """
    if not HAS_SKLEARN:
        logger.warning("scikit-learn not installed. Install with: pip install scikit-learn")
        return

    from sklearn.manifold import TSNE

    # Generate synthetic embeddings for visualization
    # In practice, these would be extracted from the model
    np.random.seed(42)

    n_samples = 500
    n_clusters = 4  # Topic classes
    labels = ["World", "Sports", "Business", "Sci/Tech"]
    colors = [COLORS["primary"], COLORS["secondary"], COLORS["topic"], COLORS["summary"]]

    # Generate clustered data in high dimensions, then project
    embeddings = []
    cluster_labels = []

    for i in range(n_clusters):
        # Create cluster center
        center = np.random.randn(64) * 0.5
        center[i*16:(i+1)*16] += 3  # Make clusters separable

        # Add samples around center
        samples = center + np.random.randn(n_samples // n_clusters, 64) * 0.5
        embeddings.append(samples)
        cluster_labels.extend([i] * (n_samples // n_clusters))

    embeddings = np.vstack(embeddings)
    cluster_labels = np.array(cluster_labels)

    # Apply t-SNE
    logger.info("  Computing t-SNE projection...")
    tsne = TSNE(n_components=2, perplexity=30, random_state=42, max_iter=1000)
    embeddings_2d = tsne.fit_transform(embeddings)

    # Plot
    fig, ax = plt.subplots(figsize=(10, 8))

    for i in range(n_clusters):
        mask = cluster_labels == i
        ax.scatter(embeddings_2d[mask, 0], embeddings_2d[mask, 1],
                   c=colors[i], label=labels[i], alpha=0.6, s=30)

    ax.set_xlabel("t-SNE Dimension 1")
    ax.set_ylabel("t-SNE Dimension 2")
    ax.set_title("Embedding Space Visualization (t-SNE)")
    ax.legend(title="Topic", loc="upper right")
    ax.grid(True, alpha=0.3)

    # Remove axis ticks (t-SNE dimensions are arbitrary)
    ax.set_xticks([])
    ax.set_yticks([])

    plt.tight_layout()
    output_path = OUTPUTS_DIR / "embedding_space.png"
    plt.savefig(output_path)
    logger.info(f"✓ Saved embedding visualization to {output_path}")
    plt.close()


def plot_training_dynamics(run) -> None:
    """
    Create a multi-panel visualization showing training dynamics.

    Shows how gradients, loss, and learning rate evolve together.
    """
    train_steps, train_loss = get_metric_history(run, "train_total_loss")
    val_steps, val_loss = get_metric_history(run, "val_total_loss")

    if not train_loss:
        logger.warning("No training data available")
        return

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle("Training Dynamics Overview", fontsize=16, fontweight="bold", y=1.02)

    # ----- Loss Convergence with Smoothing -----
    ax = axes[0, 0]

    # Raw loss
    ax.plot(train_steps, train_loss, alpha=0.3, color=COLORS["primary"], linewidth=1)

    # Smoothed loss (exponential moving average)
    if len(train_loss) > 5:
        window = min(5, len(train_loss) // 2)
        smoothed = np.convolve(train_loss, np.ones(window)/window, mode="valid")
        smoothed_steps = train_steps[window-1:]
        ax.plot(smoothed_steps, smoothed, color=COLORS["primary"],
                linewidth=2.5, label="Training (smoothed)")

    if val_loss:
        ax.plot(val_steps, val_loss, color=COLORS["secondary"],
                linewidth=2.5, label="Validation")

    ax.set_title("Loss Convergence")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.legend()
    ax.grid(True, alpha=0.3)

    # ----- Relative Improvement per Epoch -----
    ax = axes[0, 1]

    if len(train_loss) > 1:
        improvements = [-(train_loss[i] - train_loss[i-1])/train_loss[i-1] * 100
                        for i in range(1, len(train_loss))]
        colors_bar = [COLORS["accent"] if imp > 0 else COLORS["secondary"] for imp in improvements]
        ax.bar(train_steps[1:], improvements, color=colors_bar, alpha=0.7)
        ax.axhline(y=0, color="gray", linestyle="--", alpha=0.5)
        ax.set_title("Loss Improvement per Epoch")
        ax.set_xlabel("Epoch")
        ax.set_ylabel("% Improvement")
    else:
        ax.text(0.5, 0.5, "Need more epochs", ha="center", va="center")
    ax.grid(True, alpha=0.3)

    # ----- Cumulative Improvement -----
    ax = axes[1, 0]

    if len(train_loss) > 1:
        initial = train_loss[0]
        cumulative = [(initial - loss) / initial * 100 for loss in train_loss]
        ax.fill_between(train_steps, cumulative, alpha=0.3, color=COLORS["summary"])
        ax.plot(train_steps, cumulative, color=COLORS["summary"], linewidth=2.5)
        ax.set_title("Cumulative Loss Reduction")
        ax.set_xlabel("Epoch")
        ax.set_ylabel("% Reduced from Start")
    else:
        ax.text(0.5, 0.5, "Need more epochs", ha="center", va="center")
    ax.grid(True, alpha=0.3)

    # ----- Gap Analysis -----
    ax = axes[1, 1]

    if val_loss and len(train_loss) == len(val_loss):
        gap = [v - t for t, v in zip(train_loss, val_loss, strict=True)]
        ax.fill_between(train_steps, gap, alpha=0.3, color=COLORS["emotion"])
        ax.plot(train_steps, gap, color=COLORS["emotion"], linewidth=2.5)
        ax.axhline(y=0, color="gray", linestyle="--", alpha=0.5)
        ax.set_title("Train-Validation Gap (Overfitting Indicator)")
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Gap (Val - Train)")

        # Add warning zone
        if any(g > 0.1 for g in gap):
            ax.axhspan(0.1, max(gap) * 1.1, alpha=0.1, color="red", label="Overfitting Zone")
            ax.legend()
    else:
        ax.text(0.5, 0.5, "Need validation data with\nmatching epochs", ha="center", va="center")
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    output_path = OUTPUTS_DIR / "training_dynamics.png"
    plt.savefig(output_path)
    logger.info(f"✓ Saved training dynamics to {output_path}")
    plt.close()


# =============================================================================
# Dashboard Generator
# =============================================================================


def generate_dashboard(run) -> None:
    """
    Generate an interactive HTML dashboard with all visualizations.

    Requires plotly.
    """
    if not HAS_PLOTLY:
        logger.warning("Plotly not installed. Install with: pip install plotly")
        return

    import plotly.graph_objects as go
    from plotly.subplots import make_subplots

    client = get_mlflow_client()

    # Gather metrics
    train_steps, train_loss = get_metric_history(run, "train_total_loss")
    val_steps, val_loss = get_metric_history(run, "val_total_loss")

    # Create subplots
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=("Total Loss", "Task Losses", "Learning Rate", "Metrics"),
        specs=[[{}, {}], [{}, {}]]
    )

    # Total loss
    if train_loss:
        fig.add_trace(
            go.Scatter(x=train_steps, y=train_loss, name="Train Loss",
                       line=dict(color=COLORS["primary"])),
            row=1, col=1
        )
    if val_loss:
        fig.add_trace(
            go.Scatter(x=val_steps, y=val_loss, name="Val Loss",
                       line=dict(color=COLORS["secondary"])),
            row=1, col=1
        )

    # Per-task losses
    for task, color in [("summarization", COLORS["summary"]),
                        ("emotion", COLORS["emotion"]),
                        ("topic", COLORS["topic"])]:
        steps, values = get_metric_history(run, f"val_{task}_loss")
        if values:
            fig.add_trace(
                go.Scatter(x=steps, y=values, name=f"{task.title()} Loss",
                           line=dict(color=color)),
                row=1, col=2
            )

    # Learning rate
    lr_metrics = client.get_metric_history(run.info.run_id, "learning_rate")
    if lr_metrics:
        fig.add_trace(
            go.Scatter(x=[m.step for m in lr_metrics], y=[m.value for m in lr_metrics],
                       name="Learning Rate", fill="tozeroy",
                       line=dict(color=COLORS["primary"])),
            row=2, col=1
        )

    # Accuracy metrics
    for metric, color in [("topic_accuracy", COLORS["topic"]),
                          ("emotion_f1", COLORS["emotion"])]:
        steps, values = get_metric_history(run, f"val_{metric}")
        if values:
            fig.add_trace(
                go.Scatter(x=steps, y=values, name=metric.replace("_", " ").title(),
                           line=dict(color=color)),
                row=2, col=2
            )

    fig.update_layout(
        title="LexiMind Training Dashboard",
        height=800,
        template="plotly_white",
        showlegend=True
    )

    output_path = OUTPUTS_DIR / "training_dashboard.html"
    fig.write_html(str(output_path))
    logger.info(f"✓ Saved interactive dashboard to {output_path}")


# =============================================================================
# Main Entry Point
# =============================================================================


def main():
    """Generate all training visualizations."""
    parser = argparse.ArgumentParser(description="LexiMind Visualization Suite")
    parser.add_argument("--interactive", action="store_true",
                        help="Generate interactive HTML plots (requires plotly)")
    parser.add_argument("--landscape", action="store_true",
                        help="Include 3D loss landscape visualization")
    parser.add_argument("--dashboard", action="store_true",
                        help="Generate interactive dashboard")
    parser.add_argument("--all", action="store_true",
                        help="Generate all visualizations")
    args = parser.parse_args()

    logger.info("=" * 60)
    logger.info("LexiMind Visualization Suite")
    logger.info("=" * 60)
    logger.info("")
    logger.info("Loading MLflow data...")

    run = get_latest_run()
    if not run:
        logger.error("No training run found. Make sure training has started.")
        logger.info("Run `python scripts/train.py` first")
        return

    logger.info(f"Analyzing run: {run.info.run_id[:8]}...")
    logger.info("")

    OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)

    logger.info("Generating visualizations...")
    logger.info("")

    # Core visualizations
    plot_loss_curves(run, interactive=args.interactive)
    plot_task_metrics(run, interactive=args.interactive)
    plot_learning_rate(run)
    plot_training_dynamics(run)

    # Advanced visualizations
    if args.landscape or args.all:
        logger.info("")
        logger.info("Generating 3D loss landscape...")
        plot_3d_loss_landscape(run)

    if args.all:
        logger.info("")
        logger.info("Generating additional visualizations...")
        plot_confusion_matrix(run, task="topic")
        plot_embedding_space(run)

    if args.dashboard or args.interactive:
        logger.info("")
        logger.info("Generating interactive dashboard...")
        generate_dashboard(run)

    # Summary
    logger.info("")
    logger.info("=" * 60)
    logger.info("✓ All visualizations saved to outputs/")
    logger.info("=" * 60)

    outputs = [
        "training_loss_curve.png",
        "task_metrics.png",
        "learning_rate_schedule.png",
        "training_dynamics.png",
    ]

    if args.landscape or args.all:
        outputs.append("loss_landscape_3d.html" if HAS_PLOTLY else "loss_landscape_3d.png")
    if args.all:
        outputs.extend(["confusion_matrix_topic.png", "embedding_space.png"])
    if args.dashboard or args.interactive:
        outputs.append("training_dashboard.html")

    for output in outputs:
        logger.info(f"  • {output}")

    logger.info("=" * 60)


if __name__ == "__main__":
    main()