{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "2745946d",
   "metadata": {},
   "source": [
    "# Multi-Talker Pipeline: Results Visualization & Comparison\n",
    "\n",
    "This notebook visualizes benchmark results from comparing three audio source separation approaches:\n",
    "- **ICA**: Simple, fast Independent Component Analysis\n",
    "- **Frankenstein**: ICA + English language bias for talker selection\n",
    "- **ICA+DeepLearning**: Two-pass (spatial + temporal) separation with SepFormer\n",
    "\n",
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "318a1d0a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import json\n",
    "from pathlib import Path\n",
    "from datetime import datetime\n",
    "\n",
    "# Plot styling shared by the figures in this notebook\n",
    "sns.set_style('whitegrid')\n",
    "plt.rcParams.update({\n",
    "    'figure.figsize': (12, 6),  # default size; cells pass their own figsize where needed\n",
    "    'font.size': 10,\n",
    "})\n",
    "\n",
    "print(\"Imports successful!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b191c6b1",
   "metadata": {},
   "source": [
    "## Load Benchmark Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ec482d67",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Locations of the benchmark outputs, relative to this notebook\n",
    "RESULTS_DIR = Path('../benchmark_results')\n",
    "CSV_FILE = RESULTS_DIR / 'benchmark_results.csv'\n",
    "JSON_FILE = RESULTS_DIR / 'benchmark_results.json'\n",
    "\n",
    "# Fail fast with a clear message: every later cell reads `df`, so silently\n",
    "# skipping the load would only surface as a NameError further down.\n",
    "if not CSV_FILE.exists():\n",
    "    raise FileNotFoundError(\n",
    "        f\"Benchmark results not found: {CSV_FILE.resolve()}. \"\n",
    "        \"Run the benchmark first or adjust RESULTS_DIR.\"\n",
    "    )\n",
    "\n",
    "df = pd.read_csv(CSV_FILE)\n",
    "print(f\"Loaded {len(df)} results from {CSV_FILE}\")\n",
    "print(f\"\\nColumns: {list(df.columns)}\")\n",
    "print(f\"\\nDataframe shape: {df.shape}\")\n",
    "\n",
    "# Preview the first rows. This must be the last top-level expression:\n",
    "# nested inside an `if` block the notebook would not render it.\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9fe2465a",
   "metadata": {},
   "source": [
    "## 1. Execution Time Comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3ac852c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the runs that completed successfully\n",
    "df_success = df.loc[df['status'] == 'SUCCESS'].copy()\n",
    "\n",
    "if df_success.empty:\n",
    "    print(\"No successful runs to display\")\n",
    "else:\n",
    "    fig, (ax_bar, ax_box) = plt.subplots(1, 2, figsize=(14, 5))\n",
    "    times_by_approach = df_success.groupby('approach')['execution_time_seconds']\n",
    "\n",
    "    # Left panel: mean execution time per approach\n",
    "    mean_times = times_by_approach.mean()\n",
    "    mean_times.plot(kind='bar', ax=ax_bar, color=['#1f77b4', '#ff7f0e', '#2ca02c'])\n",
    "    ax_bar.set_title('Average Execution Time', fontsize=12, fontweight='bold')\n",
    "    ax_bar.set_ylabel('Time (seconds)')\n",
    "    ax_bar.set_xlabel('Approach')\n",
    "    ax_bar.tick_params(axis='x', rotation=45)\n",
    "\n",
    "    # Write the value just above each bar (offset is 5 seconds in data units)\n",
    "    for pos, seconds in enumerate(mean_times):\n",
    "        ax_bar.text(pos, seconds + 5, f'{seconds:.0f}s',\n",
    "                    ha='center', va='bottom', fontweight='bold')\n",
    "\n",
    "    # Right panel: distribution of execution times per approach\n",
    "    df_success.boxplot(column='execution_time_seconds', by='approach', ax=ax_box)\n",
    "    ax_box.set_title('Execution Time Distribution', fontsize=12, fontweight='bold')\n",
    "    ax_box.set_ylabel('Time (seconds)')\n",
    "    ax_box.set_xlabel('Approach')\n",
    "    plt.suptitle('')  # Clear the automatic figure-level title\n",
    "\n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "    # Descriptive statistics per approach\n",
    "    print(\"\\n=== EXECUTION TIME STATISTICS ===\")\n",
    "    print(times_by_approach.describe().round(2))"
   ]
  },
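  {
   "cell_type": "markdown",
   "id": "7758f016",
   "metadata": {},
   "source": [
    "## 2. Speedup Metric (Realtime Factor)\n",
    "\n",
    "Speedup is the audio duration divided by the execution time: values above 1x are faster than realtime, values below 1x are slower."
   ]
  },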
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ac1f2073",
   "metadata": {},
   "outputs": [],
   "source": [
    "if df_success.empty:\n",
    "    print(\"No data available\")\n",
    "else:\n",
    "    fig, ax = plt.subplots(figsize=(10, 5))\n",
    "\n",
    "    # Speedup = audio duration / execution time.\n",
    "    # The column is stored on df_success because later cells read it.\n",
    "    df_success['speedup'] = df_success['duration_seconds'].div(df_success['execution_time_seconds'])\n",
    "\n",
    "    # Mean speedup per approach, drawn as bars\n",
    "    mean_speedup = df_success.groupby('approach')['speedup'].mean()\n",
    "    mean_speedup.plot(kind='bar', ax=ax, color=['#1f77b4', '#ff7f0e', '#2ca02c'])\n",
    "\n",
    "    ax.set_title('Average Speedup (Realtime Factor)', fontsize=12, fontweight='bold')\n",
    "    ax.set_ylabel('Speedup (1x = realtime)')\n",
    "    ax.set_xlabel('Approach')\n",
    "    ax.axhline(y=1.0, color='red', linestyle='--', label='Realtime (1x)')  # reference line at 1x\n",
    "    ax.legend()\n",
    "    ax.tick_params(axis='x', rotation=45)\n",
    "\n",
    "    # Write the value just above each bar\n",
    "    for pos, factor in enumerate(mean_speedup):\n",
    "        ax.text(pos, factor + 0.01, f'{factor:.3f}x',\n",
    "                ha='center', va='bottom', fontweight='bold')\n",
    "\n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "    print(\"\\n=== SPEEDUP STATISTICS ===\")\n",
    "    print(\"(1x = realtime, <1x = slower than realtime)\")\n",
    "    print(mean_speedup.round(4))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "63fdc692",
   "metadata": {},
   "source": [
    "## 3. Speaker Detection Accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b135268c",
   "metadata": {},
   "outputs": [],
   "source": [
    "EXPECTED_SPEAKERS = 4  # expected speaker count, drawn as the reference line\n",
    "\n",
    "if len(df_success) > 0:\n",
    "    fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
    "\n",
    "    # Per-approach summary of the detected speaker count\n",
    "    speaker_stats = df_success.groupby('approach')['n_speakers'].agg(['mean', 'std', 'min', 'max'])\n",
    "    approaches = list(speaker_stats.index)  # one (sorted) order shared by both panels\n",
    "\n",
    "    # Bar chart with error bars (mean +/- std)\n",
    "    speaker_stats['mean'].plot(kind='bar', ax=axes[0], color=['#1f77b4', '#ff7f0e', '#2ca02c'],\n",
    "                                yerr=speaker_stats['std'], capsize=5)\n",
    "    axes[0].set_title('Average Speaker Count Detection', fontsize=12, fontweight='bold')\n",
    "    axes[0].set_ylabel('Number of Speakers')\n",
    "    axes[0].set_xlabel('Approach')\n",
    "    axes[0].axhline(y=EXPECTED_SPEAKERS, color='green', linestyle='--',\n",
    "                    label=f'Expected ({EXPECTED_SPEAKERS})')\n",
    "\n",
    "    # Zoom on the 3-5 window only when every bar and error bar fits inside it;\n",
    "    # a fixed window would otherwise clip the bars or hide them completely.\n",
    "    spread = speaker_stats['std'].fillna(0)  # std is NaN when an approach has a single run\n",
    "    lowest = (speaker_stats['mean'] - spread).min()\n",
    "    highest = (speaker_stats['mean'] + spread).max()\n",
    "    axes[0].set_ylim([3 if lowest > 3 else 0, max(5, highest * 1.05)])\n",
    "    axes[0].legend()\n",
    "    axes[0].tick_params(axis='x', rotation=45)\n",
    "\n",
    "    # Distribution of the detected speaker count per approach\n",
    "    speaker_by_approach = [df_success.loc[df_success['approach'] == app, 'n_speakers'].values\n",
    "                           for app in approaches]\n",
    "    axes[1].boxplot(speaker_by_approach)\n",
    "    # Tick labels are set after plotting because the `labels=` keyword of\n",
    "    # boxplot is deprecated in Matplotlib 3.9+.\n",
    "    axes[1].set_xticklabels(approaches)\n",
    "    axes[1].set_title('Speaker Count Distribution', fontsize=12, fontweight='bold')\n",
    "    axes[1].set_ylabel('Number of Speakers')\n",
    "    axes[1].axhline(y=EXPECTED_SPEAKERS, color='green', linestyle='--',\n",
    "                    label=f'Expected ({EXPECTED_SPEAKERS})')\n",
    "    axes[1].legend()\n",
    "\n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "    print(\"\\n=== SPEAKER COUNT STATISTICS ===\")\n",
    "    print(speaker_stats.round(2))\n",
    "else:\n",
    "    print(\"No data available\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d7ea07dd",
   "metadata": {},
   "source": [
    "## 4. Per-File Performance Comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d7a0d21",
   "metadata": {},
   "outputs": [],
   "source": [
    "if df_success.empty:\n",
    "    print(\"No data available\")\n",
    "else:\n",
    "    # Mean execution time: one row per input file, one column per approach\n",
    "    exec_time_pivot = pd.pivot_table(\n",
    "        df_success,\n",
    "        values='execution_time_seconds',\n",
    "        index='input_file',\n",
    "        columns='approach',\n",
    "        aggfunc='mean',\n",
    "    )\n",
    "\n",
    "    print(\"\\n=== EXECUTION TIME BY FILE (seconds) ===\")\n",
    "    print(exec_time_pivot.round(1))\n",
    "\n",
    "    # Grouped bar chart, drawn only when the pivot actually holds data\n",
    "    has_rows = not exec_time_pivot.empty\n",
    "    if has_rows:\n",
    "        fig, ax = plt.subplots(figsize=(12, 6))\n",
    "        exec_time_pivot.plot(kind='bar', ax=ax, color=['#1f77b4', '#ff7f0e', '#2ca02c'])\n",
    "        ax.set_title('Execution Time per Test File', fontsize=12, fontweight='bold')\n",
    "        ax.set_ylabel('Time (seconds)')\n",
    "        ax.set_xlabel('Input File')\n",
    "        ax.legend(title='Approach')\n",
    "        plt.tight_layout()\n",
    "        plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5daa4901",
   "metadata": {},
   "source": [
    "## 5. Heatmap: All Metrics Comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f618616b",
   "metadata": {},
   "outputs": [],
   "source": [
    "if len(df_success) > 0:\n",
    "    # One row per approach. The 'speedup' column is the one added to\n",
    "    # df_success in the speedup section, so that cell must have run first.\n",
    "    heatmap_data = df_success.groupby('approach').agg({\n",
    "        'execution_time_seconds': 'mean',\n",
    "        'n_speakers': 'mean',\n",
    "        'speedup': 'mean',\n",
    "        'input_file': 'count'  # Number of tests\n",
    "    }).round(2)\n",
    "\n",
    "    heatmap_data.columns = ['Avg Exec Time (s)', 'Avg Speaker Count', 'Speedup', 'Tests Run']\n",
    "\n",
    "    # Min-max normalize every metric to 0-1 so they can share one colour scale\n",
    "    heatmap_normalized = heatmap_data.astype(float)\n",
    "    for col in heatmap_normalized.columns:\n",
    "        min_val = heatmap_normalized[col].min()\n",
    "        max_val = heatmap_normalized[col].max()\n",
    "        if max_val > min_val:\n",
    "            heatmap_normalized[col] = (heatmap_normalized[col] - min_val) / (max_val - min_val)\n",
    "        else:\n",
    "            # Constant metric (single approach, or identical values such as equal\n",
    "            # test counts): use a neutral mid-scale value instead of leaking the\n",
    "            # raw numbers into the 0-1 colour scale. Missing values stay missing.\n",
    "            is_missing = heatmap_normalized[col].isna()\n",
    "            heatmap_normalized[col] = heatmap_normalized[col].where(is_missing, 0.5)\n",
    "\n",
    "    # Colours come from the normalized values, annotations show the raw values.\n",
    "    # NOTE: with 'RdYlGn_r' a high normalized value is drawn red for every row,\n",
    "    # including 'Speedup', where a higher value is the better result.\n",
    "    fig, ax = plt.subplots(figsize=(10, 5))\n",
    "    sns.heatmap(heatmap_normalized.T, annot=heatmap_data.T, fmt='.2f', cmap='RdYlGn_r',\n",
    "                vmin=0, vmax=1,  # pin the scale to the normalized range\n",
    "                cbar_kws={'label': 'Normalized Score'}, ax=ax)\n",
    "    ax.set_title('Approach Comparison Heatmap', fontsize=12, fontweight='bold')\n",
    "    ax.set_xlabel('Approach')\n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "    print(\"\\n=== METRICS SUMMARY ===\")\n",
    "    print(heatmap_data)\n",
    "else:\n",
    "    print(\"No data available\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1661fb4f",
   "metadata": {},
   "source": [
    "## 6. Approach Characteristics Summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4aefd7a4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hand-written description of each approach, keyed by approach name\n",
    "approach_info = dict(\n",
    "    ica={\n",
    "        'Separation': 'FastICA',\n",
    "        'DoA Method': 'Mixing matrix energy ratios',\n",
    "        'Speed': 'Fast',\n",
    "        'ToI Priority': 'Spatial + Energy + Language',\n",
    "        'Best For': 'Clean environments',\n",
    "    },\n",
    "    frankenstein={\n",
    "        'Separation': 'FastICA',\n",
    "        'DoA Method': 'None (amplitude panning)',\n",
    "        'Speed': 'Fast',\n",
    "        'ToI Priority': 'English language (heavy bias)',\n",
    "        'Best For': 'Multilingual targets',\n",
    "    },\n",
    "    ica_deeplearning={\n",
    "        'Separation': 'PCA+ICA (Pass 1) + SepFormer (Pass 2)',\n",
    "        'DoA Method': 'Mixing matrix (Pass 1 only)',\n",
    "        'Speed': 'Slow/GPU-optimized',\n",
    "        'ToI Priority': 'Spatial + Energy + Language',\n",
    "        'Best For': 'Overlapping speech',\n",
    "    },\n",
    ")\n",
    "\n",
    "banner = \"=\" * 70\n",
    "print(\"\\n\" + banner)\n",
    "print(\"APPROACH CHARACTERISTICS SUMMARY\")\n",
    "print(banner)\n",
    "\n",
    "# One section per approach: upper-cased name, a rule, then aligned key/value rows\n",
    "for approach, chars in approach_info.items():\n",
    "    print(f\"\\n{approach.upper()}:\")\n",
    "    print(\"-\" * 70)\n",
    "    for key in chars:\n",
    "        print(f\"  {key:20s}: {chars[key]}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2406760e",
   "metadata": {},
   "source": [
    "## 7. Error Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ec56743",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every run that is not an explicit SUCCESS counts as failed, so a run with\n",
    "# any other (or missing) status is reported instead of being silently dropped.\n",
    "# df_failed is also read by the report cell at the end of the notebook.\n",
    "df_failed = df[df['status'] != 'SUCCESS']\n",
    "\n",
    "if len(df_failed) > 0:\n",
    "    print(f\"\\n=== FAILED RUNS: {len(df_failed)} ===\")\n",
    "    for _, row in df_failed.iterrows():\n",
    "        # Fall back to 'Unknown' when the error column is absent or empty\n",
    "        error = row.get('error')\n",
    "        if pd.isna(error):\n",
    "            error = 'Unknown'\n",
    "        print(f\"\\nFile: {row['input_file']}\")\n",
    "        print(f\"Approach: {row['approach']}\")\n",
    "        print(f\"Status: {row['status']}\")\n",
    "        print(f\"Error: {error}\")\n",
    "else:\n",
    "    print(\"\\n✅ No failed runs - all approaches successful!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "670b3204",
   "metadata": {},
   "source": [
    "## 8. Recommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fc31327f",
   "metadata": {},
   "outputs": [],
   "source": [
    "if df_success.empty:\n",
    "    print(\"No data available for recommendations\")\n",
    "else:\n",
    "    rule = \"=\" * 70\n",
    "    print(\"\\n\" + rule)\n",
    "    print(\"APPROACH SELECTION RECOMMENDATIONS\")\n",
    "    print(rule)\n",
    "\n",
    "    # Fastest: the approach with the lowest mean execution time\n",
    "    fastest = df_success.groupby('approach')['execution_time_seconds'].mean().idxmin()\n",
    "    fastest_times = df_success.loc[df_success['approach'] == fastest, 'execution_time_seconds']\n",
    "    print(f\"\\n⚡ FASTEST: {fastest.upper()}\")\n",
    "    print(f\"   Avg time: {fastest_times.mean():.1f}s\")\n",
    "    print(\"   Use when: You need realtime or near-realtime processing\")\n",
    "\n",
    "    # The remaining recommendations are fixed text, not derived from the data\n",
    "    print(\"\\n🌍 BEST FOR MULTILINGUAL: frankenstein\")\n",
    "    print(\"   Heavy English bias helps when target speaker is known to be English\")\n",
    "\n",
    "    print(\"\\n📊 BEST FOR OVERLAPPING SPEECH: ica_deeplearning\")\n",
    "    print(\"   Two-pass approach handles temporal overlap better\")\n",
    "    print(\"   Good for: multi-speaker conversations, active background\")\n",
    "\n",
    "    print(\"\\n⚖️  BALANCED CHOICE: ica\")\n",
    "    print(\"   Good performance + reasonable speed\")\n",
    "    print(\"   Spatial information helps talker selection\")\n",
    "\n",
    "    print(\"\\n\" + rule)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "400ed39e",
   "metadata": {},
   "source": [
    "## 9. Export Summary Report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "728bc904",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a plain-text summary report, print it and save it next to the results.\n",
    "# Reads df_failed (error-analysis cell) and the 'speedup' column (speedup cell).\n",
    "if len(df_success) > 0:\n",
    "    summary_report = f\"\"\"\n",
    "    \n",
    "=================================================================\n",
    "MULTI-TALKER AUDIO SOURCE SEPARATION BENCHMARK REPORT\n",
    "=================================================================\n",
    "\n",
    "Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
    "\n",
    "--- OVERALL STATISTICS ---\n",
    "Total Runs: {len(df)}\n",
    "Successful: {len(df_success)}\n",
    "Failed: {len(df_failed)}\n",
    "\n",
    "--- EXECUTION TIME ---\n",
    "{df_success.groupby('approach')['execution_time_seconds'].agg(['mean', 'min', 'max']).round(1).to_string()}\n",
    "\n",
    "--- SPEAKER DETECTION ---\n",
    "{df_success.groupby('approach')['n_speakers'].describe().round(2).to_string()}\n",
    "\n",
    "--- SPEEDUP (Realtime Factor) ---\n",
    "{df_success.groupby('approach')['speedup'].mean().round(4).to_string()}\n",
    "\n",
    "--- RECOMMENDATION ---\n",
    "Fastest Approach: {df_success.groupby('approach')['execution_time_seconds'].mean().idxmin().upper()}\n",
    "Best for Multilingual: frankenstein (English priority)\n",
    "Best for Overlapping: ica_deeplearning (Two-pass)\n",
    "Balanced: ica (Speed + Spatial Info)\n",
    "\n",
    "=================================================================\n",
    "    \"\"\"\n",
    "    \n",
    "    print(summary_report)\n",
    "    \n",
    "    # Save to file. The encoding is explicit so the written bytes do not depend\n",
    "    # on the platform default (the report embeds approach names from the CSV).\n",
    "    report_path = RESULTS_DIR / 'BENCHMARK_REPORT.txt'\n",
    "    with open(report_path, 'w', encoding='utf-8') as f:\n",
    "        f.write(summary_report)\n",
    "    print(f\"\\n✅ Report saved to: {report_path}\")\n",
    "else:\n",
    "    print(\"No data available\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "audio-2026 (3.12.7)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}