{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Correlations Example\n", "\n", "Clean, reproducible correlation analysis for Hyperview, DAT, Intuition-1, and EnMAP submissions.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## How To Use\n", "\n", "1. Update paths in the configuration cells below.\n", "2. Run cells from top to bottom.\n", "3. The notebook will generate:\n", " - `metrics.xlsx` (aggregated `all_metrics.json` from run folders),\n", " - `correlation_results.xlsx` (cross-split PLCC/SRCC/RMSE + custom score).\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", "import json\n", "\n", "import numpy as np\n", "import pandas as pd\n", "from openpyxl import Workbook\n", "from scipy.stats import pearsonr, spearmanr\n", "from sklearn.metrics import mean_squared_error\n", "\n", "\n", "# Shared utility functions used by all sections.\n", "def compute_plcc_srcc_rmse(a: np.ndarray, b: np.ndarray) -> tuple[float, float, float]:\n", " \"\"\"Return PLCC, SRCC, and RMSE for two equally sized arrays.\"\"\"\n", " plcc = pearsonr(a, b)[0]\n", " srcc = spearmanr(a, b)[0]\n", " rmse = float(np.sqrt(mean_squared_error(a, b)))\n", " return plcc, srcc, rmse\n", "\n", "\n", "def load_submission_flat(csv_path: Path) -> np.ndarray | None:\n", " \"\"\"Load submission CSV, drop `sample_index` if present, and flatten to 1D.\"\"\"\n", " if not csv_path.exists():\n", " return None\n", " return pd.read_csv(csv_path).drop(columns=['sample_index'], errors='ignore').values.flatten()\n", "\n", "\n", "def load_custom_score(json_path: Path) -> float | str:\n", " \"\"\"Load `custom` metric from JSON file; return 'N/A' if missing.\"\"\"\n", " if not json_path.exists():\n", " return 'N/A'\n", " with json_path.open('r', encoding='utf-8') as f:\n", " return json.load(f).get('custom', 'N/A')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1) Export Metrics Workbook\n", "\n", "Collect `all_metrics.json` from each run directory and export grouped sheets (`P`, `K`, `Mg`, `pH`, `stats`).\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Saved metrics workbook to: /mnt/d/new_runs_decoder_v2/metrics.xlsx\n", "Number of runs exported: 43\n" ] } ], "source": [ "# Configure source directory with run folders and output Excel path.\n", "RUNS_DIR = Path('/mnt/d/new_runs_decoder_v2')\n", "OUTPUT_METRICS_XLSX = RUNS_DIR / 'metrics.xlsx'\n", "\n", "# Columns to export per worksheet.\n", "SHEETS_DEFINITIONS = {\n", " 'P': [\n", " 'run_name',\n", " 'P_avg_acc', 'P_acc', 'P_f1', 'P_mcc', 'P_kappa',\n", " 'P_r2', 'P_mse', 'P_mae',\n", " ],\n", " 'K': [\n", " 'run_name',\n", " 'K_avg_acc', 'K_acc', 'K_f1', 'K_mcc', 'K_kappa',\n", " 'K_r2', 'K_mse', 'K_mae',\n", " ],\n", " 'Mg': [\n", " 'run_name',\n", " 'Mg_avg_acc', 'Mg_acc', 'Mg_f1', 'Mg_mcc', 'Mg_kappa',\n", " 'Mg_r2', 'Mg_mse', 'Mg_mae',\n", " ],\n", " 'pH': [\n", " 'run_name',\n", " 'pH_avg_acc', 'pH_acc', 'pH_f1', 'pH_mcc', 'pH_kappa',\n", " 'pH_r2', 'pH_mse', 'pH_mae',\n", " ],\n", " 'stats': [\n", " 'run_name',\n", " 'mean_avg_acc', 'std_avg_acc',\n", " 'mean_acc', 'std_acc',\n", " 'mean_mcc', 'std_mcc',\n", " 'mean_f1', 'std_f1',\n", " 'P_score', 'K_score', 'Mg_score', 'pH_score',\n", " 'custom',\n", " ],\n", "}\n", "\n", "\n", "def export_metrics_workbook(runs_dir: Path, output_xlsx: Path) -> int:\n", " \"\"\"Scan run folders and export JSON metrics to a multi-sheet Excel workbook.\"\"\"\n", " rows: list[dict] = []\n", "\n", " for run_path in sorted(runs_dir.iterdir()):\n", " if not run_path.is_dir():\n", " continue\n", " metrics_path = run_path / 'all_metrics.json'\n", " if not metrics_path.exists():\n", " continue\n", "\n", " with metrics_path.open('r', encoding='utf-8') as f:\n", " metrics = json.load(f)\n", "\n", " row = {'run_name': run_path.name}\n", " row.update(metrics)\n", " rows.append(row)\n", "\n", " wb = Workbook()\n", " wb.remove(wb.active)\n", "\n", " for sheet_name, columns in SHEETS_DEFINITIONS.items():\n", " ws = wb.create_sheet(title=sheet_name)\n", " ws.append(columns)\n", " for row in rows:\n", " ws.append([row.get(col, '') for col in columns])\n", "\n", " wb.save(output_xlsx)\n", " return len(rows)\n", "\n", "\n", "n_runs = export_metrics_workbook(RUNS_DIR, OUTPUT_METRICS_XLSX)\n", "print(f'Saved metrics workbook to: {OUTPUT_METRICS_XLSX}')\n", "print(f'Number of runs exported: {n_runs}')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2) Correlation Study Across Splits\n", "\n", "Compute PLCC/SRCC/RMSE and attach `custom` score for selected model directories.\n", "\n", "Comparisons included:\n", "- Hyperview submission vs Hyperview GT,\n", "- Intuition submission vs Hyperview GT,\n", "- DAT submission vs EnMAP GT,\n", "- Intuition vs Hyperview submission,\n", "- EnMAP (per AOI) vs EnMAP GT,\n", "- EnMAP (per AOI) vs DAT submission.\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Processing: /mnt/d/new_runs_decoder_v2/backbone_name_terramind_v1_base_decoder_name_UperNetDecoder_output_size_4_mask_none_fill_last_with_150_True_target_scaling_std_batch_size_64_epochs_100_lr_5e-05\n", "Processing: /mnt/d/new_runs_decoder_v2/backbone_name_terramind_v1_base_decoder_name_UNetDecoder_output_size_4_mask_none_fill_last_with_150_True_target_scaling_std_batch_size_64_epochs_100_lr_5e-05\n", "Processing: /mnt/d/new_runs_decoder_v2/backbone_name_terramind_v1_large_decoder_name_UperNetDecoder_output_size_4_mask_none_fill_last_with_150_True_target_scaling_std_batch_size_32_epochs_100_lr_5e-05\n", "Processing: /mnt/d/new_runs_decoder_v2/backbone_name_terramind_v1_large_decoder_name_UNetDecoder_output_size_4_mask_none_fill_last_with_150_True_target_scaling_std_batch_size_32_epochs_100_lr_5e-05\n", "Saved correlation workbook to: /home/jsadel/fastEO_uc4/correlation_results.xlsx\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
modelhyperview_plcchyperview_srccdat_plccdat_srcc
0backbone_name_terramind_v1_base_decoder_name_U...0.9223760.9341780.9467350.958205
1backbone_name_terramind_v1_base_decoder_name_U...0.9230120.9324050.9462470.955108
2backbone_name_terramind_v1_large_decoder_name_...0.9159830.9267090.9365360.945435
3backbone_name_terramind_v1_large_decoder_name_...0.9189010.9302180.9407730.952539
\n", "
" ], "text/plain": [ " model hyperview_plcc \\\n", "0 backbone_name_terramind_v1_base_decoder_name_U... 0.922376 \n", "1 backbone_name_terramind_v1_base_decoder_name_U... 0.923012 \n", "2 backbone_name_terramind_v1_large_decoder_name_... 0.915983 \n", "3 backbone_name_terramind_v1_large_decoder_name_... 0.918901 \n", "\n", " hyperview_srcc dat_plcc dat_srcc \n", "0 0.934178 0.946735 0.958205 \n", "1 0.932405 0.946247 0.955108 \n", "2 0.926709 0.936536 0.945435 \n", "3 0.930218 0.940773 0.952539 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Input GT paths.\n", "HYPERVIEW_GT_PATH = Path('/home/jsadel/fast_eo/hyperview_data/test_gt.csv')\n", "ENMAP_GT_PATH = Path('/home/jsadel/fast_eo/hyperview_data/enmap_test_gt.csv')\n", "\n", "# Select model directories to compare.\n", "MODEL_DIRS = [\n", " Path('/mnt/d/new_runs_decoder_v2/backbone_name_terramind_v1_base_decoder_name_UperNetDecoder_output_size_4_mask_none_fill_last_with_150_True_target_scaling_std_batch_size_64_epochs_100_lr_5e-05'),\n", " Path('/mnt/d/new_runs_decoder_v2/backbone_name_terramind_v1_base_decoder_name_UNetDecoder_output_size_4_mask_none_fill_last_with_150_True_target_scaling_std_batch_size_64_epochs_100_lr_5e-05'),\n", " Path('/mnt/d/new_runs_decoder_v2/backbone_name_terramind_v1_large_decoder_name_UperNetDecoder_output_size_4_mask_none_fill_last_with_150_True_target_scaling_std_batch_size_32_epochs_100_lr_5e-05'),\n", " Path('/mnt/d/new_runs_decoder_v2/backbone_name_terramind_v1_large_decoder_name_UNetDecoder_output_size_4_mask_none_fill_last_with_150_True_target_scaling_std_batch_size_32_epochs_100_lr_5e-05'),\n", "]\n", "\n", "# EnMAP AOIs (one submission file per AOI).\n", "AOIS = [\n", " '20231106T103216Z',\n", " '20231109T101043Z',\n", " '20231117T101736Z',\n", " '20240324T103513Z',\n", " '20240331T101712Z',\n", " '20240427T101706Z',\n", " '20240501T102037Z',\n", " '20240505T102406Z',\n", " '20240702T102823Z',\n", "]\n", "\n", "CORRELATION_OUTPUT_XLSX = Path('correlation_results.xlsx')\n", "\n", "# Load GT arrays once and flatten values.\n", "hyperview_gt = pd.read_csv(HYPERVIEW_GT_PATH).iloc[:, 1:].values.flatten()\n", "enmap_gt = pd.read_csv(ENMAP_GT_PATH).iloc[:, 1:].values.flatten()\n", "\n", "summary_rows: list[dict] = []\n", "\n", "with pd.ExcelWriter(CORRELATION_OUTPUT_XLSX) as writer:\n", " for idx, model_dir in enumerate(MODEL_DIRS, 1):\n", " print(f'Processing: {model_dir}')\n", "\n", " dat_pred = load_submission_flat(model_dir / 'test_dat_submission.csv')\n", " hv_pred = load_submission_flat(model_dir / 'submission.csv')\n", " i1_pred = load_submission_flat(model_dir / 'test_intuition_submission.csv')\n", "\n", " rows: list[list] = []\n", "\n", " def maybe_metric(metric_name: str, ref: np.ndarray | None, pred: np.ndarray | None, score: float | str = 'N/A') -> None:\n", " if ref is None or pred is None:\n", " rows.append([metric_name, 'N/A', 'N/A', 'N/A', score])\n", " return\n", " plcc, srcc, rmse = compute_plcc_srcc_rmse(pred, ref)\n", " rows.append([metric_name, plcc, srcc, rmse, score])\n", "\n", " maybe_metric(\n", " 'Hyperview vs Hyperview GT',\n", " hyperview_gt,\n", " hv_pred,\n", " load_custom_score(model_dir / 'all_metrics.json'),\n", " )\n", " maybe_metric(\n", " 'Intuition vs Hyperview GT',\n", " hyperview_gt,\n", " i1_pred,\n", " load_custom_score(model_dir / 'test_intuition_all_metrics.json'),\n", " )\n", " maybe_metric(\n", " 'DAT vs EnMAP GT',\n", " enmap_gt,\n", " dat_pred,\n", " load_custom_score(model_dir / 'test_dat_all_metrics.json'),\n", " )\n", " maybe_metric('Intuition vs Hyperview Submission', hv_pred, i1_pred)\n", "\n", " for aoi in AOIS:\n", " enmap_pred = load_submission_flat(model_dir / f'test_enmap_{aoi}_submission.csv')\n", " enmap_score = load_custom_score(model_dir / f'test_enmap_{aoi}_all_metrics.json')\n", "\n", " maybe_metric(f'EnMAP {aoi} vs EnMAP GT', enmap_gt, enmap_pred, enmap_score)\n", " maybe_metric(f'EnMAP {aoi} vs DAT', dat_pred, enmap_pred)\n", "\n", " df = pd.DataFrame(rows, columns=['Metric', 'PLCC', 'SRCC', 'RMSE', 'Score'])\n", " df.to_excel(writer, sheet_name=f'Model_{idx:02d}', index=False)\n", "\n", " # Compact summary row for quick comparison across models.\n", " hv_row = df[df['Metric'] == 'Hyperview vs Hyperview GT'].head(1)\n", " dat_row = df[df['Metric'] == 'DAT vs EnMAP GT'].head(1)\n", " summary_rows.append(\n", " {\n", " 'model': model_dir.name,\n", " 'hyperview_plcc': hv_row['PLCC'].iloc[0] if not hv_row.empty else 'N/A',\n", " 'hyperview_srcc': hv_row['SRCC'].iloc[0] if not hv_row.empty else 'N/A',\n", " 'dat_plcc': dat_row['PLCC'].iloc[0] if not dat_row.empty else 'N/A',\n", " 'dat_srcc': dat_row['SRCC'].iloc[0] if not dat_row.empty else 'N/A',\n", " }\n", " )\n", "\n", " pd.DataFrame(summary_rows).to_excel(writer, sheet_name='Summary', index=False)\n", "\n", "print(f'Saved correlation workbook to: {CORRELATION_OUTPUT_XLSX.resolve()}')\n", "pd.DataFrame(summary_rows)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Notes\n", "\n", "- This notebook keeps the original workflow but removes duplicated cells.\n", "- To test another experiment group, only edit `MODEL_DIRS` and rerun the last section.\n" ] } ], "metadata": { "kernelspec": { "display_name": "fastEO", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.14" } }, "nbformat": 4, "nbformat_minor": 2 }