{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Correlations Example\n", "\n", "Clean, reproducible correlation analysis for Hyperview, DAT, Intuition-1, and EnMAP submissions.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## How To Use\n", "\n", "1. Update paths in the configuration cells below.\n", "2. Run cells from top to bottom.\n", "3. The notebook will generate:\n", " - `metrics.xlsx` (aggregated `all_metrics.json` from run folders),\n", " - `correlation_results.xlsx` (cross-split PLCC/SRCC/RMSE + custom score).\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", "import json\n", "\n", "import numpy as np\n", "import pandas as pd\n", "from openpyxl import Workbook\n", "from scipy.stats import pearsonr, spearmanr\n", "from sklearn.metrics import mean_squared_error\n", "\n", "\n", "# Shared utility functions used by all sections.\n", "def compute_plcc_srcc_rmse(a: np.ndarray, b: np.ndarray) -> tuple[float, float, float]:\n", " \"\"\"Return PLCC, SRCC, and RMSE for two equally sized arrays.\"\"\"\n", " plcc = pearsonr(a, b)[0]\n", " srcc = spearmanr(a, b)[0]\n", " rmse = float(np.sqrt(mean_squared_error(a, b)))\n", " return plcc, srcc, rmse\n", "\n", "\n", "def load_submission_flat(csv_path: Path) -> np.ndarray | None:\n", " \"\"\"Load submission CSV, drop `sample_index` if present, and flatten to 1D.\"\"\"\n", " if not csv_path.exists():\n", " return None\n", " return pd.read_csv(csv_path).drop(columns=['sample_index'], errors='ignore').values.flatten()\n", "\n", "\n", "def load_custom_score(json_path: Path) -> float | str:\n", " \"\"\"Load `custom` metric from JSON file; return 'N/A' if missing.\"\"\"\n", " if not json_path.exists():\n", " return 'N/A'\n", " with json_path.open('r', encoding='utf-8') as f:\n", " return json.load(f).get('custom', 'N/A')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1) Export Metrics Workbook\n", "\n", "Collect `all_metrics.json` from each run directory and export grouped sheets (`P`, `K`, `Mg`, `pH`, `stats`).\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Saved metrics workbook to: /mnt/d/new_runs_decoder_v2/metrics.xlsx\n", "Number of runs exported: 43\n" ] } ], "source": [ "# Configure source directory with run folders and output Excel path.\n", "RUNS_DIR = Path('/mnt/d/new_runs_decoder_v2')\n", "OUTPUT_METRICS_XLSX = RUNS_DIR / 'metrics.xlsx'\n", "\n", "# Columns to export per worksheet.\n", "SHEETS_DEFINITIONS = {\n", " 'P': [\n", " 'run_name',\n", " 'P_avg_acc', 'P_acc', 'P_f1', 'P_mcc', 'P_kappa',\n", " 'P_r2', 'P_mse', 'P_mae',\n", " ],\n", " 'K': [\n", " 'run_name',\n", " 'K_avg_acc', 'K_acc', 'K_f1', 'K_mcc', 'K_kappa',\n", " 'K_r2', 'K_mse', 'K_mae',\n", " ],\n", " 'Mg': [\n", " 'run_name',\n", " 'Mg_avg_acc', 'Mg_acc', 'Mg_f1', 'Mg_mcc', 'Mg_kappa',\n", " 'Mg_r2', 'Mg_mse', 'Mg_mae',\n", " ],\n", " 'pH': [\n", " 'run_name',\n", " 'pH_avg_acc', 'pH_acc', 'pH_f1', 'pH_mcc', 'pH_kappa',\n", " 'pH_r2', 'pH_mse', 'pH_mae',\n", " ],\n", " 'stats': [\n", " 'run_name',\n", " 'mean_avg_acc', 'std_avg_acc',\n", " 'mean_acc', 'std_acc',\n", " 'mean_mcc', 'std_mcc',\n", " 'mean_f1', 'std_f1',\n", " 'P_score', 'K_score', 'Mg_score', 'pH_score',\n", " 'custom',\n", " ],\n", "}\n", "\n", "\n", "def export_metrics_workbook(runs_dir: Path, output_xlsx: Path) -> int:\n", " \"\"\"Scan run folders and export JSON metrics to a multi-sheet Excel workbook.\"\"\"\n", " rows: list[dict] = []\n", "\n", " for run_path in sorted(runs_dir.iterdir()):\n", " if not run_path.is_dir():\n", " continue\n", " metrics_path = run_path / 'all_metrics.json'\n", " if not metrics_path.exists():\n", " continue\n", "\n", " with metrics_path.open('r', encoding='utf-8') as f:\n", " metrics = json.load(f)\n", "\n", " row = {'run_name': run_path.name}\n", " row.update(metrics)\n", " rows.append(row)\n", "\n", " wb = Workbook()\n", " wb.remove(wb.active)\n", "\n", " for sheet_name, columns in SHEETS_DEFINITIONS.items():\n", " ws = wb.create_sheet(title=sheet_name)\n", " ws.append(columns)\n", " for row in rows:\n", " ws.append([row.get(col, '') for col in columns])\n", "\n", " wb.save(output_xlsx)\n", " return len(rows)\n", "\n", "\n", "n_runs = export_metrics_workbook(RUNS_DIR, OUTPUT_METRICS_XLSX)\n", "print(f'Saved metrics workbook to: {OUTPUT_METRICS_XLSX}')\n", "print(f'Number of runs exported: {n_runs}')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2) Correlation Study Across Splits\n", "\n", "Compute PLCC/SRCC/RMSE and attach `custom` score for selected model directories.\n", "\n", "Comparisons included:\n", "- Hyperview submission vs Hyperview GT,\n", "- Intuition submission vs Hyperview GT,\n", "- DAT submission vs EnMAP GT,\n", "- Intuition vs Hyperview submission,\n", "- EnMAP (per AOI) vs EnMAP GT,\n", "- EnMAP (per AOI) vs DAT submission.\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Processing: /mnt/d/new_runs_decoder_v2/backbone_name_terramind_v1_base_decoder_name_UperNetDecoder_output_size_4_mask_none_fill_last_with_150_True_target_scaling_std_batch_size_64_epochs_100_lr_5e-05\n", "Processing: /mnt/d/new_runs_decoder_v2/backbone_name_terramind_v1_base_decoder_name_UNetDecoder_output_size_4_mask_none_fill_last_with_150_True_target_scaling_std_batch_size_64_epochs_100_lr_5e-05\n", "Processing: /mnt/d/new_runs_decoder_v2/backbone_name_terramind_v1_large_decoder_name_UperNetDecoder_output_size_4_mask_none_fill_last_with_150_True_target_scaling_std_batch_size_32_epochs_100_lr_5e-05\n", "Processing: /mnt/d/new_runs_decoder_v2/backbone_name_terramind_v1_large_decoder_name_UNetDecoder_output_size_4_mask_none_fill_last_with_150_True_target_scaling_std_batch_size_32_epochs_100_lr_5e-05\n", "Saved correlation workbook to: /home/jsadel/fastEO_uc4/correlation_results.xlsx\n" ] }, { "data": { "text/html": [ "
| \n", " | model | \n", "hyperview_plcc | \n", "hyperview_srcc | \n", "dat_plcc | \n", "dat_srcc | \n", "
|---|---|---|---|---|---|
| 0 | \n", "backbone_name_terramind_v1_base_decoder_name_U... | \n", "0.922376 | \n", "0.934178 | \n", "0.946735 | \n", "0.958205 | \n", "
| 1 | \n", "backbone_name_terramind_v1_base_decoder_name_U... | \n", "0.923012 | \n", "0.932405 | \n", "0.946247 | \n", "0.955108 | \n", "
| 2 | \n", "backbone_name_terramind_v1_large_decoder_name_... | \n", "0.915983 | \n", "0.926709 | \n", "0.936536 | \n", "0.945435 | \n", "
| 3 | \n", "backbone_name_terramind_v1_large_decoder_name_... | \n", "0.918901 | \n", "0.930218 | \n", "0.940773 | \n", "0.952539 | \n", "