WEO-SAS
/

sen2sr

@@ -15,5 +15,6 @@
     "hard_constraint_bands": null,
     "weights_file": "model.safetensor",
     "hard_constraint_file": "hard_constraint.safetensor",
-    "description": "SEN2SRLite RGBN x4: Sentinel-2 RGBN 10m -> 2.5m super-resolution (4x, CNN)"
-}

     "hard_constraint_bands": null,
     "weights_file": "model.safetensor",
     "hard_constraint_file": "hard_constraint.safetensor",
+    "description": "SEN2SRLite RGBN x4: Sentinel-2 RGBN 10m -> 2.5m super-resolution (4x, CNN)",
+    "variant": "main"
+}

evaluate.py ADDED Viewed

	@@ -0,0 +1,408 @@

+"""
+evaluate.py
+===========
+Evaluate WEO-SAS/sen2sr models using the opensr-test benchmark suite, then
+update the HuggingFace model card Evaluation Results (model-index YAML) for
+the WEO-SAS/sen2sr repo.
+This script lives inside each branch of WEO-SAS/sen2sr and is meant to be run
+from the directory returned by snapshot_download():
+    from huggingface_hub import snapshot_download
+    local_dir = snapshot_download("WEO-SAS/sen2sr")   # or specify revision
+    import subprocess, sys
+    subprocess.run([sys.executable, f"{local_dir}/evaluate.py", "--push"])
+Or from the command line after cloning/downloading:
+    python evaluate.py --push --token hf_...
+Requirements
+------------
+    pip install opensr-test huggingface_hub sen2sr safetensors
+Outputs
+-------
+1. A CSV file with per-sample metric values.
+2. Updated model-index YAML in the WEO-SAS/sen2sr main-branch README.md,
+   using the HuggingFace EvalResult / ModelCardData API.
+HF Evaluation Results format
+-----------------------------
+Each result is keyed by (task_type, dataset_type, metric_type) and indexed
+under the model variant name.  Running this script from different variants
+accumulates results in the shared README on the main branch.
+"""
+from __future__ import annotations
+import argparse
+import csv
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Dict, List, Optional
+import numpy as np
+import torch
+# ---------------------------------------------------------------------------
+# Metric metadata
+# ---------------------------------------------------------------------------
# Display names used in the HF model card, keyed by opensr-test dataset id.
# Insertion order matters: DATASETS (derived below) is the default
# evaluation order offered by the CLI.
DATASET_NAMES = {
    "naip": "NAIP",
    "spot": "SPOT",
    "venus": "Venus",
    "spain_crops": "Spain Crops",
    "spain_urban": "Spain Urban",
}
# Display names for each metric, keyed by the metric identifier.
# Insertion order matters: METRIC_COLS (derived below) is the CSV column
# order and the row order of the printed tables.
METRIC_NAMES = {
    "reflectance": "Reflectance Distance (L1)",
    "spectral": "Spectral Angle Distance",
    "spatial": "Phase Correlation Error",
    "synthesis": "Synthesis Score",
    "hallucination": "Hallucination Score",
    "omission": "Omission Score",
    "improvement": "Improvement Score",
}
# Identifier lists are taken from the mappings so the two cannot drift apart.
METRIC_COLS = list(METRIC_NAMES)
DATASETS = list(DATASET_NAMES)
+# ---------------------------------------------------------------------------
+# Model loading
+# ---------------------------------------------------------------------------
def load_model_from_local(local_dir: str):
    """
    Load the model implementation and its config from a snapshot directory.

    Reads ``config.json`` from *local_dir*, executes ``model.py`` from the same
    directory under the module name ``model`` and instantiates its ``Model``
    class with ``local_dir=local_dir``.

    Returns
    -------
    (model, config) : the ``Model`` instance and the parsed config dict.

    Raises
    ------
    FileNotFoundError : ``config.json`` or ``model.py`` does not exist.
    ImportError       : no import spec/loader can be built for ``model.py``.

    NOTE(review): this executes whatever Python code ``model.py`` contains;
    only point it at snapshot directories you trust.
    """
    import importlib.util

    config_path = os.path.join(local_dir, "config.json")
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(config_path, encoding="utf-8") as f:
        config = json.load(f)

    model_path = os.path.join(local_dir, "model.py")
    # Fail before touching sys.path / sys.modules so that a bad directory
    # leaves the interpreter state untouched.
    if not os.path.isfile(model_path):
        raise FileNotFoundError(f"model.py not found in {local_dir}")

    if local_dir not in sys.path:
        sys.path.insert(0, local_dir)
    # Clear any cached module from a previous variant
    for mod in ("model", "sen2sr_pt", "predictor", "base"):
        sys.modules.pop(mod, None)

    # Dynamically load model.py from the local dir
    spec = importlib.util.spec_from_file_location("model", model_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot build an import spec for {model_path}")
    module = importlib.util.module_from_spec(spec)
    # Register before executing, following the importlib recipe for
    # importing a source file directly.
    sys.modules["model"] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind for later imports.
        sys.modules.pop("model", None)
        raise
    return module.Model(local_dir=local_dir), config
+# ---------------------------------------------------------------------------
+# Inference
+# ---------------------------------------------------------------------------
def run_sr(model, lr_np: np.ndarray, in_channels: int) -> np.ndarray:
    """
    Super-resolve one LR patch after matching its channel count to the model.

    lr_np       : array whose first axis is the channel axis
    in_channels : number of channels handed to ``model.predict``
    Returns     : whatever ``model.predict`` returns for the adjusted input

    Missing channels are appended as float32 zeros; surplus channels are
    dropped from the end.
    """
    available = lr_np.shape[0]
    missing = in_channels - available
    if missing > 0:
        # Too few channels: append all-zero planes with the same spatial shape.
        filler = np.zeros((missing, *lr_np.shape[1:]), dtype=np.float32)
        model_input = np.concatenate((lr_np, filler), axis=0)
    elif missing < 0:
        # Too many channels: keep only the leading ones.
        model_input = lr_np[:in_channels]
    else:
        model_input = lr_np
    return model.predict(model_input)
+# ---------------------------------------------------------------------------
+# Per-dataset evaluation
+# ---------------------------------------------------------------------------
def _as_numpy(x):
    """Return *x* as a NumPy array, moving torch tensors to host memory first."""
    if isinstance(x, torch.Tensor):
        # detach() so tensors that still carry autograd history can be converted.
        return x.detach().cpu().numpy()
    return np.asarray(x)


def evaluate_dataset(
    model,
    in_channels:  int,
    dataset_name: str,
    max_samples:  Optional[int] = None,
    verbose:      bool = True,
) -> Dict[str, float]:
    """
    Evaluate model on one opensr-test dataset.

    model        : object exposing ``predict`` (called through ``run_sr``)
    in_channels  : number of channels the model expects
    dataset_name : name passed to ``opensr_test.load``
    max_samples  : optional cap on the number of samples evaluated
    verbose      : print a progress counter every 10 samples

    Returns dict of metric_name → mean_value (nan if no sample produced that
    metric), or an empty dict when the dataset itself could not be loaded.

    Raises ImportError if the ``opensr_test`` package is not installed.

    Samples on which inference or metric computation fails are reported and
    skipped; they do not abort the run.
    """
    try:
        import opensr_test
    except ImportError as err:
        # Chain the original error so the real cause (e.g. a broken
        # transitive dependency) stays visible in the traceback.
        raise ImportError("pip install opensr-test") from err
    try:
        dataset = opensr_test.load(dataset_name)
    except Exception as e:
        print(f"  [WARN] Could not load '{dataset_name}': {e}")
        return {}
    metrics_obj = opensr_test.Metrics()
    accum: Dict[str, list] = {m: [] for m in METRIC_COLS}
    total = len(dataset)
    n = total if max_samples is None else min(max_samples, total)
    for i in range(n):
        sample = dataset[i]
        lr = _as_numpy(sample["lr"]).astype(np.float32)
        hr = _as_numpy(sample["hr"]).astype(np.float32)
        # Single-band images arrive as (H, W); add the channel axis.
        if lr.ndim == 2:
            lr = lr[np.newaxis]
        if hr.ndim == 2:
            hr = hr[np.newaxis]
        try:
            # The model may return either an ndarray or a torch tensor; the
            # conversion lives inside the try so an unexpected output type
            # skips this sample instead of aborting the whole evaluation.
            sr = _as_numpy(run_sr(model, lr, in_channels))
            # Drop SR channels beyond those present in the LR input (e.g.
            # the zero-padded channels added by run_sr).
            sr_t = torch.from_numpy(np.ascontiguousarray(sr[: lr.shape[0]]))
        except Exception as e:
            print(f"    [WARN] SR failed on sample {i}: {e}")
            continue
        lr_t = torch.from_numpy(lr)
        hr_t = torch.from_numpy(hr)
        try:
            result = metrics_obj.compute(lr=lr_t, sr=sr_t, hr=hr_t)
        except Exception as e:
            print(f"    [WARN] Metrics failed on sample {i}: {e}")
            continue
        for m in METRIC_COLS:
            val = result.get(m)
            if val is not None:
                # Tensor/array-valued metrics are reduced to their mean.
                v = float(val.mean()) if hasattr(val, "mean") else float(val)
                accum[m].append(v)
        if verbose and (i + 1) % 10 == 0:
            print(f"    {i+1}/{n}", end="\r")
    if verbose:
        print()
    return {m: float(np.mean(vs)) if vs else float("nan") for m, vs in accum.items()}
+# ---------------------------------------------------------------------------
+# HF model card update
+# ---------------------------------------------------------------------------
def build_eval_results(
    variant:  str,
    results:  Dict[str, Dict[str, float]],  # dataset → metric → value
) -> list:
    """
    Build a list of huggingface_hub.EvalResult objects for one variant.

    One EvalResult per (dataset × metric) combination.  Missing and
    non-finite values (None, nan, ±inf) are skipped.

    The variant is stored in ``metric_config``.  ``EvalResult`` has no
    ``model_name`` field (passing one raises TypeError); ``metric_config`` is
    a per-metric field, so it is the tag ``update_model_card`` filters on to
    replace a variant's results.
    NOTE(review): ``metric_config`` is documented as the metric configuration
    name, so this is a pragmatic reuse — confirm it renders acceptably on the
    Hub.
    """
    from huggingface_hub import EvalResult
    eval_results = []
    for ds_name, metrics in results.items():
        for metric_name, value in metrics.items():
            if value is None or not np.isfinite(value):
                continue
            eval_results.append(
                EvalResult(
                    task_type        = "image-to-image",
                    task_name        = "Super-Resolution",
                    dataset_type     = f"opensr-test-{ds_name}",
                    dataset_name     = DATASET_NAMES.get(ds_name, ds_name),
                    dataset_config   = ds_name,
                    metric_type      = metric_name,
                    metric_name      = METRIC_NAMES.get(metric_name, metric_name),
                    # float() so NumPy scalars never reach the YAML dumper.
                    metric_value     = round(float(value), 6),
                    metric_config    = variant,
                )
            )
    return eval_results
def update_model_card(
    variant:      str,
    eval_results: list,
    repo_id:      str = "WEO-SAS/sen2sr",
    token:        Optional[str] = None,
    push:         bool = False,
) -> None:
    """
    Load the model card from the HF main branch, merge/replace this variant's
    eval results, and optionally push back.

    variant      : tag identifying which existing entries to replace
    eval_results : new EvalResult objects for this variant
    repo_id      : HuggingFace repo whose card is updated
    token        : HuggingFace token (None → library default)
    push         : push the card when True, otherwise print a YAML preview

    Existing entries whose ``metric_config`` equals *variant* are dropped and
    replaced by *eval_results*; all other entries are kept.

    If the card cannot be loaded for any reason other than a missing README
    and *push* is True, the error is re-raised: pushing an empty replacement
    would overwrite the real card.
    """
    from huggingface_hub import ModelCard
    from huggingface_hub.utils import EntryNotFoundError
    print(f"\nLoading model card from {repo_id} (main)...")
    try:
        card = ModelCard.load(repo_id, token=token)
    except Exception as e:
        # A missing README is the only failure for which starting from an
        # empty card is safe to push; anything else (network, auth, ...)
        # must not lead to the existing card being overwritten.
        if push and not isinstance(e, EntryNotFoundError):
            raise
        print(f"  [WARN] Could not load card: {e}. Creating empty card.")
        card = ModelCard("---\n---\n")
    existing: list = card.data.eval_results or []
    # Remove old entries for this variant, keep other variants
    kept = [r for r in existing if getattr(r, "metric_config", None) != variant]
    merged = kept + list(eval_results)
    card.data.eval_results = merged
    # The model-index needs a model name; fall back to the repo name when
    # the card does not define one yet.
    if not getattr(card.data, "model_name", None):
        card.data.model_name = repo_id.split("/")[-1]
    print(f"  Model-index now has {len(merged)} EvalResult entries "
          f"({len(eval_results)} from '{variant}', {len(kept)} from other variants).")
    if push:
        print(f"  Pushing updated card to {repo_id}...")
        card.push_to_hub(repo_id, token=token)
        print("  Done.")
    else:
        print("  --push not set; card not pushed. Pass --push to update HF.")
        print("\n--- model-index YAML preview ---")
        print(card.data.to_yaml())
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
def _print_metric_table(values: Dict[str, float], value_header: str) -> None:
    """Print one row per metric in METRIC_COLS with a better-direction arrow."""
    print(f"  {'Metric':<18} {value_header:>10}")
    print(f"  {'-'*30}")
    for m in METRIC_COLS:
        # Only synthesis and improvement are "higher is better".
        arrow = "↑" if m in ("synthesis", "improvement") else "↓"
        print(f"  {m:<18} {values.get(m, float('nan')):>9.4f} {arrow}")


def _nan_ignoring_mean(values) -> float:
    """Mean of the non-NaN entries of *values*; nan when there are none."""
    usable = [v for v in values if not np.isnan(v)]
    return float(np.mean(usable)) if usable else float("nan")


def main():
    """
    Command-line entry point.

    Loads the model from the snapshot directory, evaluates it on the selected
    datasets, writes a CSV of per-dataset means, prints a summary table and
    then updates (optionally pushes) the HuggingFace model card.
    """
    # Detect local_dir: script is inside the snapshot directory
    local_dir = str(Path(__file__).parent.resolve())
    parser = argparse.ArgumentParser(
        description="Evaluate WEO-SAS/sen2sr and update HF model card"
    )
    parser.add_argument(
        "--local-dir", default=local_dir,
        help="Path to the snapshot_download directory (default: script location)",
    )
    parser.add_argument(
        "--datasets", nargs="+", default=DATASETS, choices=DATASETS,
        help="Datasets to evaluate on (default: all 5)",
    )
    parser.add_argument(
        "--max-samples", type=int, default=None,
        help="Cap samples per dataset for a quick smoke-test",
    )
    parser.add_argument(
        "--output", default=None,
        help="CSV output path (default: sen2sr_<variant>_eval.csv in local_dir)",
    )
    parser.add_argument(
        "--repo-id", default="WEO-SAS/sen2sr",
        help="HuggingFace repo whose main-branch card to update",
    )
    parser.add_argument(
        "--token", default=os.environ.get("HF_TOKEN"),
        help="HuggingFace token (default: $HF_TOKEN)",
    )
    parser.add_argument(
        "--push", action="store_true",
        help="Push updated model card to HF after evaluation",
    )
    parser.add_argument(
        "--dry-run", action="store_true",
        help="Print model-index YAML preview without pushing",
    )
    args = parser.parse_args()
    # A cap below 1 would evaluate nothing and report every metric as nan.
    if args.max_samples is not None and args.max_samples < 1:
        parser.error("--max-samples must be a positive integer")
    # Load model + config
    print(f"Loading model from {args.local_dir} ...")
    model, config = load_model_from_local(args.local_dir)
    variant     = config.get("variant", "unknown")
    in_channels = config.get("in_channels", 4)
    print(f"Variant : {variant}")
    print(f"In-ch   : {in_channels}")
    print(f"Desc    : {config.get('description', '')}")
    # CSV output path
    csv_path = args.output or os.path.join(args.local_dir, f"sen2sr_{variant}_eval.csv")
    # Evaluate
    all_results: Dict[str, Dict[str, float]] = {}
    rows = []
    for ds in args.datasets:
        print(f"\n[{variant}] Dataset: {ds}")
        metrics = evaluate_dataset(model, in_channels, ds, args.max_samples)
        if not metrics:
            # Dataset could not be loaded; already reported by evaluate_dataset.
            continue
        all_results[ds] = metrics
        rows.append({"variant": variant, "dataset": ds, **metrics})
        _print_metric_table(metrics, "Value")
    # Save CSV
    if rows:
        fieldnames = ["variant", "dataset"] + METRIC_COLS
        # Explicit encoding so the file does not depend on the platform locale.
        with open(csv_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)
        print(f"\nCSV saved: {csv_path}")
    # Build HF EvalResult objects
    if not all_results:
        print("No results to push.")
        return
    # Summary table (mean across datasets).  Printed before the model card
    # update so the numbers are shown even if the network step fails.
    print("\n" + "="*60)
    print(f"SUMMARY — {variant} — mean across {list(all_results.keys())}")
    print("="*60)
    means = {
        m: _nan_ignoring_mean([v[m] for v in all_results.values() if m in v])
        for m in METRIC_COLS
    }
    _print_metric_table(means, "Mean")
    eval_results = build_eval_results(variant, all_results)
    print(f"\nBuilt {len(eval_results)} EvalResult entries for '{variant}'.")
    if not eval_results:
        # Updating the card now would drop this variant's existing entries
        # and add nothing in their place.
        print("No valid metric values; model card left unchanged.")
        return
    # Update model card (optionally push)
    update_model_card(
        variant      = variant,
        eval_results = eval_results,
        repo_id      = args.repo_id,
        token        = args.token,
        push         = args.push and not args.dry_run,
    )


if __name__ == "__main__":
    main()