"""Upload only essential model files to HuggingFace (best fold + metadata).

Usage: python upload_models.py [--cleanup] [--exp all|exp6_ce|exp7_supcon|exp8_sphor|exp9_ncd|exp10_mgh_ncd]
"""
import json
import os
import sys
from pathlib import Path

from dotenv import load_dotenv
from huggingface_hub import HfApi

# Load HF_TOKEN from the project-level .env file, when one exists
# (project root: resnet18/.env, three directories above this script).
_env = Path(__file__).resolve().parent.parent.parent / ".env"
if _env.exists():
    load_dotenv(_env)

# Target Hugging Face model repository.
HF_REPO = "marcellorusso/orchid-ncd-models"

# Absolute path of the local experiments directory (two levels above this script).
EXP_BASE = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "..", "experiments")
)

# Model keys; combined with an experiment prefix to form directory names.
MODELS = [
    "resnet18",
    "resnet50",
    "convnext_tiny",
    "convnext_small",
    "dinov2_small",
    "dinov2_base",
]

# Human-readable names used in the README tables.
VARIANT_DISPLAY = dict(
    resnet18="ResNet-18",
    resnet50="ResNet-50",
    convnext_tiny="ConvNeXt-Tiny",
    convnext_small="ConvNeXt-Small",
    dinov2_small="DINOv2-Small",
    dinov2_base="DINOv2-Base",
)

# One entry per experiment: local directory prefix, remote directory, display label.
EXPERIMENTS = [
    {"prefix": prefix, "exp": exp, "label": label}
    for prefix, exp, label in (
        ("exp6_clean_split", "exp6_ce", "Exp 6 (CE)"),
        ("exp7_supcon", "exp7_supcon", "Exp 7 (SupCon)"),
        ("exp8_sphor", "exp8_sphor", "Exp 8 (SpHOR)"),
        ("exp9_ncd", "exp9_ncd", "Exp 9 (NCD)"),
        ("exp10_mgh_ncd", "exp10_mgh_ncd", "Exp 10 (MGH-NCD)"),
    )
]


def upload_model(api, model, exp, exp_prefix):
    """Upload the essential artifacts of one trained model to ``HF_REPO``.

    The local experiment directory is ``<EXP_BASE>/<exp_prefix>_<model>`` and
    files are placed under ``<exp>/<exp_prefix>_<model>/`` in the repo.
    Uploaded items: ``results.json``, ``config.json`` (skipped silently when
    absent), the best-fold checkpoint ``best_fold_<N>.pt``, and -- when the
    directories exist -- ``best_fold_<N>_adapter/`` and ``embeddings/``.

    Args:
        api: Client exposing ``upload_file`` and ``upload_folder``.
        model: Model key used as the directory-name suffix.
        exp: Remote top-level directory of the experiment.
        exp_prefix: Prefix of the local experiment directory name.

    Returns:
        True when the upload ran; False when the model was skipped because
        ``results.json``, its ``best_fold`` entry, or the checkpoint is missing.
    """
    exp_name = f"{exp_prefix}_{model}"
    local_dir = os.path.join(EXP_BASE, exp_name)
    remote_dir = f"{exp}/{exp_name}"

    results_path = os.path.join(local_dir, "results.json")
    if not os.path.exists(results_path):
        print(f"  SKIP {exp_name}: no results.json")
        return False

    # Context manager so the handle is closed deterministically.
    with open(results_path, encoding="utf-8") as fh:
        results = json.load(fh)

    best_fold = results.get("best_fold")
    if best_fold is None:
        print(f"  SKIP {exp_name}: no best_fold in results")
        return False

    best_pt = f"best_fold_{best_fold}.pt"
    if not os.path.exists(os.path.join(local_dir, best_pt)):
        print(f"  SKIP {exp_name}: {best_pt} not found")
        return False

    mean_f1 = results.get("mean_f1", 0)
    test_f1 = results.get("test_metrics", {}).get("f1_macro", 0)
    print(f"\n  Uploading {exp_name} (Val F1={mean_f1:.4f}, Test F1={test_f1:.4f})")

    # Single files keep the same name locally and remotely.
    for file_name in ("results.json", "config.json", best_pt):
        local_path = os.path.join(local_dir, file_name)
        if not os.path.exists(local_path):
            continue
        size_kb = os.path.getsize(local_path) // 1024
        print(f"    {file_name} ({size_kb}KB)")
        api.upload_file(
            repo_id=HF_REPO,
            path_or_fileobj=local_path,
            path_in_repo=f"{remote_dir}/{file_name}",
        )

    # Optional folders: the best-fold adapter and the embeddings.
    adapter_name = f"best_fold_{best_fold}_adapter"
    for folder_name in (adapter_name, "embeddings"):
        folder_path = os.path.join(local_dir, folder_name)
        if not os.path.isdir(folder_path):
            continue
        print(f"    {folder_name}/")
        api.upload_folder(
            repo_id=HF_REPO,
            folder_path=folder_path,
            path_in_repo=f"{remote_dir}/{folder_name}",
        )

    print(f"    Done: {exp_name}")
    return True


def cleanup_old_files(api):
    """Delete stale per-fold files from ``HF_REPO``.

    For every experiment/model pair that has a local ``results.json`` with a
    ``best_fold`` entry, the remote directory ``<exp>/<prefix>_<model>/`` is
    scanned. ``results.json``, ``config.json``, the best-fold checkpoint and
    anything whose name starts with the best-fold adapter name are always
    kept. Among the remaining files, these are deleted:

    * ``best_fold_*.pt`` (checkpoints of other folds),
    * ``fold_*.json``,
    * files whose name starts with ``REPORT_``.

    Args:
        api: Client exposing ``list_repo_files`` and ``delete_file``.
    """
    print("\n  Cleaning up old files from HF...")
    all_files = api.list_repo_files(HF_REPO)
    removed = 0

    for exp_config in EXPERIMENTS:
        exp = exp_config["exp"]
        exp_prefix = exp_config["prefix"]

        for model in MODELS:
            exp_name = f"{exp_prefix}_{model}"
            prefix = f"{exp}/{exp_name}/"

            # The local results decide which fold is "best"; without them
            # nothing can be classified as stale, so the model is skipped.
            local_results = os.path.join(EXP_BASE, exp_name, "results.json")
            if not os.path.exists(local_results):
                continue
            with open(local_results, encoding="utf-8") as fh:
                best_fold = json.load(fh).get("best_fold")
            if best_fold is None:
                continue

            keep = ("results.json", "config.json", f"best_fold_{best_fold}.pt")
            best_adapter = f"best_fold_{best_fold}_adapter"

            for f in all_files:
                if not f.startswith(prefix):
                    continue
                fname = f[len(prefix):]

                if fname in keep or fname.startswith(best_adapter):
                    continue

                is_stale = (
                    (fname.startswith("best_fold_") and fname.endswith(".pt"))
                    or (fname.startswith("fold_") and fname.endswith(".json"))
                    or fname.startswith("REPORT_")
                )
                if is_stale:
                    print(f"    DELETE {f}")
                    api.delete_file(repo_id=HF_REPO, path_in_repo=f)
                    removed += 1

    print(f"  Cleaned up {removed} old files")


def _build_results_table(exp, exp_prefix):
    """Build the markdown table rows for one experiment.

    Reads ``<EXP_BASE>/<exp_prefix>_<model>/results.json`` for every entry of
    ``MODELS`` and formats one pipe-delimited row per model that has results.
    Missing metrics default to 0, a missing ``best_fold`` to ``"?"`` and a
    missing ``num_folds`` to 5.

    Args:
        exp: Unused here; kept so the signature mirrors the other helpers.
        exp_prefix: Prefix of the local experiment directory names.

    Returns:
        List of markdown row strings (without the table header); empty when
        no model of the experiment has a ``results.json``.
    """
    rows = []
    for model in MODELS:
        exp_name = f"{exp_prefix}_{model}"
        local_results = os.path.join(EXP_BASE, exp_name, "results.json")
        if not os.path.exists(local_results):
            continue
        # Context manager so the handle is closed deterministically.
        with open(local_results, encoding="utf-8") as fh:
            r = json.load(fh)
        mean_f1 = r.get("mean_f1", 0)
        std_f1 = r.get("std_f1", 0)
        tm = r.get("test_metrics", {})
        test_acc = tm.get("accuracy", 0)
        test_f1 = tm.get("f1_macro", 0)
        best_fold = r.get("best_fold", "?")
        num_folds = r.get("num_folds", 5)
        display = VARIANT_DISPLAY.get(model, model)
        rows.append(
            f"| {display} | {mean_f1:.4f} ± {std_f1:.4f} | {test_f1:.4f} | {test_acc:.4f} | {best_fold} | {num_folds} |"
        )
    return rows


def _build_results_yaml(exp6_rows, exp7_rows, exp8_rows, exp9_rows=None, exp10_rows=None):
    """Generate structured YAML for model evaluation results."""
    lines = ["model-index:"]
    all_rows = []
    for exp_label, rows in [("Exp 6 (CE)", exp6_rows), ("Exp 7 (SupCon)", exp7_rows),
                             ("Exp 8 (SpHOR)", exp8_rows), ("Exp 9 (NCD)", exp9_rows or []),
                             ("Exp 10 (Ultimate)", exp10_rows or [])]:
        for row in rows:
            model_name = row.split("|")[1].strip()
            f1_val = row.split("|")[2].strip().split(" ")[0]
            all_rows.append((exp_label, model_name, f1_val))

    if not all_rows:
        return ""

    for i, (exp, model, f1) in enumerate(all_rows):
        lines.append(f"  - name: {model}")
        lines.append("    results:")
        lines.append("      - task:")
        lines.append("          type: image-classification")
        lines.append("        dataset:")
        lines.append('          name: orchid-ncd-dataset')
        lines.append('          type: marcellorusso/orchid-ncd-dataset')
        lines.append("        metrics:")
        lines.append("          - name: Macro F1 Score")
        lines.append("            type: f1_macro")
        lines.append(f"            value: {f1}")
        lines.append("        source:")
        lines.append(f"          name: {exp}")
        lines.append(f"          url: https://huggingface.co/datasets/marcellorusso/orchid-ncd-dataset")

    return "\n".join(lines)


def update_readme(api):
    """Regenerate the model repo README from local results and upload it.

    Builds one markdown section per experiment that has at least one local
    ``results.json``, prepends YAML front matter (including the
    ``model-index`` block), and uploads the text as ``README.md`` to
    ``HF_REPO``.

    The README contains non-ASCII characters, so it is encoded explicitly as
    UTF-8 and uploaded from memory. This avoids a temporary file, whose
    encoding would otherwise depend on the platform locale and which would be
    left behind if the upload failed.

    Args:
        api: Client exposing ``upload_file``.
    """
    header = "| Model | Val F1 (macro) | Test F1 | Test Acc | Best Fold | Folds |\n|---|---|---|---|---|---|"

    exp6_rows = _build_results_table("exp6_ce", "exp6_clean_split")
    exp7_rows = _build_results_table("exp7_supcon", "exp7_supcon")
    exp8_rows = _build_results_table("exp8_sphor", "exp8_sphor")
    exp9_rows = _build_results_table("exp9_ncd", "exp9_ncd")
    exp10_rows = _build_results_table("exp10_mgh_ncd", "exp10_mgh_ncd")

    # Sections are emitted only for experiments that produced table rows.
    # chr(10) is a newline; a backslash is not allowed inside an f-string
    # expression on older Python versions.
    sections = []

    if exp6_rows:
        sections.append(f"""## Exp 6: Cross-Entropy

5-fold stratified cross-validation on deduplicated clean split (2,232 train, 300 test).

**Training recipe:** epochs=100, patience=15, effective batch=32 (gradient accumulation ×4), per-architecture LR from registry.

{header}
{chr(10).join(exp6_rows)}""")

    if exp7_rows:
        sections.append(f"""## Exp 7: Supervised Contrastive Learning

Two-phase training: SupCon pretraining (InfoNCE, τ=0.07) → CE fine-tuning with frozen backbone.

**Recipe:** same per-architecture optimizer/LR, projection dim=128, CE Phase LR=0.01, patience=15 on val metrics.

{header}
{chr(10).join(exp7_rows)}""")

    if exp8_rows:
        sections.append(f"""## Exp 8: Spherical Orthogonal Prototypes (SpHOR)

Two-phase training: SupCon + Spherical Orthogonal Prototypes → CE fine-tuning with frozen backbone.

**Recipe:** same as Exp 7, with spherical prototype repulsion (repulse=0.01).

{header}
{chr(10).join(exp8_rows)}""")

    if exp9_rows:
        sections.append(f"""## Exp 9: Novel Class Discovery (NCD)

NCD scenario: O. majellensis excluded from training (hidden novel class).

**Recipe:** same as Exp 8.

{header}
{chr(10).join(exp9_rows)}""")

    if exp10_rows:
        sections.append(f"""## Exp 10: Ultimate Experiment

Final experiment combining multi-granularity features, hard negative mining, deep fine-tuning, and OSR ensemble.

**Recipe:** LoRA r=32 + partial unfreeze for DINOv2, full fine-tuning for ConvNeXt/ResNet. Hard negative weight 5.0 on (majellensis, sphegodes) pairs.

{header}
{chr(10).join(exp10_rows)}""")

    results_yaml = _build_results_yaml(exp6_rows, exp7_rows, exp8_rows, exp9_rows, exp10_rows)

    readme = f"""---
library_name: pytorch
tags:
- computer-vision
- image-classification
- fine-grained-classification
- ophrys-orchids
- resnet
- convnext
- dinov2
datasets:
- marcellorusso/orchid-ncd-dataset
license: mit
pipeline_tag: image-classification
{results_yaml}
---

# OrchID-NCD Models

Trained model weights for the [OrchID-NCD](https://huggingface.co/spaces/marcellorusso/orchid-ncd) project — ultra-fine-grained visual classification of *Ophrys* orchids.

Fine-grained classification of six cryptic *Ophrys* species (*O. exaltata, O. garganica, O. incubacea, O. majellensis, O. sphegodes, O. sphegodes* Palena) using ResNet-18/50, ConvNeXt-Tiny/Small, and DINOv2-Small/Base.

{chr(10).join(sections)}

## Structure

```
exp6_ce/                           — Exp 6 (Cross-Entropy)
  exp6_clean_split_resnet18/
    results.json                   — aggregated metrics + test results
    config.json                    — training configuration
    best_fold_N.pt                 — weights of the best fold
    best_fold_N_adapter/           — LoRA adapter (DINOv2 only)
exp7_supcon/                       — Exp 7 (SupCon + CE fine-tune)
  exp7_supcon_resnet18/
    ...
exp8_sphor/                        — Exp 8 (SpHOR)
  exp8_sphor_resnet18/
    ...
exp9_ncd/                          — Exp 9 (NCD)
  exp9_ncd_resnet18/
    ...
exp10_mgh_ncd/                     — Exp 10 (MGH-NCD)
  exp10_mgh_ncd_resnet18/
    ...
```

## Usage

The classifier in the [OrchID-NCD Space](https://huggingface.co/spaces/marcellorusso/orchid-ncd) downloads these weights at startup and uses them for inference.

## Links

- [Dataset](https://huggingface.co/datasets/marcellorusso/orchid-ncd-dataset)
- [Live Demo](https://huggingface.co/spaces/marcellorusso/orchid-ncd)
- [GitHub](https://github.com/squidslab/OrchID)
"""

    # upload_file accepts raw bytes, so no temporary file is needed.
    api.upload_file(
        repo_id=HF_REPO,
        path_or_fileobj=readme.encode("utf-8"),
        path_in_repo="README.md",
    )
    print("  README.md updated")


def main():
    """Authenticate, upload the selected experiments, then refresh the README.

    Command-line flags (read from ``sys.argv``):

    * ``--cleanup``: after uploading, delete stale per-fold files remotely.
    * ``--exp NAME`` or ``--exp=NAME``: upload only experiments whose ``exp``
      id contains ``NAME`` (substring match); ``all`` (the default) uploads
      every experiment. When the flag is repeated, the last occurrence wins.

    A failure while uploading one model is printed and does not stop the
    remaining uploads. The README is regenerated on every run.
    """
    token = os.environ.get("HF_TOKEN")
    api = HfApi(token=token)
    # Fails early when the token is missing or invalid.
    api.whoami()
    print("Authenticated OK")

    args = sys.argv[1:]
    do_cleanup = "--cleanup" in args

    # Determine which experiments to upload. Positions are tracked with
    # enumerate so that each "--exp" reads the value that follows *it*,
    # rather than the value after the first "--exp" on the command line.
    exp_filter = "all"
    for pos, arg in enumerate(args):
        if arg.startswith("--exp="):
            # maxsplit=1 keeps any "=" that is part of the value.
            exp_filter = arg.split("=", 1)[1]
        elif arg == "--exp" and pos + 1 < len(args):
            exp_filter = args[pos + 1]

    uploaded = 0
    total = 0
    for exp_config in EXPERIMENTS:
        exp = exp_config["exp"]
        exp_prefix = exp_config["prefix"]
        label = exp_config["label"]

        if exp_filter != "all" and exp_filter not in exp:
            continue

        print(f"\n=== Uploading {label} ({exp}) ===")
        for model in MODELS:
            total += 1
            try:
                if upload_model(api, model, exp, exp_prefix):
                    uploaded += 1
            except Exception as e:
                # Best effort: report the failure and continue with the rest.
                print(f"  ERROR {model}: {e}")

    print(f"\nUploaded {uploaded}/{total} models")

    if do_cleanup:
        cleanup_old_files(api)

    update_readme(api)

    print("\nDone!")


# Run the upload only when executed as a script, not when imported.
if __name__ == "__main__":
    main()