File size: 36,540 Bytes

c43a0be

"""
Project EDEN — Chart Generation & Model Card Enhancement Script
Generates training visualizations for every model card and pushes to HuggingFace.

Charts generated:
  Collection (EDEN-Core-Scripts repo):
    - energy_accuracy_overview.png  : all models, energy vs accuracy scatter
    - eag_leaderboard.png           : all models ranked by EAG
    - co2_comparison.png            : CO2 baseline vs EDEN per architecture

  Per-model repo:
    - training_curve.png            : accuracy + cumulative energy vs epoch
    - eag_curve.png                 : EAG metric trajectory over epochs

Model cards also get:
    - model-index YAML (enables HF native metrics widget)
    - Embedded chart images in README
"""

import os, json, glob
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
from huggingface_hub import HfApi, create_repo, upload_file

# ─── CONFIG ──────────────────────────────────────────────────────────────────
# Hugging Face access token, read from the environment. Falls back to an
# empty string when HF_TOKEN is not set.
HF_TOKEN       = os.environ.get("HF_TOKEN", "")
HF_USER        = "Shanmuk4622"
# Namespace used to build every repo_id and URL in this file; here it is
# simply the user account.
HF_ORG         = HF_USER
# All inputs (results_summary.json, CSV logs, .pth weights) and outputs are
# resolved relative to the directory containing this script.
BASE_DIR       = os.path.dirname(os.path.abspath(__file__))
CHARTS_DIR     = os.path.join(BASE_DIR, "charts")
HF_READMES_DIR = os.path.join(BASE_DIR, "hf_readmes")
# Import-time side effect: both output directories are created if missing.
os.makedirs(CHARTS_DIR, exist_ok=True)
os.makedirs(HF_READMES_DIR, exist_ok=True)

# Hub client bound to the token above. The upload code visible in this file
# calls the module-level create_repo/upload_file helpers instead of this object.
api = HfApi(token=HF_TOKEN)

# ─── DARK THEME PALETTE ──────────────────────────────────────────────────────
# Hex colours shared by every chart function in this file.
DARK_BG    = "#0d1117"   # figure background
CARD_BG    = "#161b22"   # axes background and legend face
GRID_COLOR = "#21262d"   # grid lines, spines, legend/bar edges
TEXT_COLOR = "#e6edf3"   # titles, labels, annotations
GREEN      = "#2ea043"
GREEN_LT   = "#56d364"   # accuracy line, "test1" points, positive EAG bars
ORANGE     = "#f0883e"   # cumulative-energy line
BLUE       = "#58a6ff"   # EAG line, "test3" points, frozen-phase shading
PURPLE     = "#bc8cff"   # frozen-phase shading on the EAG chart
RED        = "#f85149"   # negative EAG bars / negative CO2 saving labels
MUTED      = "#8b949e"   # tick labels, footnotes, "test2" points

# ─── STATIC METADATA (mirrors eden_hf_upload.py) ─────────────────────────────
# Fixed description of the profiling machine; rendered verbatim in the
# "Profiling Environment" table of every generated README.
HARDWARE = {
    "gpu": "NVIDIA GeForce GTX 1080 Ti (11 GB VRAM, 250 W TDP)",
    "cpu": "Intel Xeon W-2125 (4 cores / 8 threads @ 4.00 GHz)",
    "ram": "63.66 GB System RAM",
    "os":  "Windows 10",
}

# Run folder name -> long technique label shown as "Applied Technique" in the
# model README. Folders not listed here fall back to folder_to_phase().
PHASE_MAP = {
    "test1": "Phase 2 – Progressive Unfreezing + AMP (E2AM SOTA)",
    "test2": "Baseline – Standard Full Training (Reference Study)",
    "test3": "Phase 2 – EDEN Classic Energy-Aware Sparse Training",
}

# Run folder name -> Markdown paragraph(s) inserted under the
# "E2AM Algorithm — Applied Phases" heading of the model README.
# The strings are emitted as-is, so the embedded "\n\n" separators and the
# Markdown markup are part of the rendered output.
PHASE_DETAIL = {
    "test1": (
        "**Phase 1 – Zero-Overhead Initialization:** Dataset pre-loaded into pinned "
        "System RAM to eliminate disk I/O power spikes.\n\n"
        "**Phase 2 – Progressive Unfreezing:** Backbone frozen for the first "
        "`E_unfreeze` epochs (only the classification head trains). At `E_unfreeze`, "
        "all layers are unfrozen and the learning rate is decayed. "
        "Gradient accumulation over N micro-batches simulates large batch sizes "
        "without proportional VRAM cost, slashing power-draw spikes.\n\n"
        "**AMP (Automated Mixed Precision):** `torch.cuda.amp.autocast()` halves "
        "GPU memory bandwidth, reducing energy per backward pass.\n\n"
        "**Sparse Regularisation:** L1 penalty `λ·Σ|W|` applied to trainable "
        "weights, driving dead neurons to zero and enabling future pruning."
    ),
    "test2": (
        "Standard full fine-tuning used as the **Brute-Force Baseline** for "
        "energy comparison. All layers trained from epoch 1 with a fixed learning "
        "rate and no gradient accumulation. Included for transparent EAG benchmarking."
    ),
    "test3": (
        "**Phase 1 – Zero-Overhead Initialization:** Dataset cached in System RAM.\n\n"
        "**Phase 2 – EDEN Classic:** Energy-aware training loop on classic CNN "
        "architectures. Applies the same EAG early-exit criterion "
        "(`EAG < γ_EAG` for 3 consecutive epochs → terminate), L1 sparsity "
        "penalty, and AMP to architectures like ResNet, VGG, AlexNet, DenseNet, "
        "InceptionV3, and UNet."
    ),
}

# Dataset label (as returned by parse_name) -> README metadata.
#   "size"    : human-readable description used in the README body and in the
#               front-matter `dataset_info.dataset_size` field.
#   "hf_name" : identifier written to the front-matter `datasets:` list and
#               to the model-index `dataset.type` field.
# The "unknown" entry is the fallback used when a label is not listed.
DATASET_META = {
    "CIFAR-10":           {"size": "60,000 images – 10 classes (32×32 px)",  "hf_name": "cifar10"},
    "CIFAR-100":          {"size": "60,000 images – 100 classes (32×32 px)", "hf_name": "cifar100"},
    "Custom-ImageNet300": {"size": "~450,000 images – 300 classes (224 px)", "hf_name": "imagenet"},
    "unknown":            {"size": "N/A", "hf_name": "unknown"},
}

# ─── HELPERS ─────────────────────────────────────────────────────────────────
def parse_name(filename):
    """Infer ``(arch, dataset)`` from a file name or relative path.

    Matching is a case-insensitive substring search. For each of the two
    labels the first marker that occurs in the name wins, so table order
    matters (``cifar100`` must be tested before ``cifar10``). A label with
    no matching marker is reported as ``"unknown"``.
    """
    lowered = filename.lower().replace("\\", "/")

    dataset_markers = (
        ("cifar100", "CIFAR-100"),
        ("cifar10", "CIFAR-10"),
        ("imagenet", "Custom-ImageNet300"),
    )
    arch_markers = (
        ("efficientnet", "EfficientNetV2"),
        ("convnext", "ConvNeXtV2"),
        ("mobilevit", "MobileViTv3"),
        ("resnet50", "ResNet50"),
        ("resnet18", "ResNet18"),
        ("vgg16", "VGG16"),
        ("alexnet", "AlexNet"),
        ("inception", "InceptionV3"),
        ("densenet", "DenseNet121"),
        ("unet", "UNet"),
    )

    def first_match(table):
        # Return the label of the first marker contained in the name.
        for marker, label in table:
            if marker in lowered:
                return label
        return "unknown"

    return first_match(arch_markers), first_match(dataset_markers)

def folder_to_phase(folder):
    """Return the short phase label for a run folder, or the folder itself if unmapped."""
    short_labels = {
        "test1": "SOTA Optimized",
        "test2": "Baseline",
        "test3": "EDEN Classic",
    }
    if folder in short_labels:
        return short_labels[folder]
    return folder

def co2_kg(joules):
    """Convert energy in joules to kg CO2e using a 0.475 kg/kWh grid factor.

    Falsy input (``0``, ``None``) yields ``0``.
    """
    if not joules:
        return 0
    kwh = joules / 3_600_000
    return kwh * 0.475

def setup_dark_style():
    """Apply the dark theme to matplotlib's global rcParams."""
    surface = {
        'figure.facecolor':  DARK_BG,
        'axes.facecolor':    CARD_BG,
        'axes.edgecolor':    GRID_COLOR,
        'axes.labelcolor':   TEXT_COLOR,
        'axes.titlecolor':   TEXT_COLOR,
    }
    ticks_and_grid = {
        'xtick.color':       MUTED,
        'ytick.color':       MUTED,
        'grid.color':        GRID_COLOR,
        'grid.alpha':        0.7,
    }
    typography = {
        'text.color':        TEXT_COLOR,
        'font.family':       'DejaVu Sans',
        'font.size':         11,
    }
    # Only the left and bottom spines are kept.
    spines = {
        'axes.spines.top':   False,
        'axes.spines.right': False,
    }
    plt.rcParams.update({**surface, **ticks_and_grid, **typography, **spines})

def load_csv(csv_path):
    """Read a training log and map legacy column names onto the common schema.

    Returns the DataFrame, or ``None`` (after printing a warning) when the
    file cannot be read.
    """
    # Old column name -> name the chart functions expect.
    canonical_names = {
        'total_energy_j':     'epoch_total_energy_j',
        'energy_gpu_j':       'epoch_energy_gpu_j',
        'carbon_kg':          'carbon_emissions_kg',
        'vram_gb':            'vram_peak_gb',
        'grad_norm':          'avg_grad_norm',
    }
    try:
        frame = pd.read_csv(csv_path)
        frame.rename(columns=canonical_names, inplace=True)
    except Exception as e:
        print(f"  Warning: could not load {csv_path}: {e}")
        return None
    return frame

# ─── LOAD RESULTS ────────────────────────────────────────────────────────────
# Import-time side effect: the summary file must exist next to this script.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open(os.path.join(BASE_DIR, "results_summary.json"), encoding="utf-8") as f:
    results = json.load(f)

# One summary record per "<folder>_<arch>_<dataset>" key. The first record
# seen for a key is kept, unless it reports zero energy and a later record
# for the same key reports a positive value.
stats_map = {}
for r in results:
    arch, dataset = parse_name(r["file"])
    key = f"{r['folder']}_{arch}_{dataset}"
    if key not in stats_map or (r["energy"] > 0 and stats_map[key]["energy"] == 0):
        stats_map[key] = r

# Reference run per dataset, taken from the "test2" folder only. The first
# test2 record seen for a dataset is used until a ResNet50 record shows up,
# which always replaces whatever is stored.
baselines = {}
for stat_key, stat in stats_map.items():
    if stat_key.split("_")[0] != "test2":
        continue
    base_arch, base_ds = parse_name(stat["file"])
    if base_arch == "ResNet50" or base_ds not in baselines:
        baselines[base_ds] = stat

# Find every weight file under BASE_DIR and attach the matching summary stats.
# The first path component of the relative path is treated as the run folder.
pth_files = glob.glob(os.path.join(BASE_DIR, "**/*.pth"), recursive=True)
models_raw = []
for weight_path in pth_files:
    rel_path = os.path.relpath(weight_path, BASE_DIR)
    run_folder = rel_path.split(os.sep)[0]
    net, data = parse_name(rel_path)
    # Missing stats leave accuracy/energy/time at 0 and the CSV at "N/A".
    summary = stats_map.get(f"{run_folder}_{net}_{data}", {})
    entry = {"pth": rel_path, "arch": net, "dataset": data, "folder": run_folder}
    entry["accuracy"] = summary.get("accuracy", 0)
    entry["energy"] = summary.get("energy", 0)
    entry["time"] = summary.get("time", 0)
    entry["csv"] = summary.get("file", "N/A")
    models_raw.append(entry)

# Repo name -> the single model published under it. Models whose architecture
# or dataset could not be identified are dropped; when several models map to
# the same repo, the one with the strictly higher accuracy is kept.
repo_model_map = {}
for candidate in models_raw:
    if "unknown" in (candidate["arch"], candidate["dataset"]):
        continue
    repo_name = f"EDEN-{candidate['arch']}-{candidate['dataset'].replace(' ', '-')}"
    current = repo_model_map.get(repo_name)
    if current is None or candidate["accuracy"] > current["accuracy"]:
        repo_model_map[repo_name] = candidate

# ═══════════════════════════════════════════════════════════════════════════════
# PER-MODEL CHARTS
# ═══════════════════════════════════════════════════════════════════════════════

def generate_training_curve(model, out_path):
    """Render the dual-axis training curve for one model and save it as a PNG.

    Accuracy (%) is drawn on the left axis and cumulative energy (MJ) on the
    right axis, both against epoch. If the log has a ``status`` column, the
    epoch range whose status equals ``"FROZEN"`` is shaded and its end marked.

    Args:
        model: Model record with ``csv``, ``arch``, ``dataset`` and ``folder``
            keys. ``csv`` is a path relative to ``BASE_DIR`` or ``"N/A"``.
        out_path: Destination path of the PNG file.

    Returns:
        ``True`` when the chart was written. ``False`` when the log is
        missing, unreadable, shorter than two rows, or lacks any of the
        ``epoch``, ``accuracy`` or ``cumulative_total_energy_j`` columns.
    """
    if model["csv"] == "N/A":
        return False
    csv_path = os.path.join(BASE_DIR, model["csv"])
    if not os.path.exists(csv_path):
        return False
    df = load_csv(csv_path)
    if df is None or len(df) < 2:
        return False
    # Every column read below must be present; a log without one of them is
    # skipped instead of raising KeyError and aborting the whole run.
    required = ("epoch", "accuracy", "cumulative_total_energy_j")
    if any(col not in df.columns for col in required):
        return False

    setup_dark_style()
    fig, ax1 = plt.subplots(figsize=(11, 5.5))
    fig.patch.set_facecolor(DARK_BG)
    ax1.set_facecolor(CARD_BG)

    epochs     = df["epoch"]
    acc_pct    = df["accuracy"] * 100
    energy_mj  = df["cumulative_total_energy_j"] / 1_000_000

    # Shade the epochs logged with status "FROZEN" and mark where they end.
    if "status" in df.columns:
        frozen_mask = df["status"] == "FROZEN"
        if frozen_mask.any():
            f_min = df.loc[frozen_mask, "epoch"].min()
            f_max = df.loc[frozen_mask, "epoch"].max()
            ax1.axvspan(f_min, f_max, alpha=0.08, color=BLUE, label="Frozen phase")
            ax1.axvline(f_max + 0.5, color=BLUE, linewidth=1, linestyle=':', alpha=0.6)
            ax1.text(f_max + 0.7, acc_pct.min(), "Unfreeze →",
                     color=BLUE, fontsize=9, va='bottom', alpha=0.8)

    # Accuracy (left axis)
    ax1.plot(epochs, acc_pct, color=GREEN_LT, linewidth=2.5, label="Accuracy (%)", zorder=3)
    ax1.fill_between(epochs, acc_pct, alpha=0.10, color=GREEN_LT)
    ax1.set_xlabel("Epoch", fontsize=12)
    ax1.set_ylabel("Accuracy (%)", color=GREEN_LT, fontsize=12)
    ax1.tick_params(axis='y', labelcolor=GREEN_LT)
    # Start the y-axis slightly below the lowest accuracy, never below zero.
    ax1.set_ylim(bottom=max(0, float(acc_pct.min()) * 0.93))
    ax1.grid(True, linestyle='--', alpha=0.35)

    # Label the final accuracy value at the last epoch.
    ax1.annotate(
        f"{float(acc_pct.iloc[-1]):.2f}%",
        xy=(float(epochs.iloc[-1]), float(acc_pct.iloc[-1])),
        xytext=(-45, 12), textcoords='offset points',
        color=GREEN_LT, fontsize=10, fontweight='bold',
        arrowprops=dict(arrowstyle='->', color=GREEN_LT, lw=1.5)
    )

    # Energy (right axis)
    ax2 = ax1.twinx()
    ax2.set_facecolor(CARD_BG)
    ax2.plot(epochs, energy_mj, color=ORANGE, linewidth=2, linestyle='--',
             label="Cumulative Energy (MJ)", zorder=2, alpha=0.9)
    ax2.set_ylabel("Cumulative Energy (MJ)", color=ORANGE, fontsize=12)
    ax2.tick_params(axis='y', labelcolor=ORANGE)
    for sp in ax2.spines.values(): sp.set_color(GRID_COLOR)

    for sp in ax1.spines.values(): sp.set_color(GRID_COLOR)

    phase = folder_to_phase(model["folder"])
    ax1.set_title(
        f"Training Curve  ·  {model['arch']} on {model['dataset']}  [{phase}]",
        fontsize=13, fontweight='bold', pad=14
    )

    # One combined legend for the handles of both axes.
    lines1, lab1 = ax1.get_legend_handles_labels()
    lines2, lab2 = ax2.get_legend_handles_labels()
    ax1.legend(lines1 + lines2, lab1 + lab2,
               loc='lower right', facecolor=CARD_BG, edgecolor=GRID_COLOR,
               labelcolor=TEXT_COLOR, fontsize=10)

    plt.tight_layout()
    plt.savefig(out_path, dpi=150, bbox_inches='tight', facecolor=DARK_BG)
    # Close this specific figure so figures do not pile up across models.
    plt.close(fig)
    return True


def generate_eag_curve(model, out_path):
    """Plot the per-epoch EAG metric for one model and save it as a PNG.

    Epochs whose ``eag_metric`` is exactly 0 or missing are left out of the
    line. If the log has a ``status`` column, the epoch range whose status
    equals ``"FROZEN"`` is shaded.

    Args:
        model: Model record with ``csv``, ``arch``, ``dataset`` and ``folder``
            keys. ``csv`` is a path relative to ``BASE_DIR`` or ``"N/A"``.
        out_path: Destination path of the PNG file.

    Returns:
        ``True`` when the chart was written. ``False`` when the log is
        missing, unreadable, shorter than two rows, lacks the ``epoch`` or
        ``eag_metric`` column, or has fewer than two usable EAG values.
    """
    if model["csv"] == "N/A":
        return False
    csv_path = os.path.join(BASE_DIR, model["csv"])
    if not os.path.exists(csv_path):
        return False
    df = load_csv(csv_path)
    if df is None or len(df) < 2:
        return False
    # Both columns are read below; skip the chart rather than raise KeyError.
    if "eag_metric" not in df.columns or "epoch" not in df.columns:
        return False

    # Zeros are treated as "no measurement" and dropped along with NaNs.
    eag_series = df["eag_metric"].replace(0, np.nan)
    valid      = eag_series.dropna()
    if len(valid) < 2:
        return False

    setup_dark_style()
    fig, ax = plt.subplots(figsize=(11, 4.5))
    fig.patch.set_facecolor(DARK_BG)
    ax.set_facecolor(CARD_BG)

    valid_epochs = df.loc[eag_series.notna(), "epoch"]

    ax.plot(valid_epochs, valid, color=BLUE, linewidth=2.5, zorder=3)
    ax.fill_between(valid_epochs, valid, alpha=0.12, color=BLUE)

    # Zero-line reference
    ax.axhline(0, color=MUTED, linewidth=0.8, linestyle='-', alpha=0.5)

    # Shade the epochs logged with status "FROZEN".
    if "status" in df.columns:
        frozen_mask = df["status"] == "FROZEN"
        if frozen_mask.any():
            f_min = df.loc[frozen_mask, "epoch"].min()
            f_max = df.loc[frozen_mask, "epoch"].max()
            ax.axvspan(f_min, f_max, alpha=0.08, color=PURPLE)
            ax.text((f_min + f_max) / 2, float(valid.max()) * 0.9,
                    "Frozen", color=PURPLE, fontsize=9, ha='center', alpha=0.8)

    ax.set_xlabel("Epoch", fontsize=12)
    ax.set_ylabel("EAG  (ΔAcc / ΔJoules)", color=BLUE, fontsize=12)
    ax.tick_params(axis='y', labelcolor=BLUE)
    ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))
    ax.grid(True, linestyle='--', alpha=0.35)
    for sp in ax.spines.values(): sp.set_color(GRID_COLOR)

    phase = folder_to_phase(model["folder"])
    ax.set_title(
        f"EAG Trajectory  ·  {model['arch']} on {model['dataset']}  [{phase}]",
        fontsize=13, fontweight='bold', pad=14
    )
    ax.text(0.01, 0.02,
            "Higher EAG = more accuracy gained per Joule consumed",
            transform=ax.transAxes, color=MUTED, fontsize=9, va='bottom')

    plt.tight_layout()
    plt.savefig(out_path, dpi=150, bbox_inches='tight', facecolor=DARK_BG)
    # Close this specific figure so figures do not pile up across models.
    plt.close(fig)
    return True

# ═══════════════════════════════════════════════════════════════════════════════
# COLLECTION CHARTS
# ═══════════════════════════════════════════════════════════════════════════════

def generate_energy_accuracy_overview(out_path):
    """Scatter plot of total energy (MJ) against final accuracy (%) for every published model.

    Points are coloured by run folder; models with zero energy or zero
    accuracy are left out. The figure is saved to ``out_path``.
    """
    setup_dark_style()
    fig, ax = plt.subplots(figsize=(13, 7))
    fig.patch.set_facecolor(DARK_BG)
    ax.set_facecolor(CARD_BG)

    colour_by_folder = {"test1": GREEN_LT, "test2": MUTED, "test3": BLUE}

    for entry in repo_model_map.values():
        if entry["energy"] == 0 or entry["accuracy"] == 0:
            continue
        x_mj = entry["energy"] / 1_000_000
        y_pct = entry["accuracy"] * 100
        ax.scatter(
            x_mj, y_pct,
            s=130,
            color=colour_by_folder.get(entry["folder"], MUTED),
            alpha=0.85,
            zorder=3,
            edgecolors='white',
            linewidths=0.5,
        )
        ax.annotate(
            f"{entry['arch']}\n{entry['dataset']}",
            (x_mj, y_pct),
            textcoords='offset points',
            xytext=(6, 4),
            fontsize=7.5,
            color=TEXT_COLOR,
            alpha=0.8,
        )

    # The legend is built by hand because points are drawn one at a time.
    legend_spec = (
        (GREEN_LT, "SOTA Optimized  (E2AM Phase 2)"),
        (MUTED,    "Baseline  (standard training)"),
        (BLUE,     "EDEN Classic  (energy-aware CNNs)"),
    )
    ax.legend(
        handles=[mpatches.Patch(color=c, label=text) for c, text in legend_spec],
        facecolor=CARD_BG, edgecolor=GRID_COLOR,
        labelcolor=TEXT_COLOR, fontsize=10, loc='lower right',
    )

    ax.set_xlabel("Total Training Energy (MJ)", fontsize=12)
    ax.set_ylabel("Final Accuracy (%)", fontsize=12)
    ax.grid(True, linestyle='--', alpha=0.3)
    for spine in ax.spines.values():
        spine.set_color(GRID_COLOR)

    ax.set_title("Project EDEN — Energy vs Accuracy  (all models)",
                 fontsize=14, fontweight='bold', pad=14)
    fig.text(0.5, 0.005,
             "↙  lower energy + higher accuracy  =  better Green SOTA",
             ha='center', color=MUTED, fontsize=10)

    # Leave a strip at the bottom for the footnote.
    plt.tight_layout(rect=[0, 0.03, 1, 1])
    plt.savefig(out_path, dpi=150, bbox_inches='tight', facecolor=DARK_BG)
    plt.close()
    print("  ✓ energy_accuracy_overview.png")


def generate_eag_leaderboard(out_path):
    """Horizontal bar chart ranking every published model by EAG against its dataset baseline.

    For each model, ``EAG = (accuracy - baseline_accuracy) / (energy -
    baseline_energy)`` using the baseline of the same dataset. Models with no
    baseline energy, no own energy, or exactly the baseline's energy are
    skipped. Nothing is written when no model qualifies.

    Args:
        out_path: Destination path of the PNG file.
    """
    setup_dark_style()

    entries = []
    for repo_name, m in repo_model_map.items():
        b = baselines.get(m["dataset"], {})
        b_e, b_a = b.get("energy", 0), b.get("accuracy", 0)
        if b_e and m["energy"] and b_e != m["energy"]:
            d_j   = m["energy"] - b_e
            # NOTE(review): a model that is both cheaper and more accurate
            # than the baseline has d_j < 0 and therefore a negative EAG,
            # which is coloured red below — confirm this is intended.
            eag   = (m["accuracy"] - b_a) / d_j
            short = repo_name.replace("EDEN-", "")
            entries.append((short, eag, m["folder"]))

    if not entries:
        return
    # Ascending sort: barh draws the first entry at the bottom.
    entries.sort(key=lambda x: x[1])

    names  = [e[0] for e in entries]
    eags   = [e[1] for e in entries]
    colors = [GREEN_LT if v >= 0 else RED for v in eags]

    fig, ax = plt.subplots(figsize=(12, max(6, len(names) * 0.44)))
    fig.patch.set_facecolor(DARK_BG)
    ax.set_facecolor(CARD_BG)

    bars = ax.barh(names, eags, color=colors, alpha=0.85,
                   edgecolor=GRID_COLOR, linewidth=0.5, height=0.7)
    ax.axvline(0, color=MUTED, linewidth=1)

    # Gap between a bar's tip and its value label, scaled to the data range.
    rng = max(abs(min(eags)), abs(max(eags)))
    pad = rng * 0.015
    for bar, val in zip(bars, eags):
        # The label sits just beyond the bar tip and grows away from the bar:
        # right-aligned to the left of a negative bar, left-aligned to the
        # right of a positive one. (The reverse alignment drew the text on
        # top of the bar.)
        if val < 0:
            xpos, side = val - pad, 'right'
        else:
            xpos, side = val + pad, 'left'
        ax.text(xpos, bar.get_y() + bar.get_height() / 2,
                f"{val:.2e}", va='center', ha=side,
                color=TEXT_COLOR, fontsize=8.5)

    ax.set_xlabel("EAG Score  (ΔAccuracy / ΔJoules)  ·  Higher = Greener",
                  fontsize=11)
    ax.set_title("EAG Leaderboard — All EDEN Models vs Baseline",
                 fontsize=13, fontweight='bold', pad=12)
    ax.ticklabel_format(axis='x', style='sci', scilimits=(0, 0))
    ax.grid(True, axis='x', linestyle='--', alpha=0.3)
    for sp in ax.spines.values(): sp.set_color(GRID_COLOR)

    legend_handles = [
        mpatches.Patch(color=GREEN_LT, label="Positive EAG  (greener than baseline)"),
        mpatches.Patch(color=RED,      label="Negative EAG  (accuracy cost more energy)"),
    ]
    ax.legend(handles=legend_handles, facecolor=CARD_BG, edgecolor=GRID_COLOR,
              labelcolor=TEXT_COLOR, fontsize=9)

    plt.tight_layout()
    plt.savefig(out_path, dpi=150, bbox_inches='tight', facecolor=DARK_BG)
    plt.close(fig)
    print("  ✓ eag_leaderboard.png")


def generate_co2_comparison(out_path):
    """Grouped bar chart of CO₂ emissions: baseline (test2) vs EDEN Classic (test3).

    Only architecture/dataset combinations that have a non-zero-energy record
    in both folders are plotted. Nothing is written when none qualifies.
    """
    setup_dark_style()

    # "<arch>\n(<dataset>)" -> {folder: summary record}
    runs_by_label = {}
    for stat_key, record in stats_map.items():
        run_folder = stat_key.split("_")[0]
        net, data = parse_name(record["file"])
        if net == "unknown" or record["energy"] == 0:
            continue
        runs_by_label.setdefault(f"{net}\n({data})", {})[run_folder] = record

    labels = sorted(
        name for name, runs in runs_by_label.items()
        if "test2" in runs and "test3" in runs
    )
    if not labels:
        return

    base_co2 = [co2_kg(runs_by_label[name]["test2"]["energy"]) for name in labels]
    eden_co2 = [co2_kg(runs_by_label[name]["test3"]["energy"]) for name in labels]

    x = np.arange(len(labels))
    width = 0.38

    fig, ax = plt.subplots(figsize=(max(12, len(labels) * 1.6), 6))
    fig.patch.set_facecolor(DARK_BG)
    ax.set_facecolor(CARD_BG)

    # Two bars per group, centred on the group's x position.
    ax.bar(x - width / 2, base_co2, width, label="Baseline", color=MUTED,
           alpha=0.85, edgecolor=GRID_COLOR)
    ax.bar(x + width / 2, eden_co2, width, label="EDEN Classic", color=GREEN_LT,
           alpha=0.85, edgecolor=GRID_COLOR)

    # Percentage saved relative to the baseline, written above the taller bar.
    for xi, base, eden in zip(x, base_co2, eden_co2):
        if not base > 0:
            continue
        saving = (base - eden) / base * 100
        ax.text(xi, max(base, eden) * 1.04,
                f"{saving:+.1f}%", ha='center', va='bottom',
                color=GREEN_LT if saving > 0 else RED,
                fontsize=9, fontweight='bold')

    ax.set_xticks(x)
    ax.set_xticklabels(labels, fontsize=8.5)
    ax.set_ylabel("CO₂ Emissions (kg CO₂e)", fontsize=11)
    ax.set_title("CO₂ Emissions — Baseline vs EDEN Classic  (per architecture)",
                 fontsize=13, fontweight='bold', pad=12)
    ax.legend(facecolor=CARD_BG, edgecolor=GRID_COLOR, labelcolor=TEXT_COLOR, fontsize=10)
    ax.grid(True, axis='y', linestyle='--', alpha=0.3)
    for spine in ax.spines.values():
        spine.set_color(GRID_COLOR)

    plt.tight_layout()
    plt.savefig(out_path, dpi=150, bbox_inches='tight', facecolor=DARK_BG)
    plt.close()
    print("  ✓ co2_comparison.png")

# ═══════════════════════════════════════════════════════════════════════════════
# README BUILDER
# ═══════════════════════════════════════════════════════════════════════════════

def build_model_readme(model, has_tc, has_eag):
    """Build the full README (YAML front matter + Markdown body) for one model repo.

    Args:
        model: Model record with ``arch``, ``dataset``, ``folder``,
            ``accuracy``, ``energy``, ``time`` and ``csv`` keys.
        has_tc: Whether ``training_curve.png`` was generated for this model.
        has_eag: Whether ``eag_curve.png`` was generated for this model.

    Returns:
        The README text as a single string.
    """
    arch    = model["arch"]
    dataset = model["dataset"]
    folder  = model["folder"]
    acc     = model["accuracy"]
    energy  = model["energy"]
    t       = model["time"]
    phase   = folder_to_phase(folder)
    ds_meta = DATASET_META.get(dataset, DATASET_META["unknown"])
    model_co2 = co2_kg(energy)

    # Baseline of the same dataset; all values fall back to 0 when absent.
    baseline = baselines.get(dataset, {})
    b_acc    = baseline.get("accuracy", 0)
    b_energy = baseline.get("energy",   0)
    b_arch   = parse_name(baseline.get("file", ""))[0] if baseline else "Baseline"

    # EAG and the derived deltas are only defined when both energies are
    # known and differ (otherwise the denominator would be zero).
    if b_energy and energy and b_energy != energy:
        d_acc = acc - b_acc
        d_j   = energy - b_energy
        eag   = d_acc / d_j
        eag_str     = f"{eag:.4e}"
        savings_str = f"{(b_energy - energy) / b_energy * 100:.2f}%"
        acc_delta   = f"{d_acc * 100:+.2f}%"
    else:
        eag_str = "N/A"
        savings_str = "N/A"
        acc_delta   = "N/A"

    # Pull the final F1 from the CSV. This stays best-effort (the README is
    # still built without it), but a failure is reported instead of being
    # swallowed, and a NaN/inf value is ignored so it never reaches the YAML.
    f1_val = None
    if model["csv"] != "N/A":
        csv_abs = os.path.join(BASE_DIR, model["csv"])
        if os.path.exists(csv_abs):
            try:
                df = pd.read_csv(csv_abs)
                if "f1_score" in df.columns:
                    last_f1 = float(df["f1_score"].iloc[-1])
                    if np.isfinite(last_f1):
                        f1_val = last_f1
            except Exception as e:
                print(f"  Warning: could not read F1 from {csv_abs}: {e}")

    # model-index YAML (enables HF native metrics widget)
    metrics_yaml = f"    - type: accuracy\n      value: {acc:.4f}\n      name: Accuracy"
    if f1_val is not None:
        metrics_yaml += f"\n    - type: f1\n      value: {f1_val:.4f}\n      name: F1 Score"

    model_index = f"""model-index:
- name: EDEN-{arch}-{dataset}
  results:
  - task:
      type: image-classification
      name: Image Classification
    dataset:
      name: {dataset}
      type: {ds_meta['hf_name']}
    metrics:
{metrics_yaml}"""

    arch_tag  = arch.lower().replace(" ", "")
    yaml_co2  = f"{model_co2:.4f}" if model_co2 else "0"

    frontmatter = f"""---
language: en
license: apache-2.0
tags:
- image-classification
- green-ai
- energy-efficiency
- computer-vision
- {arch_tag}
- eden-framework
- e2am
- sustainable-ai
datasets:
- {ds_meta['hf_name']}
metrics:
- accuracy
co2_eq_emissions:
  emissions: {yaml_co2}
  unit: kg
  source: Estimated via CodeCarbon (grid factor 0.475 kg CO2e/kWh)
  hardware_used: NVIDIA GeForce GTX 1080 Ti
dataset_info:
  dataset_size: "{ds_meta['size']}"
{model_index}
---"""

    green_table = f"""| Metric | {b_arch} Baseline | **{arch} (EDEN)** | Δ |
|---|---|---|---|
| Accuracy | {b_acc:.4f} | **{acc:.4f}** | `{acc_delta}` |
| Total Energy (J) | {b_energy:,.0f} | **{energy:,.0f}** | `{savings_str} saved` |
| CO₂ Emissions (kg) | {co2_kg(b_energy):.4f} | **{model_co2:.4f}** | — |
| **EAG Score** | — | **{eag_str}** | ΔAcc/ΔJoules |"""

    # The charts section is only emitted when at least one per-model chart
    # exists; the collection overview link is appended in that case as well.
    chart_section = ""
    if has_tc or has_eag:
        chart_section = "\n## 📊 Training Visualizations\n"
        if has_tc:
            chart_section += (
                "\n### Accuracy & Energy over Training\n"
                "> Green = accuracy (left axis)  ·  Orange dashed = cumulative energy (right axis)\n\n"
                "![Training Curve](training_curve.png)\n"
            )
        if has_eag:
            chart_section += (
                "\n### EAG Metric Trajectory\n"
                "> EAG = ΔAccuracy / ΔJoules — positive means learning more per Joule than baseline\n\n"
                "![EAG Curve](eag_curve.png)\n"
            )
        chart_section += (
            f"\n### Project-Wide Overview\n"
            f"*All EDEN models: energy vs accuracy*\n\n"
            f"![Collection Overview](https://huggingface.co/{HF_ORG}/EDEN-Core-Scripts/resolve/main/energy_accuracy_overview.png)\n"
        )

    cite = f"""## Cite This Research
```bibtex
@misc{{eden2025,
  title     = {{Project EDEN: Energy-Driven Evolution of Networks}},
  author    = {{EDEN Research Team}},
  year      = {{2025}},
  note      = {{Hugging Face: {HF_ORG}}},
  url       = {{https://huggingface.co/{HF_ORG}}}
}}
```"""

    return f"""{frontmatter}

# EDEN-{arch}-{dataset} — *{phase}*

> **Primary KPI:** EAG (Energy-to-Accuracy Gradient) = `{eag_str}` ΔAcc/ΔJoules

## Abstract
This model is part of **Project EDEN (Energy-Driven Evolution of Networks)**, implementing the
**E2AM (Energy Efficient Advanced Model)** Framework. The goal is to shift AI benchmarking from
pure accuracy to *Green SOTA* — maximising predictive power per Joule consumed.

**Applied Technique:** {PHASE_MAP.get(folder, phase)}

## Profiling Environment
| Component | Specification |
|---|---|
| **GPU** | {HARDWARE['gpu']} |
| **CPU** | {HARDWARE['cpu']} |
| **RAM** | {HARDWARE['ram']} |
| **OS**  | {HARDWARE['os']} |
| **Dataset** | {dataset} — {ds_meta['size']} |

## 🟢 Green Delta Table
*Comparing this model against the reference baseline (ResNet-50 equivalent)*

{green_table}

> A **positive EAG** means this model learns more per Joule than the baseline.
> A **negative EAG** indicates a trade-off where higher accuracy required more energy investment.

## E2AM Algorithm — Applied Phases

{PHASE_DETAIL.get(folder, 'Standard training.')}

## Training Statistics
| Metric | Value |
|---|---|
| Final Accuracy | {acc:.4f} ({acc * 100:.2f}%) |
| Total Energy Consumed | {energy:,.0f} J ({energy / 3_600_000:.4f} kWh) |
| Training Time | {t:,.0f} s ({t / 3600:.2f} hrs) |
| Estimated CO₂ | {model_co2:.4f} kg CO₂e |
| Training Log | `{model['csv']}` |
{chart_section}
{cite}
"""


def build_core_scripts_readme():
    """Build the README text for the EDEN-Core-Scripts repository.

    The "Scripts in This Repository" list contains every ``.py`` file under
    ``BASE_DIR`` whose path *relative to BASE_DIR* contains ``Algo_``,
    ``eden_`` or ``mobilevit_model``. Paths are listed with forward slashes.
    This is the same selection and the same path form the upload step uses
    for ``path_in_repo``, so the list matches what ends up in the repo.

    Returns:
        The README text as a single string.
    """
    markers = ("Algo_", "eden_", "mobilevit_model")
    # Match on the relative path: matching on the absolute path would select
    # every script whenever a parent directory name happens to contain a marker.
    relative_scripts = (
        os.path.relpath(p, BASE_DIR).replace("\\", "/")
        for p in glob.glob(os.path.join(BASE_DIR, "**/*.py"), recursive=True)
    )
    py_scripts = sorted(
        rel for rel in relative_scripts
        if any(k in rel for k in markers)
    )
    scripts_md = "\n".join(f"- `{s}`" for s in py_scripts)

    return f"""---
language: en
license: apache-2.0
tags:
- green-ai
- energy-efficiency
- e2am
- eden-framework
- sustainable-ai
- image-classification
---

# EDEN-Core-Scripts — E2AM Framework Repository

> **Project EDEN (Energy-Driven Evolution of Networks)** — The complete algorithmic
> toolkit for Green SOTA image classification research.

## Why EDEN?
As deep learning models scale exponentially, the carbon footprint of training has reached
unsustainable levels. Project EDEN introduces the **EAG (Energy-to-Accuracy Gradient)** as
the primary KPI — shifting the paradigm from chasing raw accuracy to optimising *Green SOTA*.

## Profiling Environment
| Component | Specification |
|---|---|
| **GPU** | {HARDWARE['gpu']} |
| **CPU** | {HARDWARE['cpu']} |
| **RAM** | {HARDWARE['ram']} |
| **OS**  | {HARDWARE['os']} |

---

## 📊 Collection Overview

### Energy vs Accuracy — All Models
*SOTA Optimized (green) · Baseline (grey) · EDEN Classic (blue)*

![Energy vs Accuracy](energy_accuracy_overview.png)

### EAG Leaderboard — Ranked by Green Efficiency
![EAG Leaderboard](eag_leaderboard.png)

### CO₂ Emissions — Baseline vs EDEN Classic
![CO2 Comparison](co2_comparison.png)

---

## The E2AM Algorithm

### Phase 1 — Zero-Overhead Initialization
Dataset pre-loaded into **pinned System RAM** before training — eliminates disk I/O power spikes.

### Phase 2 — Two-Stage Energy-Aware Training
1. **Frozen Head Training** — Only the classification head trains for `E_unfreeze` epochs.
2. **Progressive Unfreezing** — All layers unlock at `E_unfreeze`; LR decayed (`×0.1`).
3. **Gradient Accumulation** — Simulates large batch sizes without VRAM spikes.
4. **AMP** — `torch.cuda.amp.autocast()` halves bandwidth per backward pass.
5. **Sparse L1 Penalty** — `L_total = CrossEntropy + λ·Σ|W_trainable|`
6. **EAG Early-Exit** — Terminates if `EAG < γ_EAG` for 3 consecutive epochs.

### Phase 3 — Hardware-Aware Deployment *(Post-Training)*
Saliency-energy pruning · INT8 quantization · Dynamic depth routing

## EAG — The Expert KPI
```
EAG = ΔAccuracy / ΔJoules
```
A higher EAG = more learning per unit of carbon footprint.

## Scripts in This Repository
{scripts_md}

## Cite This Research
```bibtex
@misc{{eden2025,
  title     = {{Project EDEN: Energy-Driven Evolution of Networks}},
  author    = {{EDEN Research Team}},
  year      = {{2025}},
  note      = {{Hugging Face: {HF_ORG}}},
  url       = {{https://huggingface.co/{HF_ORG}}}
}}
```
"""

# ═══════════════════════════════════════════════════════════════════════════════
# MAIN
# ═══════════════════════════════════════════════════════════════════════════════

if __name__ == "__main__":
    rule = "=" * 65
    print(rule)
    print("  EDEN Chart Generator & HF Pusher")
    print(rule)

    # Step 1: collection-wide charts, written straight into CHARTS_DIR.
    print("\n[1/3] Generating collection charts...")
    collection_builders = (
        (generate_energy_accuracy_overview, "energy_accuracy_overview.png"),
        (generate_eag_leaderboard,          "eag_leaderboard.png"),
        (generate_co2_comparison,           "co2_comparison.png"),
    )
    for build_chart, chart_name in collection_builders:
        build_chart(os.path.join(CHARTS_DIR, chart_name))

    # Step 2: per-model charts and README files, all written locally.
    print("\n[2/3] Generating per-model charts and READMEs...")
    chart_flags = {}  # repo_name -> (has_tc, has_eag)

    for repo_name, m in repo_model_map.items():
        model_chart_dir = os.path.join(CHARTS_DIR, repo_name)
        os.makedirs(model_chart_dir, exist_ok=True)

        has_tc = generate_training_curve(
            m, os.path.join(model_chart_dir, "training_curve.png"))
        has_eag = generate_eag_curve(
            m, os.path.join(model_chart_dir, "eag_curve.png"))
        chart_flags[repo_name] = (has_tc, has_eag)

        readme_path = os.path.join(HF_READMES_DIR, f"{repo_name}_README.md")
        with open(readme_path, "w", encoding="utf-8") as f:
            f.write(build_model_readme(m, has_tc, has_eag))
        print(f"  ✓ {repo_name:<45}  curve={has_tc}  eag={has_eag}")

    core_readme_path = os.path.join(HF_READMES_DIR, "EDEN-Core-Scripts_README.md")
    with open(core_readme_path, "w", encoding="utf-8") as f:
        f.write(build_core_scripts_readme())
    print("  ✓ EDEN-Core-Scripts README")

    # Step 3: push everything to the Hub. Each repo is handled in its own
    # try block so one failure does not stop the remaining uploads.
    print("\n[3/3] Uploading to HuggingFace...")

    # Core scripts repo: README + collection charts + .py scripts
    print("  Uploading EDEN-Core-Scripts...")
    core_repo_id = f"{HF_ORG}/EDEN-Core-Scripts"
    core_target = {"repo_id": core_repo_id, "token": HF_TOKEN, "repo_type": "model"}
    try:
        create_repo(repo_id=core_repo_id, token=HF_TOKEN,
                    repo_type="model", exist_ok=True, private=False)
        upload_file(path_or_fileobj=core_readme_path, path_in_repo="README.md",
                    **core_target)
        # A collection chart may be absent if its generator returned early.
        for _, chart_name in collection_builders:
            chart_abs = os.path.join(CHARTS_DIR, chart_name)
            if os.path.exists(chart_abs):
                upload_file(path_or_fileobj=chart_abs, path_in_repo=chart_name,
                            **core_target)
        for script_path in glob.glob(os.path.join(BASE_DIR, "**/*.py"), recursive=True):
            script_rel = os.path.relpath(script_path, BASE_DIR)
            if not any(k in script_rel for k in ["Algo_", "eden_", "mobilevit_model"]):
                continue
            upload_file(path_or_fileobj=script_path,
                        path_in_repo=script_rel.replace("\\", "/"),
                        **core_target)
        print("  ✓ EDEN-Core-Scripts")
    except Exception as e:
        print(f"  ✗ Core-Scripts: {e}")

    # Per-model repos: README, charts (when generated), weights, CSV log.
    for repo_name, m in repo_model_map.items():
        has_tc, has_eag = chart_flags[repo_name]
        model_chart_dir = os.path.join(CHARTS_DIR, repo_name)
        readme_path     = os.path.join(HF_READMES_DIR, f"{repo_name}_README.md")
        model_repo_id   = f"{HF_ORG}/{repo_name}"
        model_target    = {"repo_id": model_repo_id, "token": HF_TOKEN, "repo_type": "model"}
        try:
            create_repo(repo_id=model_repo_id, token=HF_TOKEN,
                        repo_type="model", exist_ok=True, private=False)
            upload_file(path_or_fileobj=readme_path, path_in_repo="README.md",
                        **model_target)
            if has_tc:
                upload_file(
                    path_or_fileobj=os.path.join(model_chart_dir, "training_curve.png"),
                    path_in_repo="training_curve.png",
                    **model_target)
            if has_eag:
                upload_file(
                    path_or_fileobj=os.path.join(model_chart_dir, "eag_curve.png"),
                    path_in_repo="eag_curve.png",
                    **model_target)
            # Weights
            pth_abs = os.path.join(BASE_DIR, m["pth"])
            if os.path.exists(pth_abs):
                upload_file(path_or_fileobj=pth_abs,
                            path_in_repo=os.path.basename(m["pth"]),
                            **model_target)
            # CSV log
            if m["csv"] != "N/A":
                csv_abs = os.path.join(BASE_DIR, m["csv"])
                if os.path.exists(csv_abs):
                    upload_file(path_or_fileobj=csv_abs,
                                path_in_repo=os.path.basename(m["csv"]),
                                **model_target)
            print(f"  ✓ {repo_name}")
        except Exception as e:
            print(f"  ✗ {repo_name}: {e}")

    print("\n" + rule)
    print(f"  Done!  https://huggingface.co/{HF_ORG}")
    print(rule)