""" Project EDEN - Hugging Face Upload Master Script Applies all 6 refinements: 1. Hardware transparency (1080 Ti / Xeon W-2125) 2. E2AM Phase mapping per model 3. Phase 1 Zero-Overhead Initialization highlight 4. Standardized Green Delta table in every README 5. YAML tags with co2_eq_emissions + dataset_size 6. Citation section in Main Repo """ import os import json import glob import math from huggingface_hub import HfApi, create_repo, upload_file # ─── CONFIG ────────────────────────────────────────────────────────────────── HF_TOKEN = os.environ.get("HF_TOKEN", "") HF_USER = "Shanmuk4622" # HF username (no org found, uploading under user) HF_ORG = HF_USER # use user namespace BASE_DIR = os.path.dirname(os.path.abspath(__file__)) DRY_RUN = False # Live upload api = HfApi(token=HF_TOKEN) # ─── HARDWARE PROFILE ──────────────────────────────────────────────────────── HARDWARE = { "gpu": "NVIDIA GeForce GTX 1080 Ti (11 GB VRAM, 250 W TDP)", "cpu": "Intel Xeon W-2125 (4 cores / 8 threads @ 4.00 GHz)", "ram": "63.66 GB System RAM", "os": "Windows 10", } # ─── E2AM PHASE MAP ────────────────────────────────────────────────────────── # Maps folder -> technique label for README PHASE_MAP = { "test1": "Phase 2 – Progressive Unfreezing + AMP (E2AM SOTA)", "test2": "Baseline – Standard Full Training (Reference Study)", "test3": "Phase 2 – EDEN Classic Energy-Aware Sparse Training", } PHASE_DETAIL = { "test1": ( "**Phase 1 – Zero-Overhead Initialization:** Dataset pre-loaded into pinned " "System RAM to eliminate disk I/O power spikes.\n\n" "**Phase 2 – Progressive Unfreezing:** Backbone frozen for the first " "`E_unfreeze` epochs (only the classification head trains). At `E_unfreeze`, " "all layers are unfrozen and the learning rate is decayed. " "Gradient accumulation over N micro-batches simulates large batch sizes " "without proportional VRAM cost, slashing power-draw spikes.\n\n" "**AMP (Automated Mixed Precision):** `torch.cuda.amp.autocast()` halves " "GPU memory bandwidth, reducing energy per backward pass.\n\n" "**Sparse Regularisation:** L1 penalty `λ·Σ|W|` applied to trainable " "weights, driving dead neurons to zero and enabling future pruning." ), "test2": ( "Standard full fine-tuning used as the **Brute-Force Baseline** for " "energy comparison. All layers trained from epoch 1 with a fixed learning " "rate and no gradient accumulation. Included for transparent EAG benchmarking." ), "test3": ( "**Phase 1 – Zero-Overhead Initialization:** Dataset cached in System RAM.\n\n" "**Phase 2 – EDEN Classic:** Energy-aware training loop on classic CNN " "architectures. Applies the same EAG early-exit criterion " "(`EAG < γ_EAG` for 3 consecutive epochs → terminate), L1 sparsity " "penalty, and AMP to architectures like ResNet, VGG, AlexNet, DenseNet, " "InceptionV3, and UNet." ), } # ─── DATASET META ──────────────────────────────────────────────────────────── DATASET_META = { "CIFAR-10": {"size": "60,000 images – 10 classes (32×32 px)", "hf_name": "cifar10"}, "CIFAR-100": {"size": "60,000 images – 100 classes (32×32 px)", "hf_name": "cifar100"}, "Custom-ImageNet300": {"size": "~450,000 images – 300 classes (224 px)", "hf_name": "imagenet"}, "unknown": {"size": "N/A", "hf_name": "unknown"}, } # CO2: 0.475 kg CO2e per kWh (global average grid factor) KG_CO2_PER_KWH = 0.000000475 # per Joule # ─── HELPERS ───────────────────────────────────────────────────────────────── def parse_name(filename): fn = filename.lower().replace("\\", "/") dataset = "unknown" arch = "unknown" if "cifar100" in fn: dataset = "CIFAR-100" elif "cifar10" in fn: dataset = "CIFAR-10" elif "imagenet" in fn: dataset = "Custom-ImageNet300" if "efficientnet" in fn: arch = "EfficientNetV2" elif "convnext" in fn: arch = "ConvNeXtV2" elif "mobilevit" in fn: arch = "MobileViTv3" elif "resnet50" in fn: arch = "ResNet50" elif "resnet18" in fn: arch = "ResNet18" elif "vgg16" in fn: arch = "VGG16" elif "alexnet" in fn: arch = "AlexNet" elif "inception" in fn: arch = "InceptionV3" elif "densenet" in fn: arch = "DenseNet121" elif "unet" in fn: arch = "UNet" return arch, dataset def joules_to_co2(joules): kwh = joules / 3_600_000 return kwh * 0.475 # kg CO2e def folder_to_phase_label(folder): return {"test1": "SOTA Optimized", "test2": "Baseline", "test3": "EDEN Classic"}.get(folder, folder) # ─── LOAD STATS ────────────────────────────────────────────────────────────── with open(os.path.join(BASE_DIR, "results_summary.json")) as f: results = json.load(f) stats_map = {} for r in results: arch, dataset = parse_name(r["file"]) folder = r["folder"] key = f"{folder}_{arch}_{dataset}" if key not in stats_map or (r["energy"] > 0 and stats_map[key]["energy"] == 0): stats_map[key] = r # Build baseline map (ResNet50 from test2 per dataset) baselines = {} for key, v in stats_map.items(): folder, *rest = key.split("_") arch = v.get("arch") or parse_name(v["file"])[0] if folder == "test2": _, ds = parse_name(v["file"]) if ds not in baselines: baselines[ds] = v # prefer ResNet50 if parse_name(v["file"])[0] == "ResNet50": baselines[ds] = v # ─── COLLECT ALL MODELS ────────────────────────────────────────────────────── pth_files = glob.glob(os.path.join(BASE_DIR, "**/*.pth"), recursive=True) models = [] for pth in pth_files: rel = os.path.relpath(pth, BASE_DIR) parts = rel.split(os.sep) folder = parts[0] arch, dataset = parse_name(rel) key = f"{folder}_{arch}_{dataset}" stat = stats_map.get(key, {}) models.append({ "pth": rel, "arch": arch, "dataset": dataset, "folder": folder, "accuracy": stat.get("accuracy", 0), "energy": stat.get("energy", 0), "time": stat.get("time", 0), "csv": stat.get("file", "N/A"), }) # ─── README GENERATOR ──────────────────────────────────────────────────────── def build_readme(model): arch = model["arch"] dataset = model["dataset"] folder = model["folder"] acc = model["accuracy"] energy = model["energy"] t = model["time"] phase = folder_to_phase_label(folder) ds_meta = DATASET_META.get(dataset, DATASET_META["unknown"]) co2 = joules_to_co2(energy) if energy else 0 baseline = baselines.get(dataset, {}) b_acc = baseline.get("accuracy", 0) b_energy = baseline.get("energy", 0) b_arch = parse_name(baseline.get("file",""))[0] if baseline else "Baseline" # Green Delta if b_energy and energy: energy_savings_pct = (b_energy - energy) / b_energy * 100 d_acc = acc - b_acc d_j = energy - b_energy eag = d_acc / d_j if d_j != 0 else float("nan") eag_str = f"{eag:.4e}" savings_str = f"{energy_savings_pct:.2f}%" acc_delta = f"{d_acc*100:+.2f}%" else: energy_savings_pct = 0 eag_str = "N/A" savings_str = "N/A" acc_delta = "N/A" # YAML tags arch_tag = arch.lower().replace(" ","") yaml_co2 = f"{co2:.4f}" if co2 else "0" yaml = f"""--- language: en license: apache-2.0 tags: - image-classification - green-ai - energy-efficiency - computer-vision - {arch_tag} - eden-framework - e2am - sustainable-ai datasets: - {ds_meta['hf_name']} metrics: - accuracy co2_eq_emissions: emissions: {yaml_co2} unit: kg source: Estimated via CodeCarbon (grid factor 0.475 kg CO2e/kWh) hardware_used: NVIDIA GeForce GTX 1080 Ti dataset_info: dataset_size: "{ds_meta['size']}" ---""" # Technique section technique = PHASE_DETAIL.get(folder, "Standard training.") # Green Delta Table green_table = f"""| Metric | {b_arch} Baseline | **{arch} (EDEN)** | Δ | |---|---|---|---| | Accuracy | {b_acc:.4f} | **{acc:.4f}** | `{acc_delta}` | | Total Energy (J) | {b_energy:,.0f} | **{energy:,.0f}** | `{savings_str} saved` | | CO₂ Emissions (kg) | {joules_to_co2(b_energy):.4f} | **{co2:.4f}** | — | | **EAG Score** | — | **{eag_str}** | ΔAcc/ΔJoules |""" cite = f"""## Cite This Research If you use this model, please cite the **EDEN / E2AM Framework**: ```bibtex @misc{{eden2025, title = {{Project EDEN: Energy-Driven Evolution of Networks}}, author = {{EDEN Research Team}}, year = {{2025}}, note = {{Hugging Face Organization: ProjectEDEN}}, url = {{https://huggingface.co/{HF_ORG}}} }} ```""" readme = f"""{yaml} # EDEN-{arch}-{dataset} — *{phase}* > **Primary KPI:** EAG (Energy-to-Accuracy Gradient) = `{eag_str}` ΔAcc/ΔJoules ## Abstract This model is part of **Project EDEN (Energy-Driven Evolution of Networks)**, implementing the **E2AM (Energy Efficient Advanced Model)** Framework. The goal is to shift AI benchmarking from pure accuracy to *Green SOTA* — maximizing predictive power per Joule consumed. **Applied Technique:** {PHASE_MAP.get(folder, phase)} ## Profiling Environment | Component | Specification | |---|---| | **GPU** | {HARDWARE['gpu']} | | **CPU** | {HARDWARE['cpu']} | | **RAM** | {HARDWARE['ram']} | | **OS** | {HARDWARE['os']} | | **Dataset** | {dataset} — {ds_meta['size']} | ## 🟢 Green Delta Table *Comparing this model against the reference baseline (ResNet-50 equivalent)* {green_table} > A **positive EAG** means this model learns more per Joule than the baseline. > A **negative EAG** indicates a trade-off where higher accuracy required more energy investment. ## E2AM Algorithm — Applied Phases {technique} ## Training Statistics | Metric | Value | |---|---| | Final Accuracy | {acc:.4f} ({acc*100:.2f}%) | | Total Energy Consumed | {energy:,.0f} J ({energy/3_600_000:.4f} kWh) | | Training Time | {t:,.0f} s ({t/3600:.2f} hrs) | | Estimated CO₂ | {co2:.4f} kg CO₂e | | Training Log | `{model['csv']}` | {cite} """ return readme # ─── MAIN FRAMEWORK README ─────────────────────────────────────────────────── def build_main_repo_readme(): py_scripts = [os.path.relpath(p, BASE_DIR) for p in glob.glob(os.path.join(BASE_DIR, "**/*.py"), recursive=True) if any(k in p for k in ["Algo_", "eden_", "mobilevit_model"])] scripts_md = "\n".join(f"- `{s}`" for s in sorted(py_scripts)) return f"""--- language: en license: apache-2.0 tags: - green-ai - energy-efficiency - e2am - eden-framework - sustainable-ai - image-classification --- # EDEN-Core-Scripts — E2AM Framework Repository > **Project EDEN (Energy-Driven Evolution of Networks)** — The complete algorithmic > toolkit for Green SOTA image classification research. ## Why EDEN? As deep learning models scale exponentially, the carbon footprint of training has reached unsustainable levels. Project EDEN introduces the **EAG (Energy-to-Accuracy Gradient)** as the primary KPI — shifting the paradigm from chasing raw accuracy to optimising *Green SOTA*. ## Profiling Environment | Component | Specification | |---|---| | **GPU** | {HARDWARE['gpu']} | | **CPU** | {HARDWARE['cpu']} | | **RAM** | {HARDWARE['ram']} | | **OS** | {HARDWARE['os']} | ## The E2AM Algorithm — All Three Phases ### Phase 1 — Zero-Overhead Initialization Dataset pre-loaded into **pinned System RAM** before training begins. This eliminates disk I/O power spikes that would otherwise inflate energy readings and distort EAG comparisons between architectures. ### Phase 2 — Two-Stage Energy-Aware Training 1. **Frozen Head Training** — Only the classification head trains for the first `E_unfreeze` epochs. The backbone consumes no backward-pass energy. 2. **Progressive Unfreezing** — At epoch `E_unfreeze`, all layers unlock. Learning rate is decayed (`LR × 0.1`) for stable fine-tuning. 3. **Gradient Accumulation** — Gradients accumulated over N micro-batches, simulating large batch sizes without VRAM spikes. 4. **AMP (Automated Mixed Precision)** — `torch.cuda.amp.autocast()` halves bandwidth per backward pass. 5. **Sparse L1 Penalty** — `L_total = CrossEntropy + λ·Σ|W_trainable|` 6. **EAG Early-Exit** — Training terminates if `EAG < γ_EAG` for 3 consecutive epochs, preventing wasted compute. ### Phase 3 — Hardware-Aware Deployment *(Post-Training)* - **Saliency-Energy Pruning** — Filters with lowest `∂Accuracy/∂W ÷ Energy_cost` are pruned. - **INT8 Quantization** — Weights converted for edge-deployment readiness. - **Dynamic Depth Routing** — Simple images bypass the middle 50 % of layers via residual skip connections, slashing inference energy. ## EAG — The Expert KPI ``` EAG = ΔAccuracy / ΔJoules ``` EAG allows apples-to-apples comparison of any two models regardless of architecture family. A higher EAG = more learning per unit of carbon footprint. ## Scripts in This Repository {scripts_md} ## Cite This Research ```bibtex @misc{{eden2025, title = {{Project EDEN: Energy-Driven Evolution of Networks}}, author = {{EDEN Research Team}}, year = {{2025}}, note = {{Hugging Face Organization: ProjectEDEN}}, url = {{https://huggingface.co/{HF_ORG}}} }} ``` """ # ─── OUTPUT / UPLOAD ───────────────────────────────────────────────────────── OUT_DIR = os.path.join(BASE_DIR, "hf_readmes") os.makedirs(OUT_DIR, exist_ok=True) # 1. Main repo README main_readme = build_main_repo_readme() main_readme_path = os.path.join(OUT_DIR, "EDEN-Core-Scripts_README.md") with open(main_readme_path, "w", encoding="utf-8") as f: f.write(main_readme) print("✓ Main repo README written.") # 2. Per-model READMEs (deduplicated by repo name) generated_repos = set() repo_model_map = {} # repo_name -> (model, readme_text) for m in models: if m["arch"] == "unknown" or m["dataset"] == "unknown": continue repo_name = f"EDEN-{m['arch']}-{m['dataset'].replace(' ','-')}" # prefer highest-accuracy model per repo if repo_name not in repo_model_map or m["accuracy"] > repo_model_map[repo_name][0]["accuracy"]: readme_text = build_readme(m) repo_model_map[repo_name] = (m, readme_text) for repo_name, (m, readme_text) in repo_model_map.items(): path = os.path.join(OUT_DIR, f"{repo_name}_README.md") with open(path, "w", encoding="utf-8") as f: f.write(readme_text) print(f"✓ {repo_name} README written.") print(f"\n{'='*60}") print(f"Generated {len(repo_model_map)+1} README files in: {OUT_DIR}") if not DRY_RUN: print("\nStarting HF upload...") # Upload Main Repo README try: create_repo(repo_id=f"{HF_ORG}/EDEN-Core-Scripts", token=HF_TOKEN, repo_type="model", exist_ok=True, private=False) upload_file(path_or_fileobj=main_readme_path, path_in_repo="README.md", repo_id=f"{HF_ORG}/EDEN-Core-Scripts", token=HF_TOKEN, repo_type="model") # Upload all .py scripts for py in glob.glob(os.path.join(BASE_DIR, "**/*.py"), recursive=True): rel = os.path.relpath(py, BASE_DIR) if any(k in rel for k in ["Algo_","eden_","mobilevit_model"]): upload_file(path_or_fileobj=py, path_in_repo=rel.replace("\\","/"), repo_id=f"{HF_ORG}/EDEN-Core-Scripts", token=HF_TOKEN, repo_type="model") print("✓ Uploaded EDEN-Core-Scripts") except Exception as e: print(f"✗ Core-Scripts error: {e}") # Upload per-model repos for repo_name, (m, readme_text) in repo_model_map.items(): try: create_repo(repo_id=f"{HF_ORG}/{repo_name}", token=HF_TOKEN, repo_type="model", exist_ok=True, private=False) readme_path = os.path.join(OUT_DIR, f"{repo_name}_README.md") upload_file(path_or_fileobj=readme_path, path_in_repo="README.md", repo_id=f"{HF_ORG}/{repo_name}", token=HF_TOKEN, repo_type="model") # Upload weights pth_abs = os.path.join(BASE_DIR, m["pth"]) if os.path.exists(pth_abs): upload_file(path_or_fileobj=pth_abs, path_in_repo=os.path.basename(m["pth"]), repo_id=f"{HF_ORG}/{repo_name}", token=HF_TOKEN, repo_type="model") # Upload CSV log if m["csv"] != "N/A": csv_abs = os.path.join(BASE_DIR, m["csv"]) if os.path.exists(csv_abs): upload_file(path_or_fileobj=csv_abs, path_in_repo=os.path.basename(m["csv"]), repo_id=f"{HF_ORG}/{repo_name}", token=HF_TOKEN, repo_type="model") print(f"✓ Uploaded {repo_name}") except Exception as e: print(f"✗ {repo_name} error: {e}") print("\nAll uploads complete.") else: print("\n[DRY RUN] Set DRY_RUN=False to execute HF uploads.")