"""Convert colab_submission_script.py into a clean Colab .ipynb notebook.
Splits the script on the `# === ... STEP N ...` banner blocks and emits one
code cell per step, with a markdown intro cell at the top.
Usage:
python scripts/build_notebook.py
"""
from __future__ import annotations
import json
import re
from pathlib import Path
# Repository root: this file lives one directory down (scripts/), so parents[1].
REPO_ROOT = Path(__file__).resolve().parents[1]
# Monolithic training script that gets sliced into one notebook cell per step.
SOURCE = REPO_ROOT / "colab_submission_script.py"
# Destination path for the generated notebook.
OUTPUT = REPO_ROOT / "notebooks" / "cdn_cache_optimizer_training.ipynb"
# Markdown for the notebook's leading cell.  The "β" characters in the prior
# revision were mojibake for an em dash / arrow; blank lines are required so
# the lists and numbered steps render as markdown rather than one paragraph.
INTRO_MD = """\
# CDN Cache Optimizer — Training Notebook

OpenEnv-compliant reinforcement-learning agent for **edge CDN cache admission and eviction**.

Run **Runtime → Run all** in Colab to reproduce training, evaluation, schema-drift verification, and result charts in a single pass.

**Project links**

- Hugging Face Space: https://huggingface.co/spaces/umar-sharif821/cdn-cache-env-improvedone
- GitHub repo: https://github.com/umar-sharif821/cdn-cache-env-improvedone

**What this notebook does**

1. Bootstraps Colab (installs `gymnasium`, `torch`, `matplotlib`, `numpy`; mounts Drive if available).
2. Defines a `SchemaDriftGuard` that normalizes heterogeneous CDN log formats.
3. Builds an OpenEnv-compliant `CDNCacheEnv` (gymnasium 5-tuple, multi-component reward).
4. Trains a REINFORCE policy network.
5. Evaluates LRU baseline vs. the fine-tuned agent.
6. Saves `policy.pt`, `training_results.png`, `drift_report.json`, `metrics.json`.

**Reward function**

`R = w1 * Perf - w2 * Cost`, where `Perf` is edge-vs-origin latency savings and `Cost` is eviction churn + admitted bytes / capacity.
"""
# Markdown heading shown above each step's code cell, keyed by the step number
# parsed from the `# STEP N` banner.  The "β" in the prior revision was
# mojibake for an em dash.
STEP_TITLES = {
    0: "Step 0 — Colab bootstrap (deps + Drive)",
    1: "Step 1 — Imports & deterministic seeding",
    2: "Step 2 — Schema Drift Guard",
    3: "Step 3 — OpenEnv-compliant CDN cache environment",
    4: "Step 4 — Policy network + REINFORCE training loop",
    5: "Step 5 — Evaluation: LRU baseline vs fine-tuned agent",
    6: "Step 6 — Comparison charts",
    7: "Step 7 — Persist artifacts to Drive",
    8: "Step 8 — Submission summary",
}
def make_code_cell(source: str) -> dict:
    """Wrap *source* in a minimal nbformat-4 code cell with no recorded outputs."""
    return dict(
        cell_type="code",
        metadata={},
        source=source,
        outputs=[],
        execution_count=None,
    )
def make_md_cell(source: str) -> dict:
    """Wrap *source* in a minimal nbformat-4 markdown cell."""
    return dict(
        cell_type="markdown",
        metadata={},
        source=source,
    )
def split_into_steps(text: str) -> list[tuple[int, str]]:
    """Return (step_index, body_without_banner) tuples in order."""
    banner = re.compile(r"# ={5,}\n# STEP (\d+)[^\n]*\n# ={5,}\n")
    hits = list(banner.finditer(text))
    if not hits:
        raise RuntimeError("No STEP banners found in source script.")
    # Each step body runs from the end of its banner to the start of the
    # next banner (or the end of the text for the final step).
    stops = [m.start() for m in hits[1:]] + [len(text)]
    return [
        (int(m.group(1)), text[m.end():stop].strip("\n"))
        for m, stop in zip(hits, stops)
    ]
def build_notebook() -> dict:
    """Assemble the nbformat-4 notebook dict: intro cell, then a markdown
    heading plus a code cell for every STEP banner in the source script."""
    raw = SOURCE.read_text(encoding="utf-8")
    # Drop the module docstring so it does not become the first code cell.
    doc = re.match(r'"""(.*?)"""', raw, flags=re.DOTALL)
    body = raw if doc is None else raw[doc.end():].lstrip("\n")
    cells: list[dict] = [make_md_cell(INTRO_MD)]
    for idx, code in split_into_steps(body):
        heading = STEP_TITLES.get(idx, f"Step {idx}")
        cells.append(make_md_cell(f"## {heading}"))
        cells.append(make_code_cell(code))
    metadata = {
        "kernelspec": {
            "display_name": "Python 3",
            "language": "python",
            "name": "python3",
        },
        "language_info": {
            "name": "python",
            "version": "3.11",
        },
        "colab": {"provenance": []},
    }
    return {
        "nbformat": 4,
        "nbformat_minor": 5,
        "metadata": metadata,
        "cells": cells,
    }
def main() -> int:
    """Build the notebook, write it under notebooks/, and report the cell count.

    Returns 0 so the result can be handed straight to SystemExit.
    """
    OUTPUT.parent.mkdir(parents=True, exist_ok=True)
    notebook = build_notebook()
    OUTPUT.write_text(json.dumps(notebook, indent=2), encoding="utf-8")
    print(f"Wrote {OUTPUT.relative_to(REPO_ROOT)} ({len(notebook['cells'])} cells)")
    return 0
# Script entry point: propagate main()'s exit status to the shell.
if __name__ == "__main__":
    raise SystemExit(main())