AutoDataLab2.0 / training /scripts /kaggle_run_all_1p5b_experiments.py
uchihamadara1816's picture
Upload 172 files
d02bacd verified
#!/usr/bin/env python3
"""
One-command Kaggle experiment runner for AutoDataLab++ Qwen2.5-1.5B.
It runs, sequentially:
1. SFT
2. DPO
3. SFT -> DPO
4. GRPO+RLVR continuation from SFT->DPO
5. GRPO continuation from SFT->DPO
6. PPO continuation from SFT->DPO
Every experiment evaluates both:
- non-RAG mode
- RAG mode
It writes one leaderboard:
/kaggle/working/cos_1p5b_all_runs/leaderboard.md
/kaggle/working/cos_1p5b_all_runs/leaderboard.csv
/kaggle/working/cos_1p5b_all_runs/leaderboard.json
Recommended Kaggle cell:
!python3 training/scripts/kaggle_run_all_1p5b_experiments.py --quick
If the best RL method beats SFT->DPO, keep it. If not, keep SFT->DPO and
report RL attempts as ablations.
"""
from __future__ import annotations
import argparse
import csv
import json
import os
import subprocess
import sys
from pathlib import Path
from typing import Any
# Directory two levels above the folder that contains this script; it is passed
# as the working directory for every child command launched from main().
REPO = Path(__file__).resolve().parents[2]
def _redact_secrets(cmd: list[str]) -> list[str]:
    """Return a printable copy of *cmd* with the ``--hf-token`` value masked."""
    flag = "--hf-token"
    shown: list[str] = []
    mask_next = False
    for part in (str(x) for x in cmd):
        if mask_next:
            # An empty token stays visibly empty so "no token supplied" is
            # still easy to spot in the logs.
            shown.append("***" if part else part)
            mask_next = False
        elif part.startswith(flag + "=") and len(part) > len(flag) + 1:
            shown.append(flag + "=***")
        else:
            shown.append(part)
            mask_next = part == flag
    return shown


def run(cmd: list[str], cwd: Path) -> None:
    """Echo *cmd* between banner lines, then execute it with *cwd* as working dir.

    The echoed command has the Hugging Face token masked so the secret does
    not end up in notebook output; the executed command is unchanged.

    Args:
        cmd: Program and arguments, passed to ``subprocess.run`` as a list.
        cwd: Directory the command is executed in.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (``check=True``).
    """
    banner = "=" * 100
    print("\n" + banner, flush=True)
    print("[cmd] " + " ".join(_redact_secrets(cmd)), flush=True)
    print(banner, flush=True)
    subprocess.run(cmd, cwd=str(cwd), check=True)
def evidence_score(rows: list[dict[str, Any]]) -> float:
    """Return the mean per-row evidence score, rounded to 4 decimals.

    Each row contributes the sum of:
      * coverage: fraction of ``required_experts`` that also appear in
        ``model_routed_required`` (1.0 when nothing is required),
      * 1.0 if ``needed_fallback`` is falsy, else 0.0,
      * 0.1 * ``policy_reward`` and 0.05 * ``terminal_score``
        (missing or ``None`` values count as 0).

    Args:
        rows: Evaluation evidence records.

    Returns:
        The averaged score, or the sentinel ``-999.0`` when *rows* is empty.
    """
    if not rows:
        return -999.0
    per_row = []
    for row in rows:
        required = set(row.get("required_experts") or [])
        routed = set(row.get("model_routed_required") or [])
        # A row that requires no experts is vacuously fully covered; this keeps
        # the score consistent with an ``issubset``-style full-coverage check.
        coverage = len(required & routed) / len(required) if required else 1.0
        no_fallback = 0.0 if row.get("needed_fallback") else 1.0
        policy_reward = float(row.get("policy_reward") or 0.0)
        terminal = float(row.get("terminal_score") or 0.0)
        per_row.append(coverage + no_fallback + 0.1 * policy_reward + 0.05 * terminal)
    return round(sum(per_row) / len(per_row), 4)
def collect_rows(root: Path, run_type: str, run_name: str, adapter: Path, eval_dir: Path) -> dict[str, Any]:
    """Build the leaderboard summary for one run from ``<eval_dir>/evidence.json``.

    Args:
        root: Unused; kept so existing callers do not need to change.
        run_type: Training method label stored in the summary.
        run_name: Run identifier stored in the summary.
        adapter: Adapter path, stored as a string.
        eval_dir: Directory expected to contain ``evidence.json``.

    Returns:
        A dict with keys ``run_type``, ``run_name``, ``adapter``, ``eval_dir``,
        ``score``, ``ok`` and ``rows``. When the evidence file is missing,
        unreadable, not a list of objects, or empty, the summary carries
        ``score=-999.0``, ``ok=False`` and ``rows=[]`` so that one bad file
        cannot abort the whole pipeline or be reported as a passing run.
    """
    summary: dict[str, Any] = {
        "run_type": run_type,
        "run_name": run_name,
        "adapter": str(adapter),
        "eval_dir": str(eval_dir),
        "score": -999.0,
        "ok": False,
        "rows": [],
    }
    evidence_path = eval_dir / "evidence.json"
    if not evidence_path.is_file():
        return summary

    try:
        rows = json.loads(evidence_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"[warn] could not read {evidence_path}: {exc}", file=sys.stderr, flush=True)
        return summary

    if not isinstance(rows, list) or not all(isinstance(r, dict) for r in rows):
        print(f"[warn] {evidence_path} is not a JSON list of objects; ignoring it", file=sys.stderr, flush=True)
        return summary
    if not rows:
        # all() over an empty list is True, which would flag a run with no
        # evidence as "ok"; treat it like a missing file instead.
        return summary

    all_no_fallback = all(not r.get("needed_fallback") for r in rows)
    all_full_coverage = all(
        set(r.get("required_experts") or []).issubset(set(r.get("model_routed_required") or []))
        for r in rows
    )
    summary.update(
        score=evidence_score(rows),
        ok=bool(all_no_fallback and all_full_coverage),
        rows=rows,
    )
    return summary
def write_leaderboard(out_root: Path, summaries: list[dict[str, Any]]) -> None:
    """Write ``leaderboard.json``, ``leaderboard.csv`` and ``leaderboard.md``.

    Runs are ranked by ``score``, highest first. The JSON file holds the full
    summaries; the CSV and Markdown files hold one line per run.

    The "Selected Best" section of the Markdown file names the highest-ranked
    run that actually has evaluation evidence. A summary whose ``rows`` entry
    is present but empty is never selected, so a run that produced no evidence
    cannot be recommended. If no run qualifies, the section says so.

    Args:
        out_root: Output directory; created if it does not exist.
        summaries: Per-run dicts with at least the keys ``run_type``,
            ``run_name``, ``score``, ``ok``, ``adapter`` and ``eval_dir``.
    """
    ranked = sorted(summaries, key=lambda s: s["score"], reverse=True)
    out_root.mkdir(parents=True, exist_ok=True)

    (out_root / "leaderboard.json").write_text(
        json.dumps(ranked, indent=2, default=str), encoding="utf-8"
    )

    columns = ["run_type", "run_name", "score", "ok", "adapter", "eval_dir"]
    with (out_root / "leaderboard.csv").open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["rank", *columns])
        writer.writeheader()
        for rank, row in enumerate(ranked, start=1):
            writer.writerow({"rank": rank, **{col: row[col] for col in columns}})

    md = [
        "# AutoDataLab++ 1.5B Training Leaderboard",
        "",
        "| Rank | Method | Run | Score | Full routing / no fallback | Adapter |",
        "|---:|---|---|---:|---:|---|",
    ]
    for rank, row in enumerate(ranked, start=1):
        md.append(
            f"| {rank} | {row['run_type']} | `{row['run_name']}` | {row['score']} | "
            f"{row['ok']} | `{row['adapter']}` |"
        )

    if ranked:
        # Summaries without a "rows" key are assumed to carry evidence, so
        # dicts built by other callers are still eligible.
        best = next((s for s in ranked if s.get("rows", True)), None)
        md += ["", "## Selected Best", ""]
        if best is None:
            md.append("- No run produced evaluation evidence, so no adapter was selected.")
        else:
            md += [
                f"- **Method:** {best['run_type']}",
                f"- **Run:** `{best['run_name']}`",
                f"- **Score:** {best['score']}",
                f"- **Adapter:** `{best['adapter']}`",
                f"- **Eval:** `{best['eval_dir']}`",
            ]
    (out_root / "leaderboard.md").write_text("\n".join(md), encoding="utf-8")
def main() -> int:
    """Run the selected experiments in sequence and write the combined leaderboard.

    Command-line flags:
        --out-root / --base-out-root / --rl-out-root: output directories for
            the leaderboard, the SFT/DPO runs and the RL runs. Each defaults to
            a folder under ``/kaggle/working`` when that directory exists,
            otherwise to a relative folder of the same name.
        --hf-token: forwarded to every child script; defaults to ``HF_TOKEN``,
            then ``HUGGING_FACE_HUB_TOKEN``, then the empty string.
        --model-id, --eval-tasks, --eval-rag-modes: forwarded unchanged.
        --quick: use SFT 2 / DPO 1 / RL 1 epochs and ``--max-train-states 80``
            instead of SFT 3 / DPO 2 / RL 2 epochs and ``--max-train-states 0``.
        --skip-sft: skip the SFT-only run.
        --skip-dpo: skip both the DPO-only run and the SFT -> DPO run.
        --skip-rl: skip the GRPO+RLVR, GRPO and PPO continuations.

    If a child command fails, the pipeline still stops at that point, but the
    leaderboard is written first for the runs that already completed.

    Returns:
        0 when every requested experiment finished.

    Raises:
        subprocess.CalledProcessError: Propagated from ``run`` when a child
            command exits with a non-zero status.
    """
    kaggle_working = Path("/kaggle/working")
    on_kaggle = kaggle_working.is_dir()

    def default_root(dirname: str) -> Path:
        # Kaggle notebooks persist output only under /kaggle/working.
        return kaggle_working / dirname if on_kaggle else Path(dirname)

    ap = argparse.ArgumentParser()
    ap.add_argument("--out-root", type=Path, default=default_root("cos_1p5b_all_runs"))
    ap.add_argument("--base-out-root", type=Path, default=default_root("cos_1p5b_runs"))
    ap.add_argument("--rl-out-root", type=Path, default=default_root("cos_1p5b_rl_runs"))
    ap.add_argument("--hf-token", default=os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or "")
    ap.add_argument("--model-id", default="Qwen/Qwen2.5-1.5B-Instruct")
    ap.add_argument("--eval-tasks", default="expert_brief,risk_brief,crisis_brief")
    ap.add_argument("--eval-rag-modes", default="false,true")
    ap.add_argument("--quick", action="store_true", help="short but useful hackathon run")
    ap.add_argument("--skip-sft", action="store_true")
    ap.add_argument("--skip-dpo", action="store_true")
    ap.add_argument("--skip-rl", action="store_true")
    args = ap.parse_args()

    py = sys.executable
    common = [
        "--model-id",
        args.model_id,
        "--eval-tasks",
        args.eval_tasks,
        "--eval-rag-modes",
        args.eval_rag_modes,
        "--hf-token",
        args.hf_token,
    ]
    base_common = common + ["--out-root", str(args.base_out_root)]
    rl_common = common + ["--out-root", str(args.rl_out_root)]

    # Child scripts receive every value as a command-line string.
    if args.quick:
        sft_epochs, dpo_epochs, rl_epochs = "2", "1", "1"
        max_train_states = "80"
    else:
        sft_epochs, dpo_epochs, rl_epochs = "3", "2", "2"
        max_train_states = "0"
    max_train_examples = "0"

    summaries: list[dict[str, Any]] = []

    def launch(script: str, method: str, run_name: str, pre: list[str], post: list[str], run_root: Path) -> None:
        """Run one experiment, then record its summary from ``run_root/run_name``."""
        run([py, script, "--method", method, *pre, "--run-name", run_name, *post], REPO)
        summaries.append(
            collect_rows(
                args.out_root,
                method,
                run_name,
                run_root / run_name / "adapter",
                run_root / run_name / "eval",
            )
        )

    base_script = "training/scripts/kaggle_train_1p5b_methods.py"
    rl_script = "training/scripts/kaggle_rl_1p5b_methods.py"
    base_post = ["--max-train-examples", max_train_examples, *base_common]
    # The RL runs continue from this adapter, whether it is trained below or
    # left over from an earlier invocation (--skip-dpo).
    sftdpo_adapter = args.base_out_root / "qwen15b_sft_then_dpo_all" / "adapter"

    finished = False
    try:
        if not args.skip_sft:
            launch(base_script, "sft", "qwen15b_sft_all", ["--epochs", sft_epochs], base_post, args.base_out_root)

        if not args.skip_dpo:
            launch(base_script, "dpo", "qwen15b_dpo_all", ["--epochs", dpo_epochs], base_post, args.base_out_root)
            launch(
                base_script,
                "sft_then_dpo",
                "qwen15b_sft_then_dpo_all",
                ["--sft-epochs", sft_epochs, "--dpo-epochs", dpo_epochs],
                base_post,
                args.base_out_root,
            )

        if not args.skip_rl:
            if not sftdpo_adapter.exists():
                # Warn only: how the RL script treats a missing --init-adapter
                # is not visible from this file.
                print(
                    f"[warn] init adapter not found at {sftdpo_adapter}; "
                    "RL runs are started anyway (run without --skip-dpo to create it)",
                    file=sys.stderr,
                    flush=True,
                )
            for method, lr, anchor in (
                ("grpo_rlvr", "5e-6", "0.3"),
                ("grpo", "3e-6", "0.35"),
                ("ppo", "3e-6", "0.35"),
            ):
                launch(
                    rl_script,
                    method,
                    f"qwen15b_{method}_safe_all",
                    [
                        "--init-adapter",
                        str(sftdpo_adapter),
                        "--epochs",
                        rl_epochs,
                        "--lr",
                        lr,
                        "--sft-anchor",
                        anchor,
                        "--max-train-states",
                        max_train_states,
                    ],
                    rl_common,
                    args.rl_out_root,
                )
        finished = True
    finally:
        # On failure, keep an older leaderboard intact unless there is at
        # least one freshly completed run to report.
        if finished or summaries:
            if not finished:
                print(
                    f"[warn] pipeline stopped early; leaderboard covers {len(summaries)} completed run(s)",
                    file=sys.stderr,
                    flush=True,
                )
            write_leaderboard(args.out_root, summaries)
            print("\n" + "=" * 100, flush=True)
            print(f"[leaderboard] {args.out_root / 'leaderboard.md'}", flush=True)
            print(f"[leaderboard] {args.out_root / 'leaderboard.csv'}", flush=True)
            print(f"[leaderboard] {args.out_root / 'leaderboard.json'}", flush=True)
            print("=" * 100, flush=True)
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())