Spaces:

agarwalanu3103
/

clarify-rl

Sleeping

App Files Files Community

clarify-rl / scripts /refresh_all_plots.sh

agarwalanu3103

plots: add training progression + diagnostics, drop W&B links

099bec8 verified about 1 month ago

raw

history blame contribute delete

6.08 kB

	#!/usr/bin/env bash
	# Download all eval JSONs + log_history files from the Hub, then regenerate
	# the 6 submission plots with the same-base before/after comparison.
	#
	# Usage:
	# HF_TOKEN=hf_... (Acct 1: agarwalanu3103) used by default
	# HF_TOKEN_KANAN=hf_... (Acct 2: Kanan2005, owns Run 3 repo) — optional
	# HF_TOKEN_MNIT=hf_... (Acct 3: 2022uec1542, owns Run 4 repo) — optional
	# ./scripts/refresh_all_plots.sh
	#
	# Repos covered:
	# Run 1: agarwalanu3103/clarify-rl-grpo-qwen3-0-6b — 0.6B GRPO (beta=0, LR=1e-6)
	# Run 2: agarwalanu3103/clarify-rl-grpo-qwen3-1-7b — 1.7B GRPO (beta=0, LR=1e-6)
	# Run 3: Kanan2005/clarify-rl-grpo-qwen3-4b — 4B GRPO (beta=0, LR=1e-6)
	# Run 4: 2022uec1542/clarify-rl-grpo-qwen3-1-7b — 1.7B GRPO (beta=0.2, LR=5e-7) — KL-anchored

	# Strict mode: stop on the first failing command (errexit), treat unset
	# variables as errors (nounset), and make a pipeline fail if any stage fails.
	set -o errexit -o nounset -o pipefail

	# Refuse to run without the primary account's token; the text after `:?` is
	# the message bash prints before exiting.
	: "${HF_TOKEN:?HF_TOKEN required (Acct 1: agarwalanu3103)}"

	# Directory for downloaded artifacts, plus the directory the plots are
	# written to. Both paths are relative to the current working directory.
	OUT="outputs/run_artifacts"
	mkdir -p "${OUT}" plots

	# Activate the project virtualenv so the `python` calls below resolve to it.
	. .venv/bin/activate

	# Forward all three tokens into the python pull so each repo can be reached
	# with the right credentials. Kanan/MNIT are optional — a repo whose token is
	# not provided is skipped with a "[skip]" notice rather than failing the run.
	#
	# The heredoc is opened with `<<-` so that leading tabs are stripped from the
	# embedded Python and from the closing PY marker; the Python's own block
	# indentation therefore uses spaces only.
	HF_TOKEN_KANAN="${HF_TOKEN_KANAN:-}" \
	HF_TOKEN_MNIT="${HF_TOKEN_MNIT:-}" \
	HF_TOKEN_AGARWAL="${HF_TOKEN}" \
	python <<-'PY'
	import os
	import truststore; truststore.inject_into_ssl()
	from huggingface_hub import HfApi, hf_hub_download
	from pathlib import Path

	out = Path("outputs/run_artifacts")
	out.mkdir(parents=True, exist_ok=True)

	# Map: short label → (repo_id, env-var name holding the token for that repo)
	REPOS = {
	    "0.6B": ("agarwalanu3103/clarify-rl-grpo-qwen3-0-6b", "HF_TOKEN_AGARWAL"),
	    "1.7B": ("agarwalanu3103/clarify-rl-grpo-qwen3-1-7b", "HF_TOKEN_AGARWAL"),
	    "4B": ("Kanan2005/clarify-rl-grpo-qwen3-4b", "HF_TOKEN_KANAN"),
	    "1.7B-KL": ("2022uec1542/clarify-rl-grpo-qwen3-1-7b", "HF_TOKEN_MNIT"),
	}

	for size, (repo, token_var) in REPOS.items():
	    # An empty string counts as "not provided": the shell wrapper exports the
	    # optional tokens as "" when the caller did not set them.
	    token = os.environ.get(token_var) or ""
	    if not token:
	        print(f"[skip] {size} {repo}: {token_var} not set")
	        continue
	    api = HfApi(token=token)
	    try:
	        files = api.list_repo_files(repo)
	    except Exception as exc:
	        # Best effort: an unreachable repo must not block the other repos.
	        print(f"[skip] {repo}: {exc}")
	        continue

	    for f in files:
	        # Only eval JSONs and the two training-summary files are pulled.
	        if f.startswith("evals/") or f in ("log_history.json", "training_summary.json"):
	            try:
	                # Each repo downloads into its own outputs/run_artifacts/<label>/ dir.
	                hf_hub_download(
	                    repo_id=repo,
	                    filename=f,
	                    token=token,
	                    local_dir=str(out / size),
	                )
	                print(f"[ok] {size}/{f}")
	            except Exception as exc:
	                # Best effort per file: report and continue with the next one.
	                print(f"[err] {size}/{f}: {exc}")
	PY

	echo
	echo "Files now under outputs/run_artifacts:"
	# List every downloaded JSON in sorted order. The pipe must be a plain `|`:
	# written as `\|` it is handed to find as a literal argument (followed by
	# "sort"), which find rejects instead of piping its output.
	find outputs/run_artifacts -name '*.json' | sort

	# Accumulates `--eval "<label>=<path>"` argument pairs for whatever eval
	# files are actually present on disk.
	EVAL_FLAGS=()

	# add_if PATH LABEL
	#   When PATH is an existing regular file, append `--eval "LABEL=PATH"` to
	#   EVAL_FLAGS and print a " + LABEL: PATH" progress line. Otherwise do
	#   nothing and return success.
	add_if() {
	  [[ -f "$1" ]] || return 0
	  local json_path=$1 label=$2
	  EVAL_FLAGS+=(--eval "${label}=${json_path}")
	  printf ' + %s: %s\n' "${label}" "${json_path}"
	}

	echo
	echo "Building eval list:"

	# Fixed-path evals as (path, label) pairs, registered in this order.
	# NOTE(review): the v4/ and 4B-base/ directories are not among the labels the
	# download step in this script writes to (0.6B, 1.7B, 4B, 1.7B-KL) —
	# presumably they are populated by another tool; confirm.
	fixed_evals=(
	  outputs/eval_policy_v4.json
	  "policy (deterministic)"
	  outputs/run_artifacts/v4/evals/eval_qwen3-0.6b_n50_v4.json
	  "0.6B base"
	  outputs/run_artifacts/v4/evals/eval_clarify-rl-grpo-qwen3-0-6b_n50_v4.json
	  "0.6B GRPO (Run 1)"
	  outputs/run_artifacts/v4/evals/eval_qwen3-1.7b_n50_v4.json
	  "1.7B base"
	  outputs/run_artifacts/1.7B/evals/eval_clarify-rl-grpo-qwen3-1-7b_n50.json
	  "1.7B GRPO no-KL (Run 2)"
	  outputs/run_artifacts/4B-base/evals/eval_qwen3-4b_qwen3-4b-base_n50_v4.json
	  "4B base"
	)
	for ((idx = 0; idx < ${#fixed_evals[@]}; idx += 2)); do
	  add_if "${fixed_evals[idx]}" "${fixed_evals[idx + 1]}"
	done

	# Run 4 / Run 3 evals are discovered by glob, so a newly pushed eval JSON is
	# picked up automatically. When nothing matches, the glob stays a literal
	# pattern; the -f test filters that out so the run is silently skipped.
	for eval_json in outputs/run_artifacts/1.7B-KL/evals/eval_*_n50_v4.json; do
	  if [[ -f "${eval_json}" ]]; then
	    add_if "${eval_json}" "1.7B GRPO +KL (Run 4)"
	  fi
	done
	for eval_json in outputs/run_artifacts/4B/evals/eval_*_n50_v4.json; do
	  if [[ -f "${eval_json}" ]]; then
	    add_if "${eval_json}" "4B GRPO (Run 3)"
	  fi
	done

	add_if outputs/eval_qwen3-4b-instruct_n50_v4.json "4B-instruct"

	# Collect `--log-history "<label>=<path>"` pairs for each training log that
	# exists on disk. For Runs 2-4 the final log_history.json downloaded from the
	# run's repo wins; until it exists, fall back to the partial JSON that
	# monitor_training scrapes from live job logs, so the reward curves stay
	# current even mid-training. The order entries are appended in is kept
	# stable: Run 1, Run 2, Run 3 (final), Run 4, Run 3 (partial).
	LOG_FLAGS=()

	run2_final_log=outputs/run_artifacts/1.7B/log_history.json
	run3_final_log=outputs/run_artifacts/4B/log_history.json
	run4_final_log=outputs/run_artifacts/1.7B-KL/log_history.json

	if [[ -f outputs/run1_artifacts/log_history.json ]]; then
	  LOG_FLAGS+=(--log-history "0.6B GRPO (Run 1, beta=0)=outputs/run1_artifacts/log_history.json")
	fi

	if [[ -f "${run2_final_log}" ]]; then
	  LOG_FLAGS+=(--log-history "1.7B GRPO (Run 2, beta=0)=${run2_final_log}")
	elif [[ -f outputs/run2_artifacts/log_history_partial.json ]]; then
	  LOG_FLAGS+=(--log-history "1.7B GRPO (Run 2, in progress)=outputs/run2_artifacts/log_history_partial.json")
	fi

	if [[ -f "${run3_final_log}" ]]; then
	  LOG_FLAGS+=(--log-history "4B GRPO (Run 3, beta=0)=${run3_final_log}")
	fi

	if [[ -f "${run4_final_log}" ]]; then
	  LOG_FLAGS+=(--log-history "1.7B GRPO (Run 4, beta=0.2)=${run4_final_log}")
	elif [[ -f outputs/run4_artifacts/log_history_partial.json ]]; then
	  LOG_FLAGS+=(--log-history "1.7B GRPO (Run 4, beta=0.2 in-progress)=outputs/run4_artifacts/log_history_partial.json")
	fi

	# Run 3's partial fallback is deliberately evaluated last so it is appended
	# after the Run 4 entry.
	if [[ ! -f "${run3_final_log}" && -f outputs/run3_artifacts/log_history_partial.json ]]; then
	  LOG_FLAGS+=(--log-history "4B GRPO (Run 3, beta=0 in-progress)=outputs/run3_artifacts/log_history_partial.json")
	fi

	echo
	echo "Running make_plots.py:"
	# `${arr[@]+"${arr[@]}"}` expands to nothing when the array has no elements.
	# A bare "${arr[@]}" on an empty array is reported as an unbound variable
	# under `set -u` by bash releases older than 4.4, which would abort the
	# script whenever no log-history or eval file was found.
	python scripts/make_plots.py \
	  ${LOG_FLAGS[@]+"${LOG_FLAGS[@]}"} \
	  ${EVAL_FLAGS[@]+"${EVAL_FLAGS[@]}"} \
	  --out-dir plots

	# Also build the hackathon-narrative plots: per-family same-base delta and
	# the 4-run summary table. These need richer logic than make_plots.py supports,
	# so they live in compare_runs.py.
	echo
	echo "Running compare_runs.py:"
	python scripts/compare_runs.py --out-dir plots