neuralese_temp / scripts /sweep_length_penalty_lambda.sh
psidharth567's picture
Export neuralese codebase (cache and .env excluded).
dbc69f3
#!/usr/bin/env bash
set -eo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/conda_env.sh"
set -u
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
cd "${REPO_ROOT}"
export PYTHONPATH="${REPO_ROOT}/src"
# Sweep two GRPO runs over length_penalty_lambda:
# - 0.5
# - 0.25
#
# Each run keeps all other settings identical to BASE_CONFIG.
BASE_CONFIG="${BASE_CONFIG:-configs/grpo_llama32_3b_bf16.yaml}"
ACCELERATE_CONFIG="${ACCELERATE_CONFIG:-configs/accelerate_ddp_4gpu.yaml}"
NUM_PROCESSES="${NUM_PROCESSES:-4}"
PYTHON_BIN="${PYTHON_BIN:-python}"
OUT_ROOT="${OUT_ROOT:-artifacts/sweeps/length_penalty_lambda}"
LAMBDAS="${LAMBDAS:-0.5 0.25}"
export WANDB_MODE="${WANDB_MODE:-offline}"
export HF_HUB_OFFLINE="${HF_HUB_OFFLINE:-1}"
export HF_DATASETS_OFFLINE="${HF_DATASETS_OFFLINE:-1}"
export TRANSFORMERS_OFFLINE="${TRANSFORMERS_OFFLINE:-1}"
mkdir -p "${OUT_ROOT}"
TMP_DIR="$(mktemp -d "${OUT_ROOT}/tmp_cfgs.XXXXXX")"
trap 'rm -rf "${TMP_DIR}"' EXIT
echo "Base config: ${BASE_CONFIG}"
echo "Lambdas: ${LAMBDAS}"
echo "Output root: ${OUT_ROOT}"
for LAMBDA in ${LAMBDAS}; do
echo
echo "=== Running lambda=${LAMBDA} ==="
CFG_PATH="${TMP_DIR}/grpo_lambda_${LAMBDA}.yaml"
BASE_CONFIG="${BASE_CONFIG}" OUT_ROOT="${OUT_ROOT}" LAMBDA="${LAMBDA}" CFG_PATH="${CFG_PATH}" "${PYTHON_BIN}" - <<'PY'
import copy
import os
from pathlib import Path
import yaml
base_config = Path(os.environ["BASE_CONFIG"])
out_root = Path(os.environ["OUT_ROOT"])
cfg_path = Path(os.environ["CFG_PATH"])
lam = os.environ["LAMBDA"]
with base_config.open("r", encoding="utf-8") as handle:
cfg = yaml.safe_load(handle)
cfg = copy.deepcopy(cfg)
cfg.setdefault("objective", {})
cfg["objective"].setdefault("kwargs", {})
cfg["objective"]["kwargs"]["enable_length_penalty"] = True
cfg["objective"]["kwargs"]["reward_mode"] = "weighted_length_penalty"
cfg["objective"]["kwargs"]["length_penalty_lambda"] = float(lam)
cfg.setdefault("trainer", {})
base_run_name = cfg["trainer"].get("run_name", "grpo")
safe_lam = lam.replace(".", "p")
cfg["trainer"]["run_name"] = f"{base_run_name}-lambda-{safe_lam}"
cfg["trainer"]["output_dir"] = str(out_root / f"run_lambda_{safe_lam}")
with cfg_path.open("w", encoding="utf-8") as handle:
yaml.safe_dump(cfg, handle, sort_keys=False)
print(f"Wrote config: {cfg_path}")
print(f"run_name: {cfg['trainer']['run_name']}")
print(f"output_dir: {cfg['trainer']['output_dir']}")
PY
accelerate launch \
--config_file "${ACCELERATE_CONFIG}" \
--num_processes "${NUM_PROCESSES}" \
src/train_grpo.py \
--config "${CFG_PATH}"
done
echo
echo "Sweep complete. Runs are under: ${OUT_ROOT}"