#!/usr/bin/env bash
#
# Train a PPO policy with Stable-Baselines3 on a Gymnasium environment and
# save it under <repo root>/ckpts/rl, where <repo root> is the parent of the
# directory containing this script.
#
# Environment overrides:
#   TIMESTEPS  total training timesteps passed to model.learn (default: 200000)
#   ENV_ID     Gymnasium environment id passed to gym.make (default: CartPole-v1)
#
# Outputs:
#   $OUT_DIR/policy.zip  saved model (Stable-Baselines3 appends ".zip")
#   $OUT_DIR             also used as the TensorBoard log directory
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)"
OUT_DIR="$ROOT_DIR/ckpts/rl"
TIMESTEPS=${TIMESTEPS:-200000}
ENV_ID=${ENV_ID:-CartPole-v1}

# Fail fast with a readable message instead of a Python int() traceback
# after the output directory has already been created.
if [[ ! "$TIMESTEPS" =~ ^[0-9]+$ ]]; then
  printf 'error: TIMESTEPS must be a non-negative integer, got: %s\n' \
    "$TIMESTEPS" >&2
  exit 2
fi

mkdir -p "$OUT_DIR"

# Values are handed over as argv rather than interpolated into the Python
# source; the quoted 'PY' delimiter keeps the shell from expanding anything
# inside the heredoc.
python - "$OUT_DIR" "$TIMESTEPS" "$ENV_ID" <<'PY'
import sys

from stable_baselines3 import PPO
import gymnasium as gym

out_dir, total_steps, env_id = sys.argv[1], int(sys.argv[2]), sys.argv[3]

env = gym.make(env_id)
try:
    model = PPO(
        "MlpPolicy",
        env,
        n_steps=2048,
        batch_size=64,
        learning_rate=3e-4,
        verbose=1,
        tensorboard_log=out_dir,
    )
    model.learn(total_timesteps=total_steps)
    model.save(f"{out_dir}/policy")
finally:
    # Release the environment even when training or saving raises.
    env.close()

print("RL adapter saved")
PY

printf 'RL adapter complete → %s\n' "$OUT_DIR/policy.zip"