neuro / scripts /rl_adapt.sh
Evogoatml's picture
initial build
b308b74
raw
history blame contribute delete
697 Bytes
#!/usr/bin/env bash
#
# Train a PPO policy (Stable-Baselines3) on a Gymnasium environment and save
# it under <parent of this script's directory>/ckpts/rl.
#
# Environment variables (both optional):
#   TIMESTEPS  total training timesteps, an integer   (default: 200000)
#   ENV_ID     Gymnasium environment id                (default: CartPole-v1)
#
# Outputs:
#   $OUT_DIR/policy.zip  saved model (Stable-Baselines3 appends the .zip suffix)
#   $OUT_DIR is also passed to PPO as its tensorboard_log directory.
#
# Exit status: non-zero if directory creation or the Python run fails.
set -euo pipefail

# Resolve paths relative to this script so it works from any working directory.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)"
OUT_DIR="$ROOT_DIR/ckpts/rl"
TIMESTEPS=${TIMESTEPS:-200000}
ENV_ID=${ENV_ID:-CartPole-v1}

mkdir -p "$OUT_DIR"

# The heredoc delimiter is quoted, so the shell expands nothing inside it;
# all values reach Python through argv.
python - "$OUT_DIR" "$TIMESTEPS" "$ENV_ID" <<'PY'
import os
import sys

out_dir, steps_arg, env_id = sys.argv[1:4]

# Parse the step count before the heavy imports so a malformed TIMESTEPS
# fails fast with a readable message instead of a traceback.
try:
    total_steps = int(steps_arg)
except ValueError:
    sys.exit(f"error: TIMESTEPS must be an integer, got {steps_arg!r}")

from stable_baselines3 import PPO
import gymnasium as gym

env = gym.make(env_id)
try:
    model = PPO(
        "MlpPolicy",
        env,
        n_steps=2048,
        batch_size=64,
        learning_rate=3e-4,
        verbose=1,
        tensorboard_log=out_dir,
    )
    model.learn(total_timesteps=total_steps)
    model.save(os.path.join(out_dir, "policy"))
finally:
    # Release the environment even when training or saving raises.
    env.close()
print("RL adapter saved")
PY

printf '%s\n' "RL adapter complete → $OUT_DIR/policy.zip"