Gridmind / scripts /run_baseline.sh
adityss's picture
feat: add baseline evaluation tools and demo scripts for RL performance comparison
c395f6a
#!/bin/bash
# GridMind-RL Baseline Scorer
# ----------------------------
# Runs two baseline policies (heuristic and zero-shot LLM) before training
# and saves scores to results/ for comparison with post-training results.
#
# Environment variables (both optional):
#   ENV_URL   - environment URL handed to inference.py via --env-url
#               (default: http://localhost:7860)
#   EPISODES  - value handed to inference.py via --episodes (default: 3)
#
# All paths used by this script (results/, inference.py) are relative to the
# current working directory.

# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Output directory for the baseline score files; -p makes this a no-op when
# it already exists.
mkdir -p results

# Caller-provided values win; otherwise fall back to the defaults.
ENV_URL="${ENV_URL:-http://localhost:7860}"
EPISODES="${EPISODES:-3}"

echo "=== GridMind-RL Baseline Scorer ==="
echo "Environment: $ENV_URL"
echo "Episodes per task: $EPISODES"
echo ""
# --- Baseline 1: Heuristic Rule-Based Policy ---
# Calls inference.py with --fast-mode, forwarding the episode count and
# environment URL, and requests output at results/baseline_heuristic.json.
printf '%s\n' "▶ Running Heuristic Baseline (no LLM)..."
heuristic_args=(
  --fast-mode
  --episodes "$EPISODES"
  --env-url "$ENV_URL"
  --output results/baseline_heuristic.json
)
python inference.py "${heuristic_args[@]}"
# Success line followed by one blank separator line.
printf '%s\n' "✅ Heuristic baseline saved to results/baseline_heuristic.json" ""
# --- Baseline 2: Zero-Shot LLM (pre-training) ---
# Same inference.py invocation as the heuristic run but without --fast-mode;
# output is requested at results/baseline_zeroshot.json.
printf '%s\n' "▶ Running Zero-Shot LLM Baseline (pre-training)..."
zeroshot_args=(
  --episodes "$EPISODES"
  --env-url "$ENV_URL"
  --output results/baseline_zeroshot.json
)
python inference.py "${zeroshot_args[@]}"
# Success line followed by one blank separator line.
printf '%s\n' "✅ Zero-shot LLM baseline saved to results/baseline_zeroshot.json" ""
# --- Print Summary ---
#######################################
# Prints the per-task and overall average scores of each baseline.
#
# Reads results/baseline_heuristic.json and results/baseline_zeroshot.json
# (relative to the current directory) and looks up the "task_averages"
# mapping (tasks "1", "2", "3") and the "overall_average" value.
# The report is best-effort: a missing file, an unreadable/malformed file or
# a non-numeric score is reported inline instead of aborting the script.
# Globals:   none
# Arguments: none
# Outputs:   the summary on stdout
# Returns:   exit status of the python interpreter
#######################################
print_baseline_summary() {
  # Quoted delimiter: the here-doc is passed to python verbatim, with no
  # shell expansion.
  python - <<'EOF'
import json
import os

BASELINES = [
    ("Heuristic", "results/baseline_heuristic.json"),
    ("Zero-Shot LLM", "results/baseline_zeroshot.json"),
]
TASK_IDS = ["1", "2", "3"]


def fmt(score):
    """Format a score with 4 decimals, or 'n/a' when it is not numeric."""
    try:
        return f"{float(score):.4f}"
    except (TypeError, ValueError):
        # e.g. JSON null or a non-numeric string
        return "n/a"


for label, path in BASELINES:
    if not os.path.exists(path):
        print(f" {label}: file not found")
        continue
    try:
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        print(f" {label}: could not read {path} ({exc})")
        continue
    if not isinstance(data, dict):
        print(f" {label}: unexpected JSON structure in {path}")
        continue

    avgs = data.get("task_averages", {})
    if not isinstance(avgs, dict):
        avgs = {}
    overall = data.get("overall_average", 0)

    print(f"\n {label}:")
    for tid in TASK_IDS:
        # A task that is absent from the file is shown as 0, as before.
        print(f" Task {tid}: {fmt(avgs.get(tid, 0))}")
    print(f" Overall: {fmt(overall)}")
EOF
}

echo "=== Baseline Summary ==="
print_baseline_summary
# Blank separator, then the hints for the follow-up training and comparison.
printf '\n'
printf '%s\n' \
  "Run 'python scripts/train_unsloth.py' to start fine-tuning." \
  "After training, compare scores with results/post_training.json."