#!/bin/bash
#
# GridMind-RL Baseline Scorer
# ----------------------------
# Runs two baseline policies (heuristic and zero-shot LLM) before training
# and saves scores to results/ for comparison with post-training results.
#
# Usage:
#   Run from the directory that contains inference.py; inference.py and
#   results/ are both resolved relative to the current working directory.
#
# Environment variables:
#   ENV_URL     URL passed to inference.py via --env-url
#               (default: http://localhost:7860)
#   EPISODES    Value passed to inference.py via --episodes (default: 3)
#   PYTHON_BIN  Python interpreter to invoke (default: python)

# -e: stop on the first failing command; -u: treat unset variables as errors;
# pipefail: a pipeline fails if any of its stages fails.
set -euo pipefail

readonly ENV_URL="${ENV_URL:-http://localhost:7860}"
readonly EPISODES="${EPISODES:-3}"
readonly PYTHON_BIN="${PYTHON_BIN:-python}"

readonly RESULTS_DIR="results"
readonly HEURISTIC_OUT="${RESULTS_DIR}/baseline_heuristic.json"
readonly ZEROSHOT_OUT="${RESULTS_DIR}/baseline_zeroshot.json"

#######################################
# Print an error message to stderr and abort the script.
# Arguments: $* - message text
#######################################
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Run inference.py once and write its scores to the given file.
# Globals:   PYTHON_BIN, EPISODES, ENV_URL (read)
# Arguments: $1  - output JSON path
#            $2+ - extra flags placed before the common ones (may be empty)
# Returns:   exit status of inference.py
#######################################
run_baseline() {
  local output=$1
  shift
  "$PYTHON_BIN" inference.py \
    "$@" \
    --episodes "$EPISODES" \
    --env-url "$ENV_URL" \
    --output "$output"
}

#######################################
# Print per-task and overall averages for each baseline result file.
# Malformed files or non-numeric scores are reported instead of raising,
# so a bad result file never hides the scores of the other baseline.
# Globals:   PYTHON_BIN, HEURISTIC_OUT, ZEROSHOT_OUT (read)
# Outputs:   summary on stdout
#######################################
print_summary() {
  # The quoted 'EOF' delimiter keeps the shell from expanding anything inside
  # the Python source; the result paths are handed over through sys.argv.
  "$PYTHON_BIN" - "$HEURISTIC_OUT" "$ZEROSHOT_OUT" <<'EOF'
import json
import os
import sys


def fmt(value):
    """Format a score with four decimals, or 'n/a' if it is not numeric."""
    try:
        return f"{float(value):.4f}"
    except (TypeError, ValueError):
        return "n/a"


labels = ["Heuristic", "Zero-Shot LLM"]
for label, path in zip(labels, sys.argv[1:]):
    if not os.path.exists(path):
        print(f" {label}: file not found")
        continue
    try:
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        print(f" {label}: could not read {path} ({exc})")
        continue
    if not isinstance(data, dict):
        print(f" {label}: unexpected JSON structure in {path}")
        continue
    avgs = data.get("task_averages")
    if not isinstance(avgs, dict):
        # Missing or null "task_averages": fall back to the 0 defaults below.
        avgs = {}
    overall = data.get("overall_average", 0)
    print(f"\n {label}:")
    for tid in ["1", "2", "3"]:
        print(f" Task {tid}: {fmt(avgs.get(tid, 0))}")
    print(f" Overall: {fmt(overall)}")
EOF
}

main() {
  # Fail early with a clear message instead of an opaque interpreter error.
  command -v "$PYTHON_BIN" >/dev/null 2>&1 \
    || die "Python interpreter '$PYTHON_BIN' not found (set PYTHON_BIN to override)."
  [[ -f inference.py ]] \
    || die "inference.py not found in $PWD; run this script from the directory that contains it."

  mkdir -p "$RESULTS_DIR"

  echo "=== GridMind-RL Baseline Scorer ==="
  echo "Environment: $ENV_URL"
  echo "Episodes per task: $EPISODES"
  echo ""

  # --- Baseline 1: Heuristic Rule-Based Policy ---
  echo "▶ Running Heuristic Baseline (no LLM)..."
  run_baseline "$HEURISTIC_OUT" --fast-mode
  echo "✅ Heuristic baseline saved to $HEURISTIC_OUT"
  echo ""

  # --- Baseline 2: Zero-Shot LLM (pre-training) ---
  echo "▶ Running Zero-Shot LLM Baseline (pre-training)..."
  run_baseline "$ZEROSHOT_OUT"
  echo "✅ Zero-shot LLM baseline saved to $ZEROSHOT_OUT"
  echo ""

  # --- Print Summary ---
  echo "=== Baseline Summary ==="
  print_summary

  echo ""
  echo "Run 'python scripts/train_unsloth.py' to start fine-tuning."
  echo "After training, compare scores with results/post_training.json."
}

main "$@"