Spaces:

Prajwal782007
/

Gridmind

Sleeping

App Files Files Community

Gridmind / scripts /run_baseline.sh

adityss

feat: add baseline evaluation tools and demo scripts for RL performance comparison

c395f6a 23 days ago

raw

history blame contribute delete

1.85 kB

	#!/bin/bash
	# GridMind-RL Baseline Scorer
	# ----------------------------
	# Runs two baseline policies (heuristic and zero-shot LLM) before training
	# and saves scores to results/ for comparison with post-training results.
	#
	# Usage:
	#   bash scripts/run_baseline.sh
	#   All paths (inference.py, results/) are relative to the current working
	#   directory, so invoke this from the directory that contains inference.py.
	#
	# Environment variables:
	#   ENV_URL   URL passed to inference.py via --env-url
	#             (default: http://localhost:7860)
	#   EPISODES  Value passed to inference.py via --episodes (default: 3)
	#
	# Outputs:
	#   results/baseline_heuristic.json
	#   results/baseline_zeroshot.json
	#
	# Exit status: non-zero as soon as any command fails.

	# -e: stop at the first failing command; -u: treat unset variables as errors
	# (catches typos in variable names); pipefail: a failing pipeline stage fails
	# the whole pipeline.
	set -euo pipefail
	mkdir -p results

	ENV_URL="${ENV_URL:-http://localhost:7860}"
	EPISODES="${EPISODES:-3}"

	echo "=== GridMind-RL Baseline Scorer ==="
	echo "Environment: $ENV_URL"
	echo "Episodes per task: $EPISODES"
	echo ""

	# --- Baseline 1: Heuristic Rule-Based Policy ---
	echo "▶ Running Heuristic Baseline (no LLM)..."
	python inference.py \
		--fast-mode \
		--episodes "$EPISODES" \
		--env-url "$ENV_URL" \
		--output results/baseline_heuristic.json

	echo "✅ Heuristic baseline saved to results/baseline_heuristic.json"
	echo ""

	# --- Baseline 2: Zero-Shot LLM (pre-training) ---
	echo "▶ Running Zero-Shot LLM Baseline (pre-training)..."
	python inference.py \
		--episodes "$EPISODES" \
		--env-url "$ENV_URL" \
		--output results/baseline_zeroshot.json

	echo "✅ Zero-shot LLM baseline saved to results/baseline_zeroshot.json"
	echo ""

	# --- Print Summary ---
	echo "=== Baseline Summary ==="
	# '<<-' strips leading TABs from the heredoc body and from the terminator, so
	# the block may be tab-indented along with the rest of the script. Python's
	# own block structure is expressed with SPACES (which '<<-' leaves alone).
	# The quoted delimiter ('EOF') disables shell expansion inside the Python code.
	python - <<-'EOF'
	import json, os

	for label, path in [("Heuristic", "results/baseline_heuristic.json"),
	                    ("Zero-Shot LLM", "results/baseline_zeroshot.json")]:
	    # A missing result file is reported but does not abort the summary.
	    if not os.path.exists(path):
	        print(f" {label}: file not found")
	        continue
	    with open(path) as f:
	        data = json.load(f)
	    # Missing keys fall back to empty/zero so the summary still prints.
	    avgs = data.get("task_averages", {})
	    overall = data.get("overall_average", 0)
	    print(f"\n {label}:")
	    for tid in ["1","2","3"]:
	        print(f" Task {tid}: {avgs.get(tid, 0):.4f}")
	    print(f" Overall: {overall:.4f}")
	EOF

	echo ""
	echo "Run 'python scripts/train_unsloth.py' to start fine-tuning."
	echo "After training, compare scores with results/post_training.json."