meta-r2 / baseline.json
github-actions[bot]
Deploy Space snapshot
ddbc1ba
{
"schema": "lifestack_baseline_eval_v1",
"note": "Base model eval (no LoRA). 50 episodes, same schedule as evaluate_and_plot. Per-episode rows omitted; re-run: python scripts/eval_baseline.py --output baseline_results.json",
"model": "Qwen/Qwen2.5-1.5B-Instruct",
"load_method": "transformers:Qwen/Qwen2.5-1.5B-Instruct",
"environment": "linux GPU server, HF fallback (Unsloth unavailable due to TRL import mismatch)",
"n_episodes": 50,
"mean_reward": -0.07,
"per_domain": {
"career": { "n": 7, "mean": -0.1429 },
"finances": { "n": 7, "mean": 0.0 },
"relationships": { "n": 6, "mean": 0.0 },
"physical_health": { "n": 6, "mean": -0.1667 },
"mental_wellbeing": { "n": 6, "mean": -0.25 },
"time": { "n": 6, "mean": 0.0 },
"transport_crisis": { "n": 6, "mean": 0.0 },
"code_merge_crisis": { "n": 6, "mean": 0.0 }
},
"all_domains_order": [
"career",
"finances",
"relationships",
"physical_health",
"mental_wellbeing",
"time",
"transport_crisis",
"code_merge_crisis"
]
}