# Default configuration; override any of these on the command line.
PYTHON ?= python
MODEL ?= Qwen/Qwen2.5-Coder-7B-Instruct
GROUP_SIZE ?= 4
MAX_TURNS ?= 12
NUM_STEPS ?= 100
SCENARIOS ?= hpc_outage,hpc_munge,hpc_pid_stale,hpc_gpu_ecc,hpc_nfs_stale,hpc_ood_apache
ENV_URLS ?=
RUN_DIR ?= ./runs/hpc_grpo

.PHONY: help install install-train bench gold eval demo train train-remote dry dry-remote serve clean reward-demo

help:
	@echo "Targets for EnterpriseHPC-v0"
	@echo "  make install        install runtime + dev deps (pip install -e '.[dev]')"
	@echo "  make install-train  install runtime + dev + gpu training deps + unsloth"
	@echo "  make bench          reset-latency benchmark (200 iterations)"
	@echo "  make gold           prove every scenario is solvable (deterministic)"
	@echo "  make eval           run gold/random/bad policies + leaderboard.md"
	@echo "  make demo           gold trajectory run with transcripts printed"
	@echo "  make dry            local dry-run training rollout (no gpu)"
	@echo "  make dry-remote     dry-run against a hosted openenv space (set ENV_URLS=...)"
	@echo "  make train          full grpo training locally with qwen2.5-coder-7b"
	@echo "  make train-remote   full grpo training against ENV_URLS (hf spaces)"
	@echo "  make serve          run the openenv server on :8000"
	@echo "  make reward-demo    gpu-free curriculum reward curve png (no bwrap required)"
	@echo "  make clean          remove runs/ caches"

install:
	$(PYTHON) -m pip install --upgrade pip setuptools wheel
	$(PYTHON) -m pip install -e '.[dev]'

install-train:
	$(PYTHON) -m pip install -e '.[dev,train]'
	$(PYTHON) -m pip install --no-deps 'unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git'

bench:
	$(PYTHON) -m bench.bench_reset -n 200

gold:
	$(PYTHON) -m tools.verify_gold_trajectory -v

eval:
	$(PYTHON) -m eval.eval_suite --trials 3 --scenarios $(SCENARIOS) --output-dir ./runs/eval

demo: gold
	@echo "see docs/pitch.md for the 3-minute demo script"

dry:
	$(PYTHON) -m training.train_hpc_outage --dry-run \
		--group-size $(GROUP_SIZE) --max-turns $(MAX_TURNS) \
		--scenarios $(SCENARIOS) --output-dir $(RUN_DIR)

dry-remote:
	@test -n "$(ENV_URLS)" || (echo "set ENV_URLS=https://... to target a hosted space" && exit 1)
	$(PYTHON) -m training.hpc_openenv_gemma --dry-run \
		--env-urls $(ENV_URLS) --group-size $(GROUP_SIZE) --max-turns $(MAX_TURNS) \
		--scenarios $(SCENARIOS) --output-dir $(RUN_DIR)

train:
	$(PYTHON) -m training.train_hpc_outage \
		--model $(MODEL) --group-size $(GROUP_SIZE) --max-turns $(MAX_TURNS) \
		--num-train-steps $(NUM_STEPS) --scenarios $(SCENARIOS) \
		--output-dir $(RUN_DIR)

train-remote:
	@test -n "$(ENV_URLS)" || (echo "set ENV_URLS=https://... to target a hosted space" && exit 1)
	$(PYTHON) -m training.hpc_openenv_gemma \
		--env-urls $(ENV_URLS) --model $(MODEL) \
		--group-size $(GROUP_SIZE) --max-turns $(MAX_TURNS) \
		--num-train-steps $(NUM_STEPS) --scenarios $(SCENARIOS) \
		--output-dir $(RUN_DIR)

serve:
	$(PYTHON) -m server.app --host 0.0.0.0 --port 8000

reward-demo:
	$(PYTHON) -m tools.reward_curve_demo --output-dir ./runs/reward_demo

clean:
	rm -rf runs .pytest_cache
	find . -type d -name '__pycache__' -prune -exec rm -rf {} +
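
# Example invocations, shown for illustration only. All variables above can be
# overridden at the command line via make's VAR=value syntax; the Space URL
# below is a hypothetical placeholder, not a real deployment.
#
#   make train MODEL=Qwen/Qwen2.5-Coder-7B-Instruct NUM_STEPS=200
#   make train-remote ENV_URLS=https://your-user-your-space.hf.space GROUP_SIZE=8
#   make eval SCENARIOS=hpc_outage,hpc_munge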