# User-tunable deployment coordinates. Each uses `?=`, so a value supplied on
# the command line or in the environment wins, e.g.
#   make up PROJECT=my-project REGION=us-east1 CLUSTER=staging
# All three are passed to the infra setup/teardown scripts and are combined
# into the kubectl context name gke_<PROJECT>_<REGION>_<CLUSTER> by `status`.
PROJECT ?= cloudsre-v3-amd
REGION  ?= us-central1
CLUSTER ?= atlasops
# ── Cluster lifecycle ─────────────────────────────────────────────────────────
# These targets are commands rather than files, so they are declared phony.
.PHONY: up down status

# Run the infra setup script with the project, region and cluster name.
up:
	bash infra/setup.sh $(PROJECT) $(REGION) $(CLUSTER)

# Run the infra teardown script with the same three arguments.
down:
	bash infra/teardown.sh $(PROJECT) $(REGION) $(CLUSTER)

# List pods in every namespace (-A) through the kubectl context named
# gke_<PROJECT>_<REGION>_<CLUSTER>.
status:
	kubectl get pods -A --context=gke_$(PROJECT)_$(REGION)_$(CLUSTER)
# ── Chaos injection ───────────────────────────────────────────────────────────
.PHONY: chaos chaos-reset

# Apply one chaos scenario manifest to the cluster.
# Usage: make chaos SCENARIO=sf-001
#   SCENARIO  basename (without .yaml) of a manifest located anywhere under
#             bench/chaos_manifests; required. If several files match, the
#             first one reported by find is used.
# Exits with status 1 when SCENARIO is empty or no matching manifest exists.
# The lookup and the apply share one shell (joined with `\`) so that the
# MANIFEST shell variable is still set when kubectl runs.
chaos:
	@if [ -z "$(SCENARIO)" ]; then echo "Usage: make chaos SCENARIO=sf-001"; exit 1; fi
	@MANIFEST=$$(find bench/chaos_manifests -name "$(SCENARIO).yaml" | head -n 1); \
	if [ -z "$$MANIFEST" ]; then echo "Scenario $(SCENARIO) not found"; exit 1; fi; \
	echo "Applying chaos: $$MANIFEST"; \
	kubectl apply -f "$$MANIFEST"
# Best-effort cleanup: delete every resource of the listed chaos kinds in all
# namespaces. kubectl's stderr is discarded and a non-zero exit is tolerated
# (`|| true`), so this target always succeeds.
chaos-reset:
	kubectl delete \
		podchaos,networkchaos,stresschaos,dnschaos,iochaos,timechaos \
		--all -A 2>/dev/null || true
# ── Historical replays ────────────────────────────────────────────────────────
# Trigger a named historical replay.
# Usage: make replay-<name>
#   Applies bench/chaos_manifests/named_replays/<name>.yaml, where <name> is
#   the pattern stem ($*).
# A pattern rule cannot be listed in .PHONY, so the always-out-of-date FORCE
# prerequisite keeps the recipe running even if a file called replay-<name>
# happens to exist in the working directory.
replay-%: FORCE
	kubectl apply -f bench/chaos_manifests/named_replays/$*.yaml
	@echo "Replay $* triggered. Watch: make status"

# Helper target with no prerequisites and no recipe; anything depending on it
# is always considered out of date.
.PHONY: FORCE
FORCE:
# ── Agent runtime ─────────────────────────────────────────────────────────────
# Run agents/coordinator.py with whichever `python` is first on PATH.
# The command is not backgrounded, so make waits until it exits.
.PHONY: coordinator
coordinator:
	python agents/coordinator.py
# ── Benchmark ─────────────────────────────────────────────────────────────────
.PHONY: bench bench-baseline

# Benchmark the model named by MODEL.
# Usage: make bench MODEL=checkpoints/grpo_v3
#   MODEL  value handed to the runner's --model option; required. Without the
#          guard below an empty MODEL would let --model consume "--output" as
#          its value.
# Results go to bench/results/<YYYYmmdd_HHMMSS>, stamped when the recipe is
# expanded, so each run gets its own directory name.
bench:
	@if [ -z "$(MODEL)" ]; then echo "Usage: make bench MODEL=checkpoints/grpo_v3"; exit 1; fi
	python bench/runner.py --model "$(MODEL)" --output bench/results/$(shell date +%Y%m%d_%H%M%S)

# Benchmark the fixed v2 baseline checkpoint, tagged baseline_v2, into the
# fixed directory bench/results/baseline_v2.
bench-baseline:
	python bench/runner.py --model checkpoints/cloudsre_v2_baseline --tag baseline_v2 \
		--output bench/results/baseline_v2
# ── Training ──────────────────────────────────────────────────────────────────
# Training pipeline stages. No prerequisites are declared between them, so
# make does not enforce any order; the file paths below suggest the intended
# sequence is trajectories -> sft -> grpo (NOTE(review): confirm).
.PHONY: trajectories sft grpo

# Generate the trajectory corpus at data/sft_corpus.jsonl.
trajectories:
	python training/generate_trajectories.py \
		--output data/sft_corpus.jsonl

# Run training/sft.py on Qwen/Qwen2.5-7B-Instruct with data/sft_corpus.jsonl,
# writing to checkpoints/sft_v3.
sft:
	python training/sft.py --model Qwen/Qwen2.5-7B-Instruct \
		--data data/sft_corpus.jsonl --output checkpoints/sft_v3

# Run training/grpo.py starting from checkpoints/sft_v3, writing to
# checkpoints/grpo_v3, over the cascade, multi_fault and named_replays tiers.
grpo:
	python training/grpo.py --model checkpoints/sft_v3 \
		--output checkpoints/grpo_v3 --tiers cascade,multi_fault,named_replays
# ── Dashboard ─────────────────────────────────────────────────────────────────
# Run dashboard.py from the repository root with the `python` on PATH.
.PHONY: dashboard
dashboard:
	python dashboard.py
# ── Linting / tests ───────────────────────────────────────────────────────────
.PHONY: lint test release-gate smoke-e2e-local

# Run ruff over the whole repository.
lint:
	ruff check .

# Run everything under tests/ with verbose output.
test:
	pytest tests/ -v

# Run the release gate script in strict mode, writing its report to
# docs/RELEASE_READINESS.md.
release-gate:
	python scripts/release_gate.py --strict --output docs/RELEASE_READINESS.md

# Quick subset of the suite: the app endpoint, coordinator, tools and bench
# runner test modules, in quiet mode.
smoke-e2e-local:
	pytest \
		tests/test_app_endpoints.py \
		tests/test_coordinator.py \
		tests/test_tools.py \
		tests/test_bench_runner.py \
		-q