soc-openenv / validate.sh
Jayanth4577's picture
Add FastAPI server implementation and entry points
ab20014
Raw
History Blame Contribute Delete
8.8 kB
#!/usr/bin/env bash
# validate.sh β€” Pre-submission validator for SOC OpenEnv
# Run BEFORE submitting to catch all disqualifying issues.
#
# Usage:
# chmod +x validate.sh
# ./validate.sh # local checks only
# ./validate.sh https://your-space.hf.space # + HF Space ping
set -uo pipefail
PING_URL="${1:-}"
PASS=0; FAIL=0
RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BOLD='\033[1m'; NC='\033[0m'
pass() { echo -e "${GREEN}PASS${NC} $1"; PASS=$((PASS+1)); }
fail() { echo -e "${RED}FAIL${NC} $1"; FAIL=$((FAIL+1)); }
warn() { echo -e "${YELLOW}WARN${NC} $1"; }
hdr() { echo -e "\n${BOLD}── $1 ──${NC}"; }
# Python interpreter selection (override with PYTHON_BIN env var)
if [ -z "${PYTHON_BIN:-}" ]; then
if command -v python3 >/dev/null 2>&1; then
PYTHON_BIN="python3"
elif command -v python >/dev/null 2>&1; then
PYTHON_BIN="python"
elif [ -x "/mnt/c/Users/jayan/OneDrive/Documents/Projects/metaXrl/metaXrl/.venv/Scripts/python.exe" ]; then
PYTHON_BIN="/mnt/c/Users/jayan/OneDrive/Documents/Projects/metaXrl/metaXrl/.venv/Scripts/python.exe"
else
fail "No Python interpreter found. Set PYTHON_BIN to your project Python."
echo -e "${BOLD}========================================${NC}"
exit 1
fi
fi
echo -e "${BOLD}========================================${NC}"
echo -e "${BOLD} SOC OpenEnv β€” Pre-submission Validator${NC}"
echo -e "${BOLD}========================================${NC}"
# 1. Required files
hdr "1. Required files"
for f in openenv.yaml Dockerfile requirements.txt inference.py server.py README.md pyproject.toml \
soc_env/__init__.py soc_env/models.py soc_env/environment.py soc_env/graders.py \
scenarios/__init__.py scenarios/easy_scenarios.py scenarios/medium_scenarios.py scenarios/hard_scenarios.py \
tests/test_environment.py tests/test_graders.py validate.sh; do
[ -f "$f" ] && pass "$f" || fail "MISSING: $f"
done
# 2. openenv.yaml structure
hdr "2. openenv.yaml"
grep -q "^name:" openenv.yaml && pass "name field" || fail "name field missing"
grep -q "^tasks:" openenv.yaml && pass "tasks field" || fail "tasks field missing"
TC=$(grep -c "^ - id:" openenv.yaml 2>/dev/null || echo 0)
[ "$TC" -ge 3 ] && pass "3+ tasks ($TC)" || fail "Need 3+ tasks, found $TC"
grep -q "POST /reset" openenv.yaml && pass "reset endpoint" || fail "reset endpoint missing"
# 3. Python syntax
hdr "3. Python syntax"
for f in server.py inference.py soc_env/models.py soc_env/environment.py soc_env/graders.py \
scenarios/easy_scenarios.py scenarios/medium_scenarios.py scenarios/hard_scenarios.py; do
"$PYTHON_BIN" -m py_compile "$f" 2>/dev/null && pass "syntax OK: $f" || fail "syntax error: $f"
done
# 4. Environment contract
hdr "4. Environment contract (reset/step/state/grade)"
"$PYTHON_BIN" - <<'PYEOF'
import sys; sys.path.insert(0, '.')
from soc_env import SOCEnv, Action
from soc_env.models import ActionType, Observation, Reward, EnvState
errors = []
for task_id in SOCEnv.TASK_IDS:
try:
env = SOCEnv(task_id=task_id, seed=42)
obs = env.reset()
assert isinstance(obs, Observation)
assert obs.step == 0
action = Action(action_type=ActionType.ENRICH_ALERT,
alert_id=obs.active_alerts[0].alert_id if obs.active_alerts else None,
source="threat_intel")
obs2, reward, done, info = env.step(action)
assert isinstance(obs2, Observation)
assert isinstance(reward, Reward)
assert isinstance(done, bool)
assert -1.0 <= reward.total <= 1.0
s = env.state()
assert isinstance(s, EnvState)
score = env.grade()
assert 0.0 <= score <= 1.0
print(f" OK {task_id}: reward={reward.total:+.3f} score={score:.3f}")
except Exception as e:
errors.append(f" FAIL {task_id}: {e}")
for e in errors: print(e)
sys.exit(1 if errors else 0)
PYEOF
[ $? -eq 0 ] && pass "All 3 tasks: reset/step/state/grade" || fail "Environment contract failed"
# 5. Grader determinism
hdr "5. Grader determinism"
"$PYTHON_BIN" - <<'PYEOF'
import sys; sys.path.insert(0, '.')
from soc_env import SOCEnv, Action
from soc_env.models import ActionType
def run(task_id):
env = SOCEnv(task_id=task_id, seed=42); env.reset()
for _ in range(5):
s = env.state()
if s.done: break
alerts = s.observation.active_alerts
a = (Action(action_type=ActionType.ENRICH_ALERT, alert_id=alerts[0].alert_id, source="threat_intel")
if alerts else Action(action_type=ActionType.CREATE_TICKET, priority="P2", summary="done"))
_, _, done, _ = env.step(a)
if done: break
return env.grade()
errors = []
for t in SOCEnv.TASK_IDS:
s1, s2 = run(t), run(t)
if s1 == s2: print(f" OK {t}: {s1:.4f}")
else: errors.append(f" FAIL {t}: {s1} != {s2}")
for e in errors: print(e)
sys.exit(1 if errors else 0)
PYEOF
[ $? -eq 0 ] && pass "Graders deterministic" || fail "Graders NOT deterministic"
# 6. Scores vary
hdr "6. Scores vary across agents"
"$PYTHON_BIN" - <<'PYEOF'
import sys; sys.path.insert(0, '.')
from soc_env import SOCEnv, Action
from soc_env.models import ActionType
def trivial(t):
env = SOCEnv(task_id=t, seed=42); env.reset()
env.step(Action(action_type=ActionType.CREATE_TICKET, priority="P3", summary="x")); return env.grade()
def active(t):
env = SOCEnv(task_id=t, seed=42); env.reset()
for _ in range(8):
s = env.state()
if s.done: break
alerts = s.observation.active_alerts
a = (Action(action_type=ActionType.ENRICH_ALERT, alert_id=alerts[0].alert_id, source="threat_intel")
if alerts else Action(action_type=ActionType.CREATE_TICKET, priority="P1", summary="done"))
_, _, done, _ = env.step(a);
if done: break
return env.grade()
pairs = [(trivial(t), active(t)) for t in SOCEnv.TASK_IDS]
if all(a == b for a, b in pairs): print(" WARN: all scores identical"); sys.exit(1)
else: [print(f" OK trivial={a:.3f} active={b:.3f}") for a, b in pairs]
sys.exit(0)
PYEOF
[ $? -eq 0 ] && pass "Scores vary" || fail "Scores don't vary β€” check graders"
# 7. inference.py requirements
hdr "7. inference.py spec compliance"
grep -q "API_BASE_URL" inference.py && pass "API_BASE_URL" || fail "API_BASE_URL missing"
grep -q "MODEL_NAME" inference.py && pass "MODEL_NAME" || fail "MODEL_NAME missing"
grep -q "HF_TOKEN" inference.py && pass "HF_TOKEN" || fail "HF_TOKEN missing"
grep -q "OpenAI" inference.py && pass "OpenAI client" || fail "OpenAI client missing"
# 8. Dockerfile
hdr "8. Dockerfile"
grep -q "7860" Dockerfile && pass "Port 7860" || fail "Port 7860 not exposed"
grep -q "HEALTHCHECK" Dockerfile && pass "HEALTHCHECK" || warn "No HEALTHCHECK"
grep -q "^CMD" Dockerfile && pass "CMD present" || fail "No CMD"
if command -v docker &>/dev/null; then
if docker info >/dev/null 2>&1; then
echo " Building Docker image (may take 1-2 min)..."
docker build -t soc-openenv-validate . -q 2>/dev/null \
&& pass "docker build succeeded" \
&& docker rmi soc-openenv-validate -f &>/dev/null \
|| fail "docker build FAILED β€” run 'docker build .' for details"
else
warn "Docker CLI found but daemon unavailable in this shell β€” skipping build check."
fi
elif command -v docker.exe &>/dev/null; then
if docker.exe info >/dev/null 2>&1; then
echo " Building Docker image via docker.exe (may take 1-2 min)..."
docker.exe build -t soc-openenv-validate . -q 2>/dev/null \
&& pass "docker build succeeded" \
&& docker.exe rmi soc-openenv-validate -f &>/dev/null \
|| fail "docker build FAILED β€” run 'docker build .' for details"
else
warn "docker.exe found but daemon unavailable β€” skipping build check."
fi
else
warn "Docker not installed/available in this shell β€” skipping build check."
fi
# 9. HF Space (optional)
hdr "9. HF Space ping"
if [ -z "$PING_URL" ]; then
warn "Skipped β€” run: ./validate.sh https://your-space.hf.space"
else
CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H "Content-Type: application/json" \
-d '{"task_id":"alert_triage"}' "$PING_URL/reset" --max-time 30 2>/dev/null || echo "000")
[ "$CODE" = "200" ] && pass "HF Space /reset returned 200" || fail "HF Space returned $CODE (need 200)"
fi
# Summary
echo ""
echo -e "${BOLD}========================================${NC}"
if [ "$FAIL" -eq 0 ]; then
echo -e "${GREEN}${BOLD} ALL CHECKS PASSED ($PASS passed)${NC}"
echo -e "${GREEN}${BOLD} Ready to submit!${NC}"
else
echo -e "${RED}${BOLD} $FAIL FAILED, $PASS passed β€” fix before submitting${NC}"
fi
echo -e "${BOLD}========================================${NC}"
exit $FAIL