#!/usr/bin/env bash
# validate.sh — Pre-submission validator for SOC OpenEnv
# Run BEFORE submitting to catch all disqualifying issues.
#
# Usage:
# chmod +x validate.sh
# ./validate.sh # local checks only
# ./validate.sh https://your-space.hf.space # + HF Space ping
# Shell options: unset variables are errors and a pipeline fails if any stage
# fails. `-e` is not enabled, so a failing check does not abort the run.
set -u
set -o pipefail

# Optional first argument: base URL of the deployed Space to ping.
PING_URL="${1:-}"

# Running totals maintained by pass() and fail().
PASS=0
FAIL=0

# ANSI escape sequences, stored unexpanded and interpreted by `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BOLD='\033[1m'
NC='\033[0m'
# Print a green PASS line for the check named in $1 and increment PASS.
pass() {
  local label=$1
  echo -e "${GREEN}PASS${NC} ${label}"
  PASS=$((PASS + 1))
}

# Print a red FAIL line for the check named in $1 and increment FAIL.
fail() {
  local label=$1
  echo -e "${RED}FAIL${NC} ${label}"
  FAIL=$((FAIL + 1))
}

# Print a yellow WARN line for $1; neither counter is touched.
warn() {
  local label=$1
  echo -e "${YELLOW}WARN${NC} ${label}"
}

# Print a bold section heading for $1, preceded by a blank line.
hdr() {
  local title=$1
  echo -e "\n${BOLD}ββ ${title} ββ${NC}"
}
# Python interpreter selection (override with PYTHON_BIN env var).
#
# When PYTHON_BIN is unset or empty, the interpreter is resolved in this order:
#   1. `python3`, then `python`, looked up on PATH;
#   2. a virtualenv inside the current directory (`.venv/bin/python`, then
#      `.venv/Scripts/python.exe`);
#   3. the legacy machine-specific absolute path, kept only so existing setups
#      that relied on it keep working.
# If nothing is found a FAIL line is printed and the script exits with status 1.

# Print the first usable interpreter on stdout; return 1 if none is found.
_find_python() {
  local candidate
  for candidate in python3 python; do
    if command -v "$candidate" >/dev/null 2>&1; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  for candidate in \
    ".venv/bin/python" \
    ".venv/Scripts/python.exe" \
    "/mnt/c/Users/jayan/OneDrive/Documents/Projects/metaXrl/metaXrl/.venv/Scripts/python.exe"; do
    if [ -x "$candidate" ]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 1
}

if [ -z "${PYTHON_BIN:-}" ]; then
  if ! PYTHON_BIN=$(_find_python); then
    fail "No Python interpreter found. Set PYTHON_BIN to your project Python."
    echo -e "${BOLD}========================================${NC}"
    exit 1
  fi
fi
# Opening banner: a bold title framed by two bold rules.
for banner_line in \
  "${BOLD}========================================${NC}" \
  "${BOLD} SOC OpenEnv β Pre-submission Validator${NC}" \
  "${BOLD}========================================${NC}"; do
  echo -e "$banner_line"
done
# 1. Required files
# Each path must exist as a regular file relative to the current directory;
# every path is reported as its own PASS or FAIL line.
hdr "1. Required files"
required_files=(
  openenv.yaml Dockerfile requirements.txt inference.py server.py README.md pyproject.toml
  soc_env/__init__.py soc_env/models.py soc_env/environment.py soc_env/graders.py
  scenarios/__init__.py scenarios/easy_scenarios.py scenarios/medium_scenarios.py scenarios/hard_scenarios.py
  tests/test_environment.py tests/test_graders.py validate.sh
)
for required in "${required_files[@]}"; do
  if [ -f "$required" ]; then
    pass "$required"
  else
    fail "MISSING: $required"
  fi
done
# 2. openenv.yaml structure
# Text-level checks only (grep, no YAML parsing): a top-level `name:` key, a
# top-level `tasks:` key, at least three task entries and a mention of the
# `POST /reset` endpoint.

# Print the number of lines in file $1 that start a task entry.
# Always prints exactly one integer: `grep -c` already prints 0 (and exits 1)
# when nothing matches, so its exit status must not trigger a second "0".
# A missing or unreadable file also yields 0.
# NOTE(review): the pattern expects exactly one leading space before "- id:";
# confirm that this matches the indentation used in openenv.yaml.
_count_task_ids() {
  local manifest=$1
  local count
  count=$(grep -c "^ - id:" "$manifest" 2>/dev/null) || count=${count:-0}
  printf '%s\n' "${count:-0}"
}

hdr "2. openenv.yaml"
if grep -q "^name:" openenv.yaml; then
  pass "name field"
else
  fail "name field missing"
fi
if grep -q "^tasks:" openenv.yaml; then
  pass "tasks field"
else
  fail "tasks field missing"
fi
TC=$(_count_task_ids openenv.yaml)
if [ "$TC" -ge 3 ]; then
  pass "3+ tasks ($TC)"
else
  fail "Need 3+ tasks, found $TC"
fi
if grep -q "POST /reset" openenv.yaml; then
  pass "reset endpoint"
else
  fail "reset endpoint missing"
fi
# 3. Python syntax
# Byte-compile each module with the selected interpreter; compiler diagnostics
# are discarded and only the exit status decides PASS or FAIL.
hdr "3. Python syntax"
python_sources=(
  server.py inference.py soc_env/models.py soc_env/environment.py soc_env/graders.py
  scenarios/easy_scenarios.py scenarios/medium_scenarios.py scenarios/hard_scenarios.py
)
for module in "${python_sources[@]}"; do
  if "$PYTHON_BIN" -m py_compile "$module" 2>/dev/null; then
    pass "syntax OK: $module"
  else
    fail "syntax error: $module"
  fi
done
# 4. Environment contract
# Feeds an inline Python program to the interpreter on stdin. The heredoc
# delimiter is quoted, so the shell performs no expansion inside it. For every
# id in SOCEnv.TASK_IDS the program resets the environment, takes one
# ENRICH_ALERT step, and asserts the types and ranges of what reset(), step(),
# state() and grade() return. It exits 1 if any task raised, else 0.
# NOTE(review): the PASS message says "All 3 tasks" but the loop covers
# whatever SOCEnv.TASK_IDS contains.
hdr "4. Environment contract (reset/step/state/grade)"
if "$PYTHON_BIN" - <<'PYEOF'
import sys; sys.path.insert(0, '.')
from soc_env import SOCEnv, Action
from soc_env.models import ActionType, Observation, Reward, EnvState

errors = []
for task_id in SOCEnv.TASK_IDS:
    try:
        env = SOCEnv(task_id=task_id, seed=42)
        obs = env.reset()
        assert isinstance(obs, Observation)
        assert obs.step == 0
        # Target the first active alert when there is one, otherwise no alert.
        action = Action(action_type=ActionType.ENRICH_ALERT,
                        alert_id=obs.active_alerts[0].alert_id if obs.active_alerts else None,
                        source="threat_intel")
        obs2, reward, done, info = env.step(action)
        assert isinstance(obs2, Observation)
        assert isinstance(reward, Reward)
        assert isinstance(done, bool)
        assert -1.0 <= reward.total <= 1.0
        s = env.state()
        assert isinstance(s, EnvState)
        score = env.grade()
        assert 0.0 <= score <= 1.0
        print(f" OK {task_id}: reward={reward.total:+.3f} score={score:.3f}")
    except Exception as e:
        # Collect the failure and keep going so every task is reported.
        errors.append(f" FAIL {task_id}: {e}")
for e in errors:
    print(e)
sys.exit(1 if errors else 0)
PYEOF
then
  pass "All 3 tasks: reset/step/state/grade"
else
  fail "Environment contract failed"
fi
# 5. Grader determinism
# Runs the same scripted episode twice per task, both with seed=42, and
# requires the two grade() results to compare equal. The inline Python exits 1
# if any task produced two different scores, else 0.
hdr "5. Grader determinism"
if "$PYTHON_BIN" - <<'PYEOF'
import sys; sys.path.insert(0, '.')
from soc_env import SOCEnv, Action
from soc_env.models import ActionType


def run(task_id):
    """Play at most five scripted steps on a fresh env and return its grade."""
    env = SOCEnv(task_id=task_id, seed=42)
    env.reset()
    for _ in range(5):
        s = env.state()
        if s.done:
            break
        alerts = s.observation.active_alerts
        # Enrich the first active alert; with none left, open a ticket instead.
        a = (Action(action_type=ActionType.ENRICH_ALERT, alert_id=alerts[0].alert_id, source="threat_intel")
             if alerts else Action(action_type=ActionType.CREATE_TICKET, priority="P2", summary="done"))
        _, _, done, _ = env.step(a)
        if done:
            break
    return env.grade()


errors = []
for t in SOCEnv.TASK_IDS:
    s1, s2 = run(t), run(t)
    if s1 == s2:
        print(f" OK {t}: {s1:.4f}")
    else:
        errors.append(f" FAIL {t}: {s1} != {s2}")
for e in errors:
    print(e)
sys.exit(1 if errors else 0)
PYEOF
then
  pass "Graders deterministic"
else
  fail "Graders NOT deterministic"
fi
# 6. Scores vary
# Compares two scripted agents per task: one that immediately files a P3
# ticket and one that takes up to eight enrich/ticket steps. The inline Python
# exits 1 only when the two scores are equal for every task, else 0.
hdr "6. Scores vary across agents"
if "$PYTHON_BIN" - <<'PYEOF'
import sys; sys.path.insert(0, '.')
from soc_env import SOCEnv, Action
from soc_env.models import ActionType


def trivial(t):
    """Grade an episode consisting of a single CREATE_TICKET action."""
    env = SOCEnv(task_id=t, seed=42)
    env.reset()
    env.step(Action(action_type=ActionType.CREATE_TICKET, priority="P3", summary="x"))
    return env.grade()


def active(t):
    """Grade an episode of up to eight enrich-or-ticket steps."""
    env = SOCEnv(task_id=t, seed=42)
    env.reset()
    for _ in range(8):
        s = env.state()
        if s.done:
            break
        alerts = s.observation.active_alerts
        # Enrich the first active alert; with none left, open a ticket instead.
        a = (Action(action_type=ActionType.ENRICH_ALERT, alert_id=alerts[0].alert_id, source="threat_intel")
             if alerts else Action(action_type=ActionType.CREATE_TICKET, priority="P1", summary="done"))
        _, _, done, _ = env.step(a)
        if done:
            break
    return env.grade()


pairs = [(trivial(t), active(t)) for t in SOCEnv.TASK_IDS]
if all(a == b for a, b in pairs):
    print(" WARN: all scores identical")
    sys.exit(1)
for a, b in pairs:
    print(f" OK trivial={a:.3f} active={b:.3f}")
sys.exit(0)
PYEOF
then
  pass "Scores vary"
else
  fail "Scores don't vary β check graders"
fi
# 7. inference.py requirements
# Plain substring checks: each token must appear somewhere in inference.py.
# Entries are "pattern:label"; the label is what PASS/FAIL lines display.
hdr "7. inference.py spec compliance"
for spec in \
  "API_BASE_URL:API_BASE_URL" \
  "MODEL_NAME:MODEL_NAME" \
  "HF_TOKEN:HF_TOKEN" \
  "OpenAI:OpenAI client"; do
  needle=${spec%%:*}
  label=${spec#*:}
  if grep -q "$needle" inference.py; then
    pass "$label"
  else
    fail "$label missing"
  fi
done
# 8. Dockerfile
# Text checks on the Dockerfile (port 7860, HEALTHCHECK, CMD), followed by a
# real image build when a Docker CLI with a reachable daemon is available.
# A missing HEALTHCHECK only warns; it is not counted as a failure.

# Build the image with the CLI named in $1 (docker or docker.exe) and record
# the result. The verdict depends on the build alone: removing the throwaway
# image afterwards is best-effort and can only produce a warning, never a
# "build FAILED".
_docker_build_check() {
  local cli=$1
  if "$cli" build -t soc-openenv-validate . -q 2>/dev/null; then
    pass "docker build succeeded"
    "$cli" rmi soc-openenv-validate -f &>/dev/null \
      || warn "Could not remove temporary image soc-openenv-validate"
  else
    fail "docker build FAILED β run 'docker build .' for details"
  fi
}

hdr "8. Dockerfile"
if grep -q "7860" Dockerfile; then
  pass "Port 7860"
else
  fail "Port 7860 not exposed"
fi
if grep -q "HEALTHCHECK" Dockerfile; then
  pass "HEALTHCHECK"
else
  warn "No HEALTHCHECK"
fi
if grep -q "^CMD" Dockerfile; then
  pass "CMD present"
else
  fail "No CMD"
fi

if command -v docker &>/dev/null; then
  if docker info >/dev/null 2>&1; then
    echo " Building Docker image (may take 1-2 min)..."
    _docker_build_check docker
  else
    warn "Docker CLI found but daemon unavailable in this shell β skipping build check."
  fi
elif command -v docker.exe &>/dev/null; then
  if docker.exe info >/dev/null 2>&1; then
    echo " Building Docker image via docker.exe (may take 1-2 min)..."
    _docker_build_check docker.exe
  else
    warn "docker.exe found but daemon unavailable β skipping build check."
  fi
else
  warn "Docker not installed/available in this shell β skipping build check."
fi
# 9. HF Space (optional)
# When a base URL was given as the first argument, POST a reset request to
# <url>/reset and require HTTP 200. Without a URL the check is skipped with a
# warning.

# Print the HTTP status code returned by POSTing to "$1/reset".
# Prints exactly "000" whenever curl itself fails: curl's --write-out already
# emits 000 in that case, so its output is replaced rather than appended to.
# A single trailing slash on $1 is dropped so the request path is never
# "//reset".
_reset_status_code() {
  local base_url=${1%/}
  local code
  code=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H "Content-Type: application/json" \
    -d '{"task_id":"alert_triage"}' "${base_url}/reset" --max-time 30 2>/dev/null) || code="000"
  printf '%s\n' "${code:-000}"
}

hdr "9. HF Space ping"
if [ -z "$PING_URL" ]; then
  warn "Skipped β run: ./validate.sh https://your-space.hf.space"
else
  CODE=$(_reset_status_code "$PING_URL")
  if [ "$CODE" = "200" ]; then
    pass "HF Space /reset returned 200"
  else
    fail "HF Space returned $CODE (need 200)"
  fi
fi
# Summary
# Prints the final verdict between two bold rules, then exits with the number
# of failed checks as the status (0 when everything passed).
summary_lines=("" "${BOLD}========================================${NC}")
if [ "$FAIL" -eq 0 ]; then
  summary_lines+=(
    "${GREEN}${BOLD} ALL CHECKS PASSED ($PASS passed)${NC}"
    "${GREEN}${BOLD} Ready to submit!${NC}"
  )
else
  summary_lines+=("${RED}${BOLD} $FAIL FAILED, $PASS passed β fix before submitting${NC}")
fi
summary_lines+=("${BOLD}========================================${NC}")
for summary_line in "${summary_lines[@]}"; do
  echo -e "$summary_line"
done
exit $FAIL
|