# graphstrike / validate.py
# Pandago's picture
# Upload folder using huggingface_hub
# 50f71a7 verified
"""Pre-submission validator for the Fake Gang Detection OpenEnv environment.

Checks all submission requirements and prints pass/fail for each.
Exits 0 if all checks pass, 1 if any fail.

Usage:
    python validate.py                          # server must be running on :8000
    python validate.py --url http://host:8001
    python validate.py --local                  # skip HTTP checks, test locally
"""
from __future__ import annotations
import argparse
import importlib
import json
import sys
import time
import urllib.error
import urllib.request
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
# Directory containing this script; episode fixtures are looked up relative to it.
_ROOT = Path(__file__).parent
# Prepend both import roots in one step. The resulting order is
# [<root>/server, <root>, ...], so modules under server/ win over same-named
# modules in the project root.
sys.path[:0] = [str(_ROOT / "server"), str(_ROOT)]
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
_PASS = "[PASS]"
_FAIL = "[FAIL]"
# Every recorded outcome as (passed, check name), in the order checks ran.
_results: List[Tuple[bool, str]] = []


def check(name: str, ok: bool, detail: str = "") -> bool:
    """Print and record the outcome of a single validation check.

    Args:
        name: Label shown next to the pass/fail tag.
        ok: Whether the check passed.
        detail: Extra context; printed on its own line only when the check
            failed and the text is non-empty.

    Returns:
        ``ok`` unchanged, so the call can be used directly as a condition.
    """
    report = [f" {_PASS if ok else _FAIL} {name}"]
    if not ok and detail:
        report.append(f" → {detail}")
    print("\n".join(report))
    _results.append((ok, name))
    return ok
def _get(url: str) -> Tuple[Optional[dict], Optional[str]]:
    """GET *url* and decode the response body as a JSON object.

    Args:
        url: Address to fetch (10 second timeout).

    Returns:
        ``(payload, None)`` when the body decodes to a JSON object, otherwise
        ``(None, message)`` describing what went wrong. Ordinary failures
        (connection errors, HTTP errors, invalid JSON, non-object JSON) are
        reported through the message rather than raised.
    """
    try:
        with urllib.request.urlopen(url, timeout=10) as resp:
            payload = json.loads(resp.read())
    except urllib.error.HTTPError as exc:
        # Include the response body when it can be read: it usually carries
        # the server's own explanation of the error.
        try:
            return None, f"HTTP {exc.code}: {exc.read().decode()}"
        except Exception:
            return None, f"HTTP {exc.code}"
    except Exception as exc:
        return None, str(exc)
    # Callers use dict methods on the payload, so a JSON list or scalar is
    # reported as a failure here instead of crashing them later.
    if not isinstance(payload, dict):
        return None, f"expected a JSON object, got {type(payload).__name__}"
    return payload, None
def _post(url: str, body: Any = None) -> Tuple[Optional[dict], Optional[str]]:
    """POST *body* as JSON to *url* and decode the response as a JSON object.

    Args:
        url: Address to post to (30 second timeout).
        body: JSON-serialisable request payload. ``None`` sends an empty
            object; any other value, including falsy ones such as ``[]`` or
            ``0``, is sent as given.

    Returns:
        ``(payload, None)`` when the response decodes to a JSON object,
        otherwise ``(None, message)``. For HTTP errors the message is
        ``"HTTP <code>: <response body>"`` when the body is readable.
    """
    try:
        encoded = json.dumps({} if body is None else body).encode()
        request = urllib.request.Request(
            url,
            data=encoded,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(request, timeout=30) as resp:
            payload = json.loads(resp.read())
    except urllib.error.HTTPError as exc:
        try:
            return None, f"HTTP {exc.code}: {exc.read().decode()}"
        except Exception:
            # Body unreadable or undecodable; the status code alone still helps.
            return None, f"HTTP {exc.code}"
    except Exception as exc:
        return None, str(exc)
    # Callers use dict methods on the payload, so a JSON list or scalar is
    # reported as a failure here instead of crashing them later.
    if not isinstance(payload, dict):
        return None, f"expected a JSON object, got {type(payload).__name__}"
    return payload, None
# ---------------------------------------------------------------------------
# HTTP checks
# ---------------------------------------------------------------------------
def _is_unit_score(value: Any) -> bool:
    """Return True when *value* is a real number (not a bool) within [0, 1]."""
    return (
        isinstance(value, (int, float))
        # bool is a subclass of int, so JSON true/false would otherwise pass.
        and not isinstance(value, bool)
        and 0.0 <= value <= 1.0
    )


def run_http_checks(base_url: str) -> None:
    """Exercise the server's HTTP endpoints and record one check per requirement.

    Covers ``/health``, ``/tasks``, ``/reset`` for each task, an
    INSPECT/FLAG/SUBMIT cycle on ``/step``, ``/grader`` and ``/baseline``.
    Outcomes are reported through ``check``; nothing is returned.

    Args:
        base_url: Server root, e.g. ``http://localhost:8000``. A trailing
            slash is tolerated.
    """
    # Avoid "//path" URLs when the caller passes a trailing slash.
    base_url = base_url.rstrip("/")
    print(f"\nHTTP checks against {base_url}\n")

    # /health
    data, err = _get(f"{base_url}/health")
    check(
        "/health reachable",
        isinstance(data, dict) and data.get("status") == "healthy",
        err or f"got: {data}",
    )

    # /tasks — must have action_schema and 3 tasks
    data, err = _get(f"{base_url}/tasks")
    if check("/tasks reachable", isinstance(data, dict), err or f"got: {data}"):
        tasks = data.get("tasks")
        check("/tasks has action_schema",
              isinstance(data.get("action_schema"), dict), str(data))
        check("/tasks has 3 tasks",
              isinstance(tasks, list) and len(tasks) == 3, str(data))
        check("/tasks has score_range", "score_range" in data, str(data))

    # /reset for each task
    for task in ("easy", "medium", "hard"):
        data, err = _post(f"{base_url}/reset", {"task": task, "seed": 0})
        check(
            f"/reset task={task}",
            isinstance(data, dict) and "observation" in data,
            err or f"got: {data}",
        )

    # /step — INSPECT, FLAG, SUBMIT cycle
    inspected_id = "acc_0000"
    _post(f"{base_url}/reset", {"task": "easy", "seed": 0})
    obs_resp, err = _post(
        f"{base_url}/step",
        {"action_type": "inspect", "account_id": inspected_id},
    )
    check("/step INSPECT", obs_resp is not None, err or "")

    # Prefer an account the observation reports as visible. If none is
    # reported, fall back to the account just inspected so the FLAG action
    # is always exercised rather than silently skipped.
    acc_to_flag = inspected_id
    observation = obs_resp.get("observation") if isinstance(obs_resp, dict) else None
    if isinstance(observation, dict):
        vis_ids = observation.get("visible_account_ids")
        if isinstance(vis_ids, list) and vis_ids:
            acc_to_flag = vis_ids[0]
    flag_resp, err = _post(
        f"{base_url}/step",
        {"action_type": "flag", "account_id": acc_to_flag},
    )
    check("/step FLAG", flag_resp is not None, err or "")

    sub_resp, err = _post(f"{base_url}/step", {"action_type": "submit"})
    check(
        "/step SUBMIT",
        isinstance(sub_resp, dict) and sub_resp.get("done") is True,
        err or f"got: {sub_resp}",
    )

    # /grader — must return float in [0, 1]
    data, err = _get(f"{base_url}/grader")
    if check("/grader reachable", isinstance(data, dict), err or f"got: {data}"):
        score = data.get("score")
        check("/grader returns [0,1] float", _is_unit_score(score), f"score={score}")

    # /baseline — must return 3 task scores in [0, 1]
    data, err = _post(f"{base_url}/baseline")
    if check("/baseline reachable", isinstance(data, dict), err or f"got: {data}"):
        scores = data.get("scores", {})
        all_valid = (
            isinstance(scores, dict)
            and set(scores) == {"easy", "medium", "hard"}
            and all(_is_unit_score(v) for v in scores.values())
        )
        check("/baseline returns 3 valid scores", all_valid, f"got: {scores}")
# ---------------------------------------------------------------------------
# Local checks (no server needed)
# ---------------------------------------------------------------------------
def _check_scoring() -> None:
    """Check that scoring.py imports and yields the expected risk and grader values."""
    try:
        from scoring import (  # type: ignore[import]
            compute_fake_risk, compute_hub_legitimacy, grader_score
        )
    except Exception as exc:
        check("scoring.py importable", False, str(exc))
        return
    try:
        gang_risk = compute_fake_risk(0.75, 0.65, 0.85, 0.10)
        hub = compute_hub_legitimacy(2_000_000, 200, 2000, 0.05)
        celeb_risk = compute_fake_risk(0.02, 0.02, 0.10, hub)
        # Perfect score: 10 TP, 0 FP, 0 FN, 0 steps used → efficiency=1.0 → score=1.0
        perfect = grader_score(10, 0, 0, 0, 30)
        # Tolerant comparison: the score is a float, so a correct
        # implementation may be off from 1.0 by rounding error.
        ok = gang_risk >= 0.60 and celeb_risk < 0.20 and abs(perfect - 1.0) < 1e-9
    except Exception as exc:
        # The import succeeded, so this is a computation failure.
        check("scoring.py math correct", False, str(exc))
        return
    check("scoring.py math correct", ok,
          f"gang_risk={gang_risk} celeb_risk={celeb_risk} perfect={perfect}")


def _check_models() -> None:
    """Check that models.py exposes AccountStatus and the newer model fields."""
    try:
        from models import (  # type: ignore[import]
            AccountStatus, AccountProfile, FakeGangObservation
        )
        profile = AccountProfile(
            account_id="acc_0001", follower_count=100, following_count=50,
            post_count=10, avg_post_hour=14.0, photo_reuse_score=0.8,
            bio_template_score=0.7, account_age_days=60,
        )
        check("models.py AccountProfile has fake_risk_score",
              hasattr(profile, "fake_risk_score"), "")
        check("models.py FakeGangObservation has suspect_ids",
              hasattr(FakeGangObservation(), "suspect_ids"), "")
        check("models.py AccountStatus enum exists",
              AccountStatus.SUSPECT == "suspect", "")
    except Exception as exc:
        check("models.py new fields", False, str(exc))


def _check_environment() -> None:
    """Run INSPECT then FLAG on a known gang member and check the resulting observation."""
    try:
        from environment import FakeGangEnvironment  # type: ignore[import]
        from models import FakeGangAction, ActionType  # type: ignore[import]
        env = FakeGangEnvironment()
        env.reset(task="easy", seed=0)
        ep_path = _ROOT / "episodes" / "easy_000.json"
        if not ep_path.exists():
            check("episode file exists for cascade test", False,
                  "run python server/generator.py first")
            return
        episode = json.loads(ep_path.read_text(encoding="utf-8"))
        gang_id = episode["gang_member_ids"][0]
        env.step(FakeGangAction(action_type=ActionType.INSPECT, account_id=gang_id))
        obs = env.step(FakeGangAction(action_type=ActionType.FLAG, account_id=gang_id))
        check("environment.py SUSPECT cascade works", len(obs.suspect_ids) > 0,
              f"suspect_ids={obs.suspect_ids[:3]}")
        flagged = next(
            (p for p in obs.visible_accounts if p.account_id == gang_id), None
        )
        check("environment.py fake_risk_score computed",
              flagged is not None and flagged.fake_risk_score > 0, "")
    except Exception as exc:
        check("environment.py status cascade", False, str(exc))


def _check_inference() -> None:
    """Check that inference.py imports and that one rule-based episode scores in [0, 1]."""
    try:
        from inference import run_rule_based_episode  # type: ignore[import]
        from environment import FakeGangEnvironment  # type: ignore[import]
    except Exception as exc:
        check("inference.py importable", False, str(exc))
        return
    try:
        score = run_rule_based_episode(FakeGangEnvironment(), task="easy", seed=1)
    except Exception as exc:
        # The import succeeded, so this is a runtime failure.
        check("inference.py runs locally", False, str(exc))
        return
    check("inference.py runs locally",
          isinstance(score, float) and 0.0 <= score <= 1.0,
          f"score={score}")


def _check_episode_features() -> None:
    """Check that the first easy episode file carries the newer feature and celeb fields."""
    ep_path = _ROOT / "episodes" / "easy_000.json"
    if not ep_path.exists():
        check("episodes directory has files", False, "run python server/generator.py")
        return
    try:
        ep = json.loads(ep_path.read_text(encoding="utf-8"))
        first = ep["network"]["accounts"][0]["features"]
        has_features = "comment_repeat_score" in first and "shared_ip_count" in first
        feature_keys = list(first.keys())
        has_celebs = "celeb_ids" in ep
    except Exception as exc:
        # A malformed episode file is recorded as a failed check so the
        # final summary still prints instead of the run ending in a traceback.
        check("episodes have expected structure", False, str(exc))
        return
    check("episodes have new features (comment_repeat_score, shared_ip_count)",
          has_features, f"keys: {feature_keys}")
    check("episodes have celeb_ids field", has_celebs, "")


def run_local_checks() -> None:
    """Run every check that needs no running server.

    Imports the project modules ``scoring``, ``models``, ``environment`` and
    ``inference`` and reads ``episodes/easy_000.json`` under the script
    directory. Each section records its outcome through ``check``; an
    exception inside a section is recorded as a failed check rather than
    propagated.
    """
    print("\nLocal checks\n")
    _check_scoring()
    _check_models()
    _check_environment()
    _check_inference()
    _check_episode_features()
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Command line: optional server URL, or --local to skip the HTTP checks.
    cli = argparse.ArgumentParser()
    cli.add_argument("--url", default="http://localhost:8000")
    cli.add_argument(
        "--local",
        action="store_true",
        help="Run local checks only (no server needed)",
    )
    opts = cli.parse_args()

    # Local checks always run; HTTP checks run too unless --local was given.
    run_local_checks()
    if not opts.local:
        run_http_checks(opts.url)

    # Summarise: names of failed checks, in the order they were recorded.
    failures = [label for succeeded, label in _results if not succeeded]
    total = len(_results)
    passed = total - len(failures)

    print(f"\n{'='*50}")
    print(f"Results: {passed}/{total} passed", end="")
    if not failures:
        print(" — all OK")
        print()
        sys.exit(0)

    print(f" ({len(failures)} FAILED)")
    for label in failures:
        print(f" - {label}")
    print()
    sys.exit(1)