Spaces:
Sleeping
Sleeping
| """Pre-submission validator for the Fake Gang Detection OpenEnv environment. | |
| Checks all submission requirements and prints pass/fail for each. | |
| Exits 0 if all checks pass, 1 if any fail. | |
| Usage: | |
| python validate.py # server must be running on :8000 | |
| python validate.py --url http://host:8001 | |
| python validate.py --local # skip HTTP checks, test locally | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import importlib | |
| import json | |
| import sys | |
| import time | |
| import urllib.error | |
| import urllib.request | |
| from pathlib import Path | |
| from typing import Any, Dict, List, Optional, Tuple | |
| _ROOT = Path(__file__).parent | |
| sys.path.insert(0, str(_ROOT)) | |
| sys.path.insert(0, str(_ROOT / "server")) | |
| # --------------------------------------------------------------------------- | |
| # Helpers | |
| # --------------------------------------------------------------------------- | |
_PASS = "[PASS]"
_FAIL = "[FAIL]"
# Outcome of every check so far, as (passed, check name), in execution order.
_results: List[Tuple[bool, str]] = []


def check(name: str, ok: bool, detail: str = "") -> bool:
    """Print a pass/fail line for one check and record its outcome.

    Args:
        name: Label printed after the tag and stored in ``_results``.
        ok: Whether the check passed.
        detail: Extra text shown on a second line; printed only when the
            check failed and the text is non-empty.

    Returns:
        ``ok`` unchanged, so a call can be used directly as a condition.
    """
    report = [f" {_PASS if ok else _FAIL} {name}"]
    if not ok and detail:
        report.append(f" → {detail}")
    print("\n".join(report))
    _results.append((ok, name))
    return ok
def _get(url: str) -> Tuple[Optional[dict], Optional[str]]:
    """Issue a GET request and decode the response body as JSON.

    Args:
        url: Full URL to fetch.

    Returns:
        ``(payload, None)`` when the request succeeds and the body parses as
        JSON; otherwise ``(None, message)``. For an HTTP error status the
        message is ``"HTTP <code>: <body>"`` (or ``"HTTP <code>"`` if the
        body cannot be read or decoded), the same format ``_post`` uses.
        Any other failure is reported as ``str(exc)``. Never raises.
    """
    try:
        with urllib.request.urlopen(url, timeout=10) as resp:
            return json.loads(resp.read()), None
    except urllib.error.HTTPError as exc:
        # Servers usually explain the failure in the body; surface it rather
        # than only the bare status line.
        try:
            return None, f"HTTP {exc.code}: {exc.read().decode()}"
        except Exception:
            return None, f"HTTP {exc.code}"
    except Exception as exc:
        return None, str(exc)
def _post(url: str, body: Any = None) -> Tuple[Optional[dict], Optional[str]]:
    """Send ``body`` as a JSON POST request and decode the JSON reply.

    Args:
        url: Full URL to post to.
        body: JSON-serialisable payload. Any falsy value (including
            ``None``) is sent as an empty JSON object.

    Returns:
        ``(payload, None)`` on success, otherwise ``(None, message)``.
        HTTP error statuses yield ``"HTTP <code>: <body>"``, falling back to
        ``"HTTP <code>"`` when the error body cannot be read or decoded.
        Every other failure is reported as ``str(exc)``. Never raises.
    """
    try:
        encoded = json.dumps(body or {}).encode()
        request = urllib.request.Request(
            url,
            data=encoded,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(request, timeout=30) as resp:
            reply = json.loads(resp.read())
        return reply, None
    except urllib.error.HTTPError as http_err:
        status = f"HTTP {http_err.code}"
        try:
            raw = http_err.read()
            return None, f"{status}: {raw.decode()}"
        except Exception:
            # Error body unreadable or undecodable: report the status alone.
            return None, status
    except Exception as other_err:
        return None, str(other_err)
| # --------------------------------------------------------------------------- | |
| # HTTP checks | |
| # --------------------------------------------------------------------------- | |
def run_http_checks(base_url: str) -> None:
    """Exercise the server's HTTP endpoints and record each result.

    Probes, in order: ``GET /health``, ``GET /tasks``, ``POST /reset`` for
    the easy/medium/hard tasks, an inspect → flag → submit cycle on
    ``POST /step``, ``GET /grader`` and ``POST /baseline``. Every probe is
    reported through ``check``; a failed probe never raises.

    Args:
        base_url: Server root such as ``http://localhost:8000``. Trailing
            slashes are ignored.
    """
    # "http://host:8000/" would otherwise produce "//health"-style paths.
    base_url = base_url.rstrip("/")
    print(f"\nHTTP checks against {base_url}\n")

    def explain(err: Optional[str], payload: Any) -> str:
        # A reply that arrived but has the wrong shape carries no transport
        # error, so show the payload instead of an empty detail.
        return err or f"unexpected response: {payload}"

    def in_unit_range(value: Any) -> bool:
        # bool is an int subclass; JSON true/false is not a score.
        return (
            isinstance(value, (int, float))
            and not isinstance(value, bool)
            and 0.0 <= value <= 1.0
        )

    # /health
    data, err = _get(f"{base_url}/health")
    check(
        "/health reachable",
        isinstance(data, dict) and data.get("status") == "healthy",
        explain(err, data),
    )

    # /tasks — must have action_schema and 3 tasks
    data, err = _get(f"{base_url}/tasks")
    if check("/tasks reachable", isinstance(data, dict), explain(err, data)):
        tasks = data.get("tasks")
        check("/tasks has action_schema",
              isinstance(data.get("action_schema"), dict), str(data))
        check("/tasks has 3 tasks",
              isinstance(tasks, list) and len(tasks) == 3, str(data))
        check("/tasks has score_range", "score_range" in data, str(data))

    # /reset for each task
    for task in ("easy", "medium", "hard"):
        data, err = _post(f"{base_url}/reset", {"task": task, "seed": 0})
        check(
            f"/reset task={task}",
            isinstance(data, dict) and "observation" in data,
            explain(err, data),
        )

    # /step — INSPECT, FLAG, SUBMIT cycle on a fresh easy episode
    _post(f"{base_url}/reset", {"task": "easy", "seed": 0})
    obs_resp, err = _post(
        f"{base_url}/step",
        {"action_type": "inspect", "account_id": "acc_0000"},
    )
    check("/step INSPECT", obs_resp is not None, err or "")

    # Pick an account id to flag from the INSPECT observation, tolerating a
    # reply whose shape is not the expected nested dict/list.
    acc_to_flag = None
    if isinstance(obs_resp, dict):
        observation = obs_resp.get("observation")
        if isinstance(observation, dict):
            vis_ids = observation.get("visible_account_ids")
            if isinstance(vis_ids, list) and vis_ids:
                acc_to_flag = vis_ids[0]
    if acc_to_flag:
        flag_resp, err = _post(
            f"{base_url}/step",
            {"action_type": "flag", "account_id": acc_to_flag},
        )
        check("/step FLAG", flag_resp is not None, err or "")
    else:
        # Not recorded as a failure, but make the gap in coverage visible.
        print(" [SKIP] /step FLAG\n"
              " → no visible_account_ids in the INSPECT observation")

    sub_resp, err = _post(f"{base_url}/step", {"action_type": "submit"})
    check(
        "/step SUBMIT",
        isinstance(sub_resp, dict) and sub_resp.get("done") is True,
        explain(err, sub_resp),
    )

    # /grader — must return a number in [0, 1]
    data, err = _get(f"{base_url}/grader")
    if check("/grader reachable", isinstance(data, dict), explain(err, data)):
        score = data.get("score")
        check("/grader returns [0,1] float", in_unit_range(score),
              f"score={score}")

    # /baseline — must return 3 task scores in [0, 1]
    data, err = _post(f"{base_url}/baseline")
    if check("/baseline reachable", isinstance(data, dict), explain(err, data)):
        scores = data.get("scores", {})
        all_valid = (
            isinstance(scores, dict)
            and set(scores) == {"easy", "medium", "hard"}
            and all(in_unit_range(v) for v in scores.values())
        )
        check("/baseline returns 3 valid scores", all_valid,
              f"got: {scores}")
| # --------------------------------------------------------------------------- | |
| # Local checks (no server needed) | |
| # --------------------------------------------------------------------------- | |
def run_local_checks() -> None:
    """Run every check that needs no server and record the results.

    Covers, in order: ``scoring``, ``models``, ``environment``,
    ``inference`` and the generated ``episodes/easy_000.json`` file. Each
    section reports through ``check`` and converts any exception into a
    failed check, so one broken module cannot abort the rest.
    """
    print("\nLocal checks\n")
    _check_scoring()
    _check_models()
    _check_environment()
    _check_inference()
    _check_episode_file()


def _check_scoring() -> None:
    """Import ``scoring`` and sanity-check its risk and grader functions."""
    try:
        from scoring import (  # type: ignore[import]
            compute_fake_risk, compute_hub_legitimacy, grader_score
        )
        gang_risk = compute_fake_risk(0.75, 0.65, 0.85, 0.10)
        hub = compute_hub_legitimacy(2_000_000, 200, 2000, 0.05)
        celeb_risk = compute_fake_risk(0.02, 0.02, 0.10, hub)
        # Perfect score: 10 TP, 0 FP, 0 FN, 0 steps used → efficiency=1.0 → score=1.0
        perfect = grader_score(10, 0, 0, 0, 30)
        ok = (gang_risk >= 0.60 and celeb_risk < 0.20 and perfect == 1.0)
        check("scoring.py math correct", ok,
              f"gang_risk={gang_risk} celeb_risk={celeb_risk} perfect={perfect}")
    except Exception as exc:
        check("scoring.py importable", False, str(exc))


def _check_models() -> None:
    """Check that ``models`` exposes the attributes the validator expects."""
    try:
        from models import AccountStatus, AccountProfile, FakeGangObservation  # type: ignore[import]
        p = AccountProfile(
            account_id="acc_0001", follower_count=100, following_count=50,
            post_count=10, avg_post_hour=14.0, photo_reuse_score=0.8,
            bio_template_score=0.7, account_age_days=60,
        )
        check("models.py AccountProfile has fake_risk_score",
              hasattr(p, "fake_risk_score"), "")
        check("models.py FakeGangObservation has suspect_ids",
              hasattr(FakeGangObservation(), "suspect_ids"), "")
        check("models.py AccountStatus enum exists",
              AccountStatus.SUSPECT == "suspect", "")
    except Exception as exc:
        check("models.py new fields", False, str(exc))


def _check_environment() -> None:
    """Inspect and flag one gang member, then check the resulting observation.

    The gang member id is read from ``episodes/easy_000.json``; a missing
    file is recorded as a failed check.
    """
    try:
        from environment import FakeGangEnvironment  # type: ignore[import]
        from models import FakeGangAction, ActionType  # type: ignore[import]
        env = FakeGangEnvironment()
        obs = env.reset(task="easy", seed=0)
        ep_path = _ROOT / "episodes" / "easy_000.json"
        if ep_path.exists():
            # JSON is read as UTF-8 explicitly instead of the locale default.
            episode = json.loads(ep_path.read_text(encoding="utf-8"))
            gang_id = episode["gang_member_ids"][0]
            obs = env.step(FakeGangAction(action_type=ActionType.INSPECT, account_id=gang_id))
            obs = env.step(FakeGangAction(action_type=ActionType.FLAG, account_id=gang_id))
            cascade_ok = len(obs.suspect_ids) > 0
            check("environment.py SUSPECT cascade works", cascade_ok,
                  f"suspect_ids={obs.suspect_ids[:3]}")
            p_flagged = next((p for p in obs.visible_accounts if p.account_id == gang_id), None)
            check("environment.py fake_risk_score computed",
                  p_flagged is not None and p_flagged.fake_risk_score > 0, "")
        else:
            check("episode file exists for cascade test", False,
                  "run python server/generator.py first")
    except Exception as exc:
        check("environment.py status cascade", False, str(exc))


def _check_inference() -> None:
    """Run one rule-based episode locally and check the score is in [0, 1]."""
    try:
        from inference import run_rule_based_episode  # type: ignore[import]
        from environment import FakeGangEnvironment  # type: ignore[import]
        env2 = FakeGangEnvironment()
        score = run_rule_based_episode(env2, task="easy", seed=1)
        check("inference.py runs locally",
              isinstance(score, float) and 0.0 <= score <= 1.0,
              f"score={score}")
    except Exception as exc:
        check("inference.py importable", False, str(exc))


def _check_episode_file() -> None:
    """Check that ``episodes/easy_000.json`` carries the expected fields."""
    ep_path = _ROOT / "episodes" / "easy_000.json"
    if not ep_path.exists():
        check("episodes directory has files", False, "run python server/generator.py")
        return
    # Guarded like the other sections: a truncated or re-shaped file should
    # be reported as a failed check, not abort the validator with a traceback.
    try:
        ep = json.loads(ep_path.read_text(encoding="utf-8"))
        accounts = ep["network"]["accounts"]
        first = accounts[0]["features"]
        has_features = "comment_repeat_score" in first and "shared_ip_count" in first
        check("episodes have new features (comment_repeat_score, shared_ip_count)",
              has_features, f"keys: {list(first.keys())}")
        has_celebs = "celeb_ids" in ep
        check("episodes have celeb_ids field", has_celebs, "")
    except Exception as exc:
        check("episode file is well-formed", False,
              f"{type(exc).__name__}: {exc}")
| # --------------------------------------------------------------------------- | |
| # Main | |
| # --------------------------------------------------------------------------- | |
if __name__ == "__main__":
    cli = argparse.ArgumentParser()
    cli.add_argument("--url", default="http://localhost:8000")
    cli.add_argument(
        "--local",
        action="store_true",
        help="Run local checks only (no server needed)",
    )
    opts = cli.parse_args()

    # Local checks always run; the HTTP checks are skipped with --local.
    run_local_checks()
    if not opts.local:
        run_http_checks(opts.url)

    # Summarise: exit status 0 only when every recorded check passed.
    failed_names = [label for passed_flag, label in _results if not passed_flag]
    total = len(_results)
    passed = total - len(failed_names)

    print("\n" + "=" * 50)
    print(f"Results: {passed}/{total} passed", end="")
    if not failed_names:
        print(" — all OK")
        print()
        sys.exit(0)

    print(f" ({len(failed_names)} FAILED)")
    for label in failed_names:
        print(f" - {label}")
    print()
    sys.exit(1)