ap-clerk-env / validate.py
Pathikreet's picture
Clean up codebase: remove server/, fix pyproject.toml, update validate.py
29e1fd3
Raw
History Blame Contribute Delete
10.7 kB
"""
Pre-Submission Validation Script — AP Clerk Environment
========================================================
Runs all checklist items locally WITHOUT needing a running server or HF token.
All checks are self-contained.
Usage:
python validate.py
Exit code 0 = all checks passed.
Exit code 1 = one or more checks failed.
"""
import sys
import json
import importlib
# Status tags prepended to every line of validation output.
PASS = "[PASS]"
FAIL = "[FAIL]"
INFO = "[INFO]"

# Labels of every failed check, in the order they were recorded. The summary
# at the end of the script reads this list to decide the exit code.
errors = []


def check(label: str, condition: bool, detail: str = "") -> None:
    """Print the outcome of one validation check and record failures.

    Args:
        label: Human-readable name of the check; always printed.
        condition: Truthy when the check passed.
        detail: Optional extra context. It is printed only when the check
            fails and is ignored on success.

    Side effects:
        Writes exactly one line to stdout and, on failure, appends ``label``
        to the module-level ``errors`` list.
    """
    if condition:
        print(f" {PASS} {label}")
        return
    # The separator is a real em dash; the previous literal was the
    # mis-decoded byte sequence of that character and printed as garbage.
    suffix = f" — {detail}" if detail else ""
    print(f" {FAIL} {label}{suffix}")
    errors.append(label)
# ── 1. Python version ────────────────────────────────────────────────────────
print("\n[1] Python version")
# Only the first two components of the interpreter version take part in the
# minimum-version gate; they are also echoed in the failure detail.
major, minor = sys.version_info[:2]
check(
    "Python >= 3.10",
    (major, minor) >= (3, 10),
    f"found {major}.{minor}",
)
# ── 2. Required files present ────────────────────────────────────────────────
print("\n[2] Required files present")
import os

# Relative paths: they are resolved against the current working directory,
# so the script has to be launched from the directory that holds these files.
required_files = [
    "inference.py",
    "openenv.yaml",
    "Dockerfile",
    "requirements.txt",
] + [
    f"app/{module}"
    for module in (
        "__init__.py",
        "main.py",
        "models.py",
        "tasks.py",
        "environment.py",
    )
]
# One check per file, so every missing file is listed individually.
for required_path in required_files:
    check(f"File exists: {required_path}", os.path.isfile(required_path))
# ── 3. openenv.yaml valid ────────────────────────────────────────────────────
print("\n[3] openenv.yaml compliance")
try:
    # Imported here so that a missing PyYAML only skips this section
    # (see the ImportError handler) instead of aborting the whole script.
    import yaml

    # Decode explicitly as UTF-8 rather than the platform's locale encoding.
    with open("openenv.yaml", encoding="utf-8") as fh:
        spec = yaml.safe_load(fh)
    check("openenv.yaml is valid YAML", True)

    # An empty file parses to None and a bare list/scalar is valid YAML too;
    # fail with a readable message instead of a TypeError from `in` below.
    if not isinstance(spec, dict):
        raise ValueError(
            "top-level YAML document must be a mapping, "
            f"got {type(spec).__name__}"
        )

    check("has 'name' field", "name" in spec)
    check("has 'version' field", "version" in spec)
    check("has 'tasks' field", "tasks" in spec)
    check("has 'endpoints' field", "endpoints" in spec)
    check("has 'reward_range'", "reward_range" in spec)

    # `or []` / `or {}` also covers keys that are present but null, so the
    # count and endpoint checks still run and report precise failures.
    task_ids = [t["id"] for t in spec.get("tasks") or []]
    check("6+ tasks declared in openenv.yaml", len(task_ids) >= 6,
          f"found {len(task_ids)}")

    endpoints = spec.get("endpoints") or {}
    for ep in ["reset", "step", "state", "tasks", "health"]:
        check(f"endpoint '{ep}' declared", ep in endpoints)
except ImportError:
    # Informational only: a missing optional parser is not counted as a
    # failed check.
    print(f" {INFO} pyyaml not installed — skipping YAML parse (pip install pyyaml)")
except Exception as e:
    # Covers a missing/unreadable file, YAML syntax errors and the shape
    # problems detected above.
    check("openenv.yaml readable", False, str(e))
# ── 4. Pydantic models importable ────────────────────────────────────────────
print("\n[4] Pydantic models")
try:
    # Every name listed here must be importable from app.models; the first
    # one that is missing raises and is reported by the handler below.
    from app.models import (
        APAction,
        APObservation,
        APReward,
        DecisionType,
        ReasonCode,
        Invoice,
        LineItem,
        PurchaseOrder,
        POLine,
        GoodsReceipt,
        GRNLine,
        ResetRequest,
        StepRequest,
        ResetResponse,
        StepResponse,
        StateResponse,
        TaskInfo,
    )

    check("All Pydantic models import cleanly", True)
    # len() is applied to the two classes directly, so both are expected to
    # support it; the thresholds are lower bounds on their member counts.
    check(
        "DecisionType has 3+ values (incl. intermediate actions)",
        len(DecisionType) >= 3,
    )
    check(
        "ReasonCode has 6+ values (incl. new codes)",
        len(ReasonCode) >= 6,
    )
except Exception as e:
    # Any import or len() failure collapses into one failed check.
    check("Pydantic models importable", False, str(e))
# ── 5. Tasks & graders ───────────────────────────────────────────────────────
print("\n[5] Tasks and graders")
try:
    from app.tasks import TASKS, grade_action
    from app.models import APAction, DecisionType, ReasonCode

    check("6+ tasks registered", len(TASKS) >= 6, f"found {len(TASKS)}")

    # Tally tasks per difficulty label, then require two of each tier.
    difficulty_counts: dict = {}
    for spec in TASKS.values():
        difficulty_counts[spec.difficulty] = difficulty_counts.get(spec.difficulty, 0) + 1
    for diff in ["easy", "medium", "hard"]:
        found = difficulty_counts.get(diff, 0)
        check(f"At least 2 tasks at difficulty='{diff}'",
              found >= 2,
              f"found {found}")

    # Run each grader with a fixed seed and a dummy action; only the range
    # of the returned score is verified, not its value.
    dummy_action = APAction(
        decision=DecisionType.REJECT,
        approved_amount=0.0,
        reason_code=ReasonCode.NO_PO_FOUND,
        explanation="Validation dummy action for pre-submission check.",
    )
    for task_id, spec in TASKS.items():
        obs = spec.generator(seed=0)
        reward = grade_action(task_id, obs, dummy_action)
        in_range = 0.0 <= reward.score <= 1.0
        check(f"Grader '{task_id}' returns score in [0,1]", in_range,
              f"got {reward.score}")

    # Verify a perfect action scores ~1.0 on easy_perfect_match.
    # The episode is generated once and reused for both the approved amount
    # and the grading call, so the action is always built from the very
    # observation it is graded against.
    obs1 = TASKS["easy_perfect_match"].generator(seed=1)
    perfect_action = APAction(
        decision=DecisionType.APPROVE_FULL,
        approved_amount=obs1.invoice.invoice_total,
        reason_code=ReasonCode.MATCH_CONFIRMED,
        explanation="All three documents match exactly. Full invoice approved.",
    )
    score = grade_action("easy_perfect_match", obs1, perfect_action).score
    check("Perfect action on easy_perfect_match scores >= 0.99", score >= 0.99, f"got {score}")
except Exception as e:
    # The first exception anywhere above ends this section and is reported
    # as a single failed check.
    check("Tasks and graders", False, str(e))
# ── 6. Environment class ─────────────────────────────────────────────────────
print("\n[6] APClerkEnvironment reset/step/state")
try:
    from app.environment import APClerkEnvironment
    from app.models import APAction, DecisionType, ReasonCode

    env = APClerkEnvironment()

    # reset(): start one episode of a fixed task with a fixed seed.
    obs = env.reset("medium_quantity_shortfall", seed=42)
    check("reset() returns APObservation", obs is not None)
    check("reset() obs has invoice", hasattr(obs, "invoice"))
    check("reset() step_count == 0", obs.step_count == 0)

    # Partial approval: quantity on the first goods-receipt line times the
    # agreed unit price on the first purchase-order line.
    received_qty = obs.goods_receipts[0].lines[0].received_quantity
    unit_price = obs.purchase_orders[0].lines[0].agreed_unit_price
    action = APAction(
        decision=DecisionType.APPROVE_PARTIAL,
        approved_amount=received_qty * unit_price,
        reason_code=ReasonCode.QUANTITY_MISMATCH,
        explanation="Paying only for quantities confirmed received in the GRN.",
    )

    # step(): unpacked as (observation, reward, done, info).
    obs2, reward, done, info = env.step(action)
    check("step() returns reward", reward is not None)
    check("step() done=True", done is True)
    check("step() score in [0,1]", 0.0 <= reward.score <= 1.0)
    check("step() breakdown is dict", isinstance(reward.breakdown, dict))

    # state(): must report the finished episode and the same score.
    state = env.state()
    check("state() returns dict", isinstance(state, dict))
    check("state() done=True", state["done"] is True)
    score_gap = abs(state["episode_score"] - reward.score)
    check("state() episode_score matches reward", score_gap < 1e-9)
except Exception as e:
    check("APClerkEnvironment", False, str(e))
# ── 7. Randomisation ─────────────────────────────────────────────────────────
# The banner below uses a real em dash; the previous literal was the
# mis-decoded byte sequence of that character.
print("\n[7] Randomisation — different seeds produce different episodes")
try:
    from app.tasks import TASKS

    gen = TASKS["easy_perfect_match"].generator

    # Two different seeds are expected to yield different invoice totals.
    obs_a = gen(seed=42)
    obs_b = gen(seed=99)
    check("seed=42 vs seed=99 give different invoice totals",
          obs_a.invoice.invoice_total != obs_b.invoice.invoice_total,
          f"{obs_a.invoice.invoice_total} vs {obs_b.invoice.invoice_total}")

    # Re-running with the first seed must reproduce the same total.
    obs_c = gen(seed=42)
    check("same seed is reproducible",
          obs_a.invoice.invoice_total == obs_c.invoice.invoice_total)
except Exception as e:
    check("Randomisation", False, str(e))
# ── 8. FastAPI app importable ────────────────────────────────────────────────
print("\n[8] FastAPI app")
try:
    from app.main import app as fastapi_app

    check("FastAPI app imports cleanly", True)

    # Collect the path of every registered route, then look each required
    # path up by exact string match.
    routes = [route.path for route in fastapi_app.routes]
    expected_paths = ("/health", "/tasks", "/reset", "/step", "/state")
    for path in expected_paths:
        check(f"Route '{path}' registered", path in routes)
except Exception as e:
    check("FastAPI app", False, str(e))
# ── 9. inference.py env-var guard ────────────────────────────────────────────
print("\n[9] inference.py mandatory env-var checks")
import subprocess
import os as _os

# Upper bound, in seconds, on how long inference.py may run. Without it a
# script that lacks the guard would keep running and hang this validator.
INFERENCE_TIMEOUT_S = 120

# Run the script with these variables removed from an otherwise unchanged
# copy of the current environment.
env_clean = {k: v for k, v in _os.environ.items()
             if k not in ("HF_TOKEN", "API_KEY", "API_BASE_URL", "MODEL_NAME")}
try:
    result = subprocess.run(
        [sys.executable, "inference.py"],
        capture_output=True, text=True, env=env_clean,
        timeout=INFERENCE_TIMEOUT_S,
    )
except subprocess.TimeoutExpired:
    # subprocess.run has already killed the child at this point. Report both
    # checks as failed under their usual labels so the summary stays uniform.
    check("inference.py exits non-zero when env vars missing", False,
          f"timed out after {INFERENCE_TIMEOUT_S}s")
    check("inference.py prints ERROR for missing vars", False,
          "process was killed before it finished")
else:
    check("inference.py exits non-zero when env vars missing",
          result.returncode != 0,
          f"exit code was {result.returncode}")
    # The marker is accepted on either stream.
    check("inference.py prints ERROR for missing vars",
          "ERROR" in result.stderr or "ERROR" in result.stdout)
# ── 10. Dockerfile sanity ────────────────────────────────────────────────────
print("\n[10] Dockerfile")
try:
    with open("Dockerfile", encoding="utf-8") as fh:
        dockerfile = fh.read()
except (OSError, UnicodeError) as e:
    # A missing or unreadable Dockerfile becomes a failed check, like the
    # other sections, instead of an uncaught traceback that would skip the
    # final summary.
    check("Dockerfile readable", False, str(e))
else:
    # Plain substring tests against the file's text; nothing is parsed.
    check("Dockerfile uses python:3.11", "python:3.11" in dockerfile)
    check("Dockerfile EXPOSEs port 7860", "7860" in dockerfile)
    check("Dockerfile copies app/", "COPY app/" in dockerfile)
    check("Dockerfile copies inference.py", "COPY inference.py" in dockerfile)
    check("Dockerfile copies openenv.yaml", "COPY openenv.yaml" in dockerfile)
    check("Dockerfile runs uvicorn", "uvicorn" in dockerfile)
    check("Dockerfile has non-root USER", "USER appuser" in dockerfile)
# ── Summary ──────────────────────────────────────────────────────────────────
# Exit status: 1 when at least one check was recorded in `errors`, else 0.
# The em dashes and the ballot-X bullet below replace literals that held the
# mis-decoded byte sequences of those characters.
rule = "=" * 55
print("\n" + rule)
if errors:
    print(f" FAILED — {len(errors)} check(s) did not pass:")
    for e in errors:
        print(f" ✗ {e}")
    print(rule)
    sys.exit(1)
else:
    print(" ALL CHECKS PASSED — ready to submit!")
    print(rule)
    sys.exit(0)