data-centric-env / audit.py
Aswini-Kumar's picture
refactor: extract agent_utils.py (shared prompt/commands/server utils), simplify reward to env+format, add audit.py
51a79ee
Raw
History Blame Contribute Delete
3.62 kB
"""Full connectivity audit — run after any code changes."""
import ast, pathlib, sys
# Labels of the checks that passed / failed, in the order they were run.
# Both lists are read by the summary at the end of the script.
PASS = []
FAIL = []


def ok(msg):
    """Record *msg* as a passed check and print it as a PASS line."""
    PASS.append(msg)
    print(f" PASS {msg}")


def fail(msg, err):
    """Record *msg* as a failed check and print it with the error *err*.

    Only the label is stored in FAIL; the error text appears solely in the
    line printed here.
    """
    FAIL.append(msg)
    print(f" FAIL {msg}: {err}")
# 1. agent_utils
# Import the shared helpers in one statement; any exception (missing module,
# missing name, error raised while the module executes) is recorded as a
# single failure. The PASS line reports the sizes of the two constants.
try:
    from agent_utils import (
        VALID_COMMANDS,
        SYSTEM_PROMPT,
        build_user_prompt,
        start_server,
        stop_server,
    )
    ok(f"agent_utils β€” {len(VALID_COMMANDS)} commands, {len(SYSTEM_PROMPT)} char prompt")
except Exception as exc:
    fail("agent_utils import", exc)
# 2. Server core
# All server-side imports share one try block: the first import that raises
# is reported and the remaining ones are not attempted, so names listed after
# the failing line stay undefined for the rest of the script.
try:
    from server.data_centric_environment import DataCentricEnvironment
    from server.grader import compute_total_reward
    from server.anti_exploit import validate_calls_remaining, AntiExploitState
    from server.model_evaluator import ModelEvaluator
    from server.specialist_agents import (
        CleanerAgent,
        AnalystAgent,
        BalancerAgent,
        AugmenterAgent,
    )
    ok("server.* all imports")
except Exception as exc:
    fail("server imports", exc)
# 3. Client + models
# The client is imported before the models, so a client import failure also
# leaves the model classes unbound by this block.
try:
    from client import DataCentricEnv
    from models import (
        DataCentricAction,
        DataCentricObservation,
    )
    ok("client + models")
except Exception as exc:
    fail("client/models", exc)
# 4. Script parse check (no heavy deps loaded)
# Each script is only parsed with ast, never imported or executed, so a
# syntax error is detected without loading the scripts' own dependencies.
#
# Scripts are looked up next to this file first, so the result does not depend
# on the directory the audit is launched from. If a script is not found there
# the original working-directory-relative path is used instead.
# NOTE(review): assumes the scripts normally sit beside this file - confirm.
try:
    _AUDIT_DIR = pathlib.Path(__file__).resolve().parent
except NameError:
    # __file__ is undefined when the code is exec'd or pasted into a REPL.
    _AUDIT_DIR = pathlib.Path.cwd()

for script in ["train_data_centric.py", "eval_data_centric.py", "sft_generator.py",
               "inference.py", "plot_rewards.py", "hf_job_train.py", "submit_job.py"]:
    try:
        script_path = _AUDIT_DIR / script
        if not script_path.is_file():
            script_path = pathlib.Path(script)
        # filename= makes a SyntaxError name the script instead of "<unknown>".
        ast.parse(script_path.read_text(encoding="utf-8"), filename=script)
        ok(f"{script} syntax OK")
    except Exception as e:
        fail(f"{script} syntax", e)
# 5. Live environment cycle
# Build an environment, reset it on the tutorial task with a fixed seed, and
# verify the first observation: 3 validate calls remaining and a positive
# baseline accuracy.
try:
    # Re-imported here so the action class is available even when the earlier
    # combined client/models import block stopped before reaching it.
    from models import DataCentricAction
    env = DataCentricEnvironment()
    obs = env.reset(task="task_0_tutorial", seed=42)
    # Explicit raises instead of `assert`: assert statements are removed under
    # `python -O`, which would let the audit report PASS without checking.
    if not obs.validate_calls_remaining == 3:
        raise AssertionError(f"expected 3 got {obs.validate_calls_remaining}")
    if not obs.baseline_accuracy > 0:
        raise AssertionError(
            f"expected baseline_accuracy > 0 got {obs.baseline_accuracy}"
        )
    ok(f"env.reset() β€” baseline={obs.baseline_accuracy:.4f}, vcr={obs.validate_calls_remaining}")
except Exception as e:
    fail("env.reset()", e)
# Drive one episode through the environment, one command per step.
# Each entry is (command sent to env.step, label used on failure, callable
# that renders the PASS line). The renderers read the module-level `obs`,
# which is rebound to the step's observation just before they are called.
_STEP_CHECKS = (
    ("inspect_dataset", "step inspect_dataset",
     lambda: f"inspect_dataset β€” reward={obs.reward:.4f}"),
    ("query_analyst", "step query_analyst",
     lambda: f"query_analyst β€” reward={obs.reward:.4f}"),
    ("query_cleaner", "step query_cleaner",
     lambda: f"query_cleaner β€” reward={obs.reward:.4f}"),
    ("apply 1", "step apply 1",
     lambda: f"apply 1 β€” accuracy={obs.current_accuracy:.4f}"),
    ("validate", "step validate",
     lambda: f"validate β€” accuracy={obs.current_accuracy:.4f}, vcr={obs.validate_calls_remaining}"),
    ("submit", "step submit",
     lambda: f"submit β€” final reward={obs.reward:.4f}, done={obs.done}"),
)

# Every step has its own try, so one failing command is reported without
# preventing the later commands from being attempted.
for _command, _failure_label, _render_success in _STEP_CHECKS:
    try:
        obs = env.step(DataCentricAction(message=_command))
        ok(_render_success())
    except Exception as exc:
        fail(_failure_label, exc)
# 6. agent_utils.build_user_prompt
# Reset the existing environment on a second task, feed the observation's
# attribute dict to build_user_prompt, and check the rendered prompt contains
# the "Budget remaining" text.
try:
    obs2 = env.reset(task="task_1_easy", seed=0)
    prompt = build_user_prompt(obs2.__dict__)
    # Explicit raise instead of `assert`: assert statements are removed under
    # `python -O`, which would make this check pass unconditionally.
    if "Budget remaining" not in prompt:
        raise AssertionError('prompt does not contain "Budget remaining"')
    ok("build_user_prompt output valid")
except Exception as e:
    fail("build_user_prompt", e)
# Summary
# Print the pass/fail totals. When anything failed, list the failed labels
# and exit with status 1; otherwise print the all-clear line.
print()
print("=" * 60)
print(f"PASSED: {len(PASS)} FAILED: {len(FAIL)}")
if not FAIL:
    print("ALL CHECKS PASSED")
else:
    print("FAILURES:")
    for failed_label in FAIL:
        print(f" - {failed_label}")
    sys.exit(1)