File size: 1,345 Bytes
d02bacd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
"""Run oracle on all 3 briefs — validator gate."""
from ceo_brief_env.environment import CEOBriefEnvironment, oracle_action_for_observation

BRIEFS = ["easy_brief", "medium_brief", "hard_brief", "expert_brief"]
results = {}

for brief in BRIEFS:
    env = CEOBriefEnvironment()
    obs = env.reset(brief)
    steps = 0
    cumulative = 0.0
    while not obs.done and steps < 20:
        action = oracle_action_for_observation(obs)
        obs = env.step(action)
        steps += 1
        cumulative += obs.reward

    results[brief] = {
        "terminal": obs.terminal_grader_score,
        "cumulative": round(cumulative, 4),
        "steps": steps,
        "experts": obs.consulted_experts,
    }
    print(f"{brief:14s} | terminal={obs.terminal_grader_score:.4f} | "
          f"cum={cumulative:.4f} | steps={steps} | experts={obs.consulted_experts}")

print()
print("=" * 70)
all_in_band = True
for brief, r in results.items():
    score = r["terminal"]
    in_band = score is not None and 0.001 <= score <= 0.999
    tag = "PASS" if in_band else "FAIL"
    print(f"  [{tag}] {brief:14s} terminal={score}")
    if not in_band:
        all_in_band = False

print()
if all_in_band:
    print("[ALL PASS] Oracle lands in (0.001, 0.999) on every brief.")
else:
    print("[FAIL] At least one brief is out of band — fix before proceeding.")