Hemanth Kunta committed on
Commit
b2ea92f
·
1 Parent(s): 3e987ed

clamp terminal task scores and avoid 1.000 display rounding

Browse files
Files changed (2) hide show
  1. env/app.py +3 -3
  2. high_grade_agent.py +3 -3
env/app.py CHANGED
@@ -75,7 +75,7 @@ def step(payload: dict):
75
 
76
  if state.step > MAX_STEPS:
77
  state.done = True
78
- total = round(min(1.25, state.audit_score + state.fix_bonus), 4)
79
  rb = RewardBreakdown(
80
  base_audit_score=state.audit_score,
81
  confidence_brier_adjustment=0.0,
@@ -145,7 +145,7 @@ def step(payload: dict):
145
 
146
  if state.fix_steps_remaining <= 0:
147
  state.done = True
148
- total = round(min(1.25, state.audit_score + state.fix_bonus), 4)
149
  rb = RewardBreakdown(
150
  base_audit_score=state.audit_score,
151
  confidence_brier_adjustment=0.0,
@@ -163,7 +163,7 @@ def step(payload: dict):
163
  if done:
164
  state.done = True
165
 
166
- total = round(min(1.25, state.audit_score + state.fix_bonus), 4)
167
  rb = RewardBreakdown(
168
  base_audit_score=state.audit_score,
169
  confidence_brier_adjustment=0.0,
 
75
 
76
  if state.step > MAX_STEPS:
77
  state.done = True
78
+ total = BaseTask.strict_score(round(min(1.25, state.audit_score + state.fix_bonus), 4))
79
  rb = RewardBreakdown(
80
  base_audit_score=state.audit_score,
81
  confidence_brier_adjustment=0.0,
 
145
 
146
  if state.fix_steps_remaining <= 0:
147
  state.done = True
148
+ total = BaseTask.strict_score(round(min(1.25, state.audit_score + state.fix_bonus), 4))
149
  rb = RewardBreakdown(
150
  base_audit_score=state.audit_score,
151
  confidence_brier_adjustment=0.0,
 
163
  if done:
164
  state.done = True
165
 
166
+ total = BaseTask.strict_score(round(min(1.25, state.audit_score + state.fix_bonus), 4))
167
  rb = RewardBreakdown(
168
  base_audit_score=state.audit_score,
169
  confidence_brier_adjustment=0.0,
high_grade_agent.py CHANGED
@@ -452,7 +452,7 @@ def run_task(task_id: int, q_table: dict[str, list[float]], memory: MemoryStore)
452
  evidence={"task_id": task_id, "score": score},
453
  )
454
  )
455
- print(f" Done. Score: {score:.3f} | Breakdown: {reward.get('breakdown', {})}")
456
  return score
457
 
458
 
@@ -465,9 +465,9 @@ def main() -> None:
465
  memory.save()
466
  print("\n=== HIGH-GRADE AGENT RESULTS ===")
467
  for k, v in scores.items():
468
- print(f" {k}: {v:.3f}")
469
  mean_score = BaseTask.strict_score(sum(scores.values()) / len(scores))
470
- print(f" mean: {mean_score:.3f}")
471
 
472
 
473
  if __name__ == "__main__":
 
452
  evidence={"task_id": task_id, "score": score},
453
  )
454
  )
455
+ print(f" Done. Score: {score:.6f} | Breakdown: {reward.get('breakdown', {})}")
456
  return score
457
 
458
 
 
465
  memory.save()
466
  print("\n=== HIGH-GRADE AGENT RESULTS ===")
467
  for k, v in scores.items():
468
+ print(f" {k}: {v:.6f}")
469
  mean_score = BaseTask.strict_score(sum(scores.values()) / len(scores))
470
+ print(f" mean: {mean_score:.6f}")
471
 
472
 
473
  if __name__ == "__main__":