Spaces:
Sleeping
Sleeping
Hemanth Kunta committed on
Commit ·
b2ea92f
1
Parent(s): 3e987ed
clamp terminal task scores and avoid 1.000 display rounding
Browse files
- env/app.py +3 -3
- high_grade_agent.py +3 -3
env/app.py
CHANGED
|
@@ -75,7 +75,7 @@ def step(payload: dict):
|
|
| 75 |
|
| 76 |
if state.step > MAX_STEPS:
|
| 77 |
state.done = True
|
| 78 |
-
total = round(min(1.25, state.audit_score + state.fix_bonus), 4)
|
| 79 |
rb = RewardBreakdown(
|
| 80 |
base_audit_score=state.audit_score,
|
| 81 |
confidence_brier_adjustment=0.0,
|
|
@@ -145,7 +145,7 @@ def step(payload: dict):
|
|
| 145 |
|
| 146 |
if state.fix_steps_remaining <= 0:
|
| 147 |
state.done = True
|
| 148 |
-
total = round(min(1.25, state.audit_score + state.fix_bonus), 4)
|
| 149 |
rb = RewardBreakdown(
|
| 150 |
base_audit_score=state.audit_score,
|
| 151 |
confidence_brier_adjustment=0.0,
|
|
@@ -163,7 +163,7 @@ def step(payload: dict):
|
|
| 163 |
if done:
|
| 164 |
state.done = True
|
| 165 |
|
| 166 |
-
total = round(min(1.25, state.audit_score + state.fix_bonus), 4)
|
| 167 |
rb = RewardBreakdown(
|
| 168 |
base_audit_score=state.audit_score,
|
| 169 |
confidence_brier_adjustment=0.0,
|
|
|
|
| 75 |
|
| 76 |
if state.step > MAX_STEPS:
|
| 77 |
state.done = True
|
| 78 |
+
total = BaseTask.strict_score(round(min(1.25, state.audit_score + state.fix_bonus), 4))
|
| 79 |
rb = RewardBreakdown(
|
| 80 |
base_audit_score=state.audit_score,
|
| 81 |
confidence_brier_adjustment=0.0,
|
|
|
|
| 145 |
|
| 146 |
if state.fix_steps_remaining <= 0:
|
| 147 |
state.done = True
|
| 148 |
+
total = BaseTask.strict_score(round(min(1.25, state.audit_score + state.fix_bonus), 4))
|
| 149 |
rb = RewardBreakdown(
|
| 150 |
base_audit_score=state.audit_score,
|
| 151 |
confidence_brier_adjustment=0.0,
|
|
|
|
| 163 |
if done:
|
| 164 |
state.done = True
|
| 165 |
|
| 166 |
+
total = BaseTask.strict_score(round(min(1.25, state.audit_score + state.fix_bonus), 4))
|
| 167 |
rb = RewardBreakdown(
|
| 168 |
base_audit_score=state.audit_score,
|
| 169 |
confidence_brier_adjustment=0.0,
|
high_grade_agent.py
CHANGED
|
@@ -452,7 +452,7 @@ def run_task(task_id: int, q_table: dict[str, list[float]], memory: MemoryStore)
|
|
| 452 |
evidence={"task_id": task_id, "score": score},
|
| 453 |
)
|
| 454 |
)
|
| 455 |
-
print(f" Done. Score: {score:.3f} | Breakdown: {reward.get('breakdown', {})}")
|
| 456 |
return score
|
| 457 |
|
| 458 |
|
|
@@ -465,9 +465,9 @@ def main() -> None:
|
|
| 465 |
memory.save()
|
| 466 |
print("\n=== HIGH-GRADE AGENT RESULTS ===")
|
| 467 |
for k, v in scores.items():
|
| 468 |
-
print(f" {k}: {v:.3f}")
|
| 469 |
mean_score = BaseTask.strict_score(sum(scores.values()) / len(scores))
|
| 470 |
-
print(f" mean: {mean_score:.3f}")
|
| 471 |
|
| 472 |
|
| 473 |
if __name__ == "__main__":
|
|
|
|
| 452 |
evidence={"task_id": task_id, "score": score},
|
| 453 |
)
|
| 454 |
)
|
| 455 |
+
print(f" Done. Score: {score:.6f} | Breakdown: {reward.get('breakdown', {})}")
|
| 456 |
return score
|
| 457 |
|
| 458 |
|
|
|
|
| 465 |
memory.save()
|
| 466 |
print("\n=== HIGH-GRADE AGENT RESULTS ===")
|
| 467 |
for k, v in scores.items():
|
| 468 |
+
print(f" {k}: {v:.6f}")
|
| 469 |
mean_score = BaseTask.strict_score(sum(scores.values()) / len(scores))
|
| 470 |
+
print(f" mean: {mean_score:.6f}")
|
| 471 |
|
| 472 |
|
| 473 |
if __name__ == "__main__":
|