Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
# Identifier for this task; reported under "id" by get_task_config() and
# under "task" in the dict returned by run_episode().
TASK_ID = "anomaly_detection_medium"
# Step budget for one episode: bounds the loop in run_episode() and is copied
# onto the environment as ``_MAX_EPISODE_STEPS`` by setup_env().
MAX_STEPS = 10
# Difficulty label reported in both the task config and the episode result.
DIFFICULTY = "medium"
def get_task_config() -> dict:
    """Describe the MEDIUM task as a plain dictionary.

    Returns:
        A new dict with the keys ``id``, ``difficulty``, ``max_steps``,
        ``grader`` (a ``module:ClassName`` style reference string) and
        ``description``.
    """
    grader_ref = "graders.grader_classification:MediumClassificationGrader"
    summary = "MEDIUM — Faster ingestion, tighter metrics."
    task_config = dict(
        id=TASK_ID,
        difficulty=DIFFICULTY,
        max_steps=MAX_STEPS,
        grader=grader_ref,
        description=summary,
    )
    return task_config
def setup_env(env) -> None:
    """Configure *env* for the MEDIUM task on a best-effort basis.

    Two independent settings are applied:

    * ``env.difficulty`` is set to ``hft_auditor.Difficulty.MEDIUM`` when
      ``server.fin_auditor_environment`` can be imported and exposes a
      non-``None`` ``hft_auditor``.
    * ``env._MAX_EPISODE_STEPS`` is set to ``MAX_STEPS``.

    Failures are reported on stdout and never propagated, so a caller can
    always continue with whatever configuration did succeed.

    Args:
        env: The environment object to configure; it is mutated in place.
    """
    try:
        from server.fin_auditor_environment import hft_auditor

        if hft_auditor is not None:
            env.difficulty = hft_auditor.Difficulty.MEDIUM
    except Exception as e:
        print(f"[task_medium] Could not set difficulty: {e}")

    # The step cap does not depend on hft_auditor, so it is applied in its own
    # step: a failed optional import above must not silently skip it.
    try:
        env._MAX_EPISODE_STEPS = MAX_STEPS
    except Exception as e:
        print(f"[task_medium] Could not set max episode steps: {e}")
def run_episode(env, agent_fn) -> dict:
    """Run a single MEDIUM anomaly-detection episode and return the graded result.

    Called by the OpenEnv evaluator. agent_fn receives an observation and
    returns a list of binary decisions (0 = valid, 1 = anomaly).

    Args:
        env: Environment exposing ``reset()`` and ``step(action)``; the
            object returned by ``step`` must provide ``reward`` and ``done``.
        agent_fn: Callable mapping the current observation to the decisions
            passed to ``AuditorAction(decisions=...)``.

    Returns:
        A dict with the keys ``task``, ``difficulty``, ``steps`` (number of
        completed steps), ``total_reward`` and ``score`` (both rounded to
        four decimals) and ``grader_breakdown`` (the grader's
        ``last_breakdown`` attribute).
    """
    setup_env(env)
    total_reward = 0.0
    steps_done = 0
    try:
        obs = env.reset()
        # Imported once, before the loop, instead of on every step. It stays
        # inside the try so an import failure is reported below and the
        # episode is still graded.
        from models import AuditorAction

        for _ in range(MAX_STEPS):
            decisions = agent_fn(obs)
            obs = env.step(AuditorAction(decisions=decisions))
            # A missing reward counts as zero.
            if obs.reward is not None:
                total_reward += float(obs.reward)
            steps_done += 1
            if obs.done:
                break
    except Exception as exc:
        # Best effort: report the failure and fall through to grading with
        # whatever steps were completed.
        print(f"[task_medium] run_episode error at step {steps_done}: {exc}")

    # Always grade — even partial data yields a valid score via perfect_signal fallback
    from graders.grader_classification import MediumClassificationGrader

    grader = MediumClassificationGrader()
    final_score = grader.grade(env)
    return {
        "task": TASK_ID,
        "difficulty": DIFFICULTY,
        "steps": steps_done,
        "total_reward": round(total_reward, 4),
        "score": round(final_score, 4),
        "grader_breakdown": grader.last_breakdown,
    }