| [ |
| { |
| "episode_id": "27c65a00-9315-4d7d-b457-f8d64b0da466", |
| "task_id": "bug_detection", |
| "seed": 0, |
| "final_score": 0.0, |
| "steps_taken": 1, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 0, |
| "terminated_reason": "terminal_action", |
| "duration_seconds": 0.02 |
| }, |
| { |
| "episode_id": "8bdcee56-8a56-4ec8-88fc-a70e78ab48f2", |
| "task_id": "bug_detection", |
| "seed": 1, |
| "final_score": 0.0, |
| "steps_taken": 1, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 0, |
| "terminated_reason": "terminal_action", |
| "duration_seconds": 0.02 |
| }, |
| { |
| "episode_id": "d3dcda88-ce7e-4965-9409-4e97c98cf444", |
| "task_id": "bug_detection", |
| "seed": 2, |
| "final_score": 0.9167, |
| "steps_taken": 6, |
| "issues_found": 1, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.04 |
| }, |
| { |
| "episode_id": "822dfb7d-d67b-4dc9-9cfe-866665a9e5b9", |
| "task_id": "bug_detection", |
| "seed": 3, |
| "final_score": 0.9167, |
| "steps_taken": 6, |
| "issues_found": 1, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "c35c2096-b7b8-4a54-b0c7-d8fa1e1538bf", |
| "task_id": "bug_detection", |
| "seed": 4, |
| "final_score": 0.8267, |
| "steps_taken": 6, |
| "issues_found": 1, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "9b4e5191-57da-4b10-b31e-5b8236b9b1f4", |
| "task_id": "bug_detection", |
| "seed": 5, |
| "final_score": 0.0, |
| "steps_taken": 1, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 0, |
| "terminated_reason": "terminal_action", |
| "duration_seconds": 0.01 |
| }, |
| { |
| "episode_id": "c1e64a16-512a-4716-b0f9-5d9fe14a142d", |
| "task_id": "bug_detection", |
| "seed": 6, |
| "final_score": 0.0, |
| "steps_taken": 1, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 0, |
| "terminated_reason": "terminal_action", |
| "duration_seconds": 0.02 |
| }, |
| { |
| "episode_id": "cc8bd066-506e-46de-ba0f-0a446f045945", |
| "task_id": "bug_detection", |
| "seed": 7, |
| "final_score": 0.0, |
| "steps_taken": 1, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 0, |
| "terminated_reason": "terminal_action", |
| "duration_seconds": 0.02 |
| }, |
| { |
| "episode_id": "20dd7158-a49f-4b6c-adcd-6b76b4274e94", |
| "task_id": "bug_detection", |
| "seed": 8, |
| "final_score": 0.9167, |
| "steps_taken": 6, |
| "issues_found": 1, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "14b9d1a5-03b7-45b2-8b23-b6c387bced5b", |
| "task_id": "bug_detection", |
| "seed": 9, |
| "final_score": 0.0, |
| "steps_taken": 5, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "2b1c4b7f-cf05-4312-89d2-49883b9ed6c1", |
| "task_id": "security_audit", |
| "seed": 0, |
| "final_score": 0.0, |
| "steps_taken": 5, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "47145114-7a99-4c81-baa9-1f9c4221f48a", |
| "task_id": "security_audit", |
| "seed": 1, |
| "final_score": 1.0, |
| "steps_taken": 6, |
| "issues_found": 1, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "5d9d3f74-a127-46b2-a331-45f30f0d2f6f", |
| "task_id": "security_audit", |
| "seed": 2, |
| "final_score": 0.0, |
| "steps_taken": 5, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "f383afc4-df2d-4a60-abd1-a583cb2de538", |
| "task_id": "security_audit", |
| "seed": 3, |
| "final_score": 0.85, |
| "steps_taken": 6, |
| "issues_found": 1, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "df23e18b-c21a-4390-9afa-f77eb1b8050b", |
| "task_id": "security_audit", |
| "seed": 4, |
| "final_score": 0.0, |
| "steps_taken": 5, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "bcec83af-4aaf-4997-922a-1556ca63fcf3", |
| "task_id": "security_audit", |
| "seed": 5, |
| "final_score": 0.0, |
| "steps_taken": 5, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "65afedd0-9ad1-45f7-9615-dafeaa390338", |
| "task_id": "security_audit", |
| "seed": 6, |
| "final_score": 0.0, |
| "steps_taken": 5, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "0e8b65e5-d59c-41fb-8038-1f3b1d4b657b", |
| "task_id": "security_audit", |
| "seed": 7, |
| "final_score": 0.0, |
| "steps_taken": 5, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "ae477e21-2718-477e-8a11-e46bda49bea7", |
| "task_id": "security_audit", |
| "seed": 8, |
| "final_score": 0.0, |
| "steps_taken": 5, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.04 |
| }, |
| { |
| "episode_id": "0cd3d18c-cdf6-4752-9dc8-cade848cee0e", |
| "task_id": "security_audit", |
| "seed": 9, |
| "final_score": 0.0, |
| "steps_taken": 5, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "d9f79316-57ea-42ad-b191-797ea895951b", |
| "task_id": "architectural_review", |
| "seed": 0, |
| "final_score": 0.0, |
| "steps_taken": 1, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 0, |
| "terminated_reason": "terminal_action", |
| "duration_seconds": 0.01 |
| }, |
| { |
| "episode_id": "65112da3-d319-4eb3-9774-1c43313fe1ec", |
| "task_id": "architectural_review", |
| "seed": 1, |
| "final_score": 0.059, |
| "steps_taken": 5, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "c1d6bd63-1abf-403b-97be-cf1962959910", |
| "task_id": "architectural_review", |
| "seed": 2, |
| "final_score": 0.661, |
| "steps_taken": 6, |
| "issues_found": 1, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "fab56551-f2ae-42be-88eb-854ef236b29a", |
| "task_id": "architectural_review", |
| "seed": 3, |
| "final_score": 0.658, |
| "steps_taken": 5, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "8f4b8fa9-f281-4bb2-ae71-7ad6c7ca7fc9", |
| "task_id": "architectural_review", |
| "seed": 4, |
| "final_score": 0.058, |
| "steps_taken": 5, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "7eaa445d-774d-44b1-80d9-06e978b022df", |
| "task_id": "architectural_review", |
| "seed": 5, |
| "final_score": 0.657, |
| "steps_taken": 6, |
| "issues_found": 1, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "103d6e2b-e40d-49ff-8f36-6910d463b48b", |
| "task_id": "architectural_review", |
| "seed": 6, |
| "final_score": 0.059, |
| "steps_taken": 5, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "5206e9d8-0ee0-482c-ad3d-34adf2ce57a7", |
| "task_id": "architectural_review", |
| "seed": 7, |
| "final_score": 0.664, |
| "steps_taken": 6, |
| "issues_found": 1, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.03 |
| }, |
| { |
| "episode_id": "c147abd7-f887-44a2-b166-5382e292d793", |
| "task_id": "architectural_review", |
| "seed": 8, |
| "final_score": 0.039, |
| "steps_taken": 5, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.08 |
| }, |
| { |
| "episode_id": "6fe54153-a878-47d7-a521-5cd70f55f6b8", |
| "task_id": "architectural_review", |
| "seed": 9, |
| "final_score": 0.075, |
| "steps_taken": 5, |
| "issues_found": 0, |
| "issues_total": 1, |
| "noise_penalties": 5, |
| "terminated_reason": "noise_exhausted", |
| "duration_seconds": 0.09 |
| } |
| ] |