| { |
| "task": "all", |
| "tasks": [ |
| "task1", |
| "task2", |
| "task3" |
| ], |
| "episodes_per_policy": 30, |
| "adaptive": false, |
| "difficulty_controller": { |
| "adaptive": true, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "difficulty_controller_by_task_policy": { |
| "task1": { |
| "random": {}, |
| "heuristic": {}, |
| "oracle_lite": {} |
| }, |
| "task2": { |
| "random": {}, |
| "heuristic": {}, |
| "oracle_lite": {} |
| }, |
| "task3": { |
| "random": {}, |
| "heuristic": {}, |
| "oracle_lite": {} |
| } |
| }, |
| "summary": { |
| "random": { |
| "episodes": 90, |
| "avg_score": 0.6904, |
| "avg_completion_rate": 0.8131, |
| "avg_detection_rate": 0.7935, |
| "avg_trust_calibration": 0.4453, |
| "avg_steps": 26.2111 |
| }, |
| "heuristic": { |
| "episodes": 90, |
| "avg_score": 0.7817, |
| "avg_completion_rate": 0.8918, |
| "avg_detection_rate": 0.9178, |
| "avg_trust_calibration": 0.4373, |
| "avg_steps": 24.4 |
| }, |
| "oracle_lite": { |
| "episodes": 90, |
| "avg_score": 0.8405, |
| "avg_completion_rate": 0.8687, |
| "avg_detection_rate": 1.0, |
| "avg_trust_calibration": 0.5892, |
| "avg_steps": 29.4444 |
| } |
| }, |
| "by_task": { |
| "task1": { |
| "random": { |
| "episodes": 30, |
| "avg_score": 0.7635, |
| "avg_completion_rate": 0.76, |
| "avg_detection_rate": 1.0, |
| "avg_trust_calibration": 0.0, |
| "avg_steps": 15.1333 |
| }, |
| "heuristic": { |
| "episodes": 30, |
| "avg_score": 0.8504, |
| "avg_completion_rate": 0.84, |
| "avg_detection_rate": 1.0, |
| "avg_trust_calibration": 0.0, |
| "avg_steps": 13.8333 |
| }, |
| "oracle_lite": { |
| "episodes": 30, |
| "avg_score": 0.9011, |
| "avg_completion_rate": 0.7167, |
| "avg_detection_rate": 1.0, |
| "avg_trust_calibration": 0.0, |
| "avg_steps": 16.0 |
| } |
| }, |
| "task2": { |
| "random": { |
| "episodes": 30, |
| "avg_score": 0.6472, |
| "avg_completion_rate": 0.8644, |
| "avg_detection_rate": 1.0, |
| "avg_trust_calibration": 0.5829, |
| "avg_steps": 26.7667 |
| }, |
| "heuristic": { |
| "episodes": 30, |
| "avg_score": 0.7497, |
| "avg_completion_rate": 0.9288, |
| "avg_detection_rate": 1.0, |
| "avg_trust_calibration": 0.5737, |
| "avg_steps": 23.2333 |
| }, |
| "oracle_lite": { |
| "episodes": 30, |
| "avg_score": 0.7638, |
| "avg_completion_rate": 0.9045, |
| "avg_detection_rate": 1.0, |
| "avg_trust_calibration": 0.8377, |
| "avg_steps": 30.0 |
| } |
| }, |
| "task3": { |
| "random": { |
| "episodes": 30, |
| "avg_score": 0.6606, |
| "avg_completion_rate": 0.815, |
| "avg_detection_rate": 0.3806, |
| "avg_trust_calibration": 0.7531, |
| "avg_steps": 36.7333 |
| }, |
| "heuristic": { |
| "episodes": 30, |
| "avg_score": 0.7449, |
| "avg_completion_rate": 0.9067, |
| "avg_detection_rate": 0.7534, |
| "avg_trust_calibration": 0.7383, |
| "avg_steps": 36.1333 |
| }, |
| "oracle_lite": { |
| "episodes": 30, |
| "avg_score": 0.8567, |
| "avg_completion_rate": 0.985, |
| "avg_detection_rate": 1.0, |
| "avg_trust_calibration": 0.9299, |
| "avg_steps": 42.3333 |
| } |
| } |
| }, |
| "episodes": [ |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 0, |
| "steps": 15, |
| "score": 0.6569, |
| "total_reward": 7.8825, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.914, |
| 0.962, |
| 0.962, |
| 0.02, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3645, |
| 0.02, |
| 0.02, |
| 0.867 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 1, |
| "steps": 15, |
| "score": 0.7996, |
| "total_reward": 7.196, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.982, |
| 0.914, |
| 0.914, |
| 0.962, |
| 0.867, |
| 0.962, |
| 0.3165 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 2, |
| "steps": 15, |
| "score": 0.8129, |
| "total_reward": 8.1294, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.369, |
| 0.3024, |
| 0.962, |
| 0.962, |
| 0.914, |
| 0.962, |
| 0.867, |
| 0.867, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 3, |
| "steps": 14, |
| "score": 0.8084, |
| "total_reward": 10.5095, |
| "completion_rate": 0.9, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.982, |
| 0.982, |
| 0.962, |
| 0.3455, |
| 0.867, |
| 0.962, |
| 0.946, |
| 0.962, |
| 0.3455, |
| 0.3645, |
| 0.867, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 4, |
| "steps": 15, |
| "score": 0.7814, |
| "total_reward": 8.5956, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.982, |
| 0.962, |
| 0.3645, |
| 0.914, |
| 0.962, |
| 0.3455, |
| 0.3136, |
| 0.962, |
| 0.914 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 5, |
| "steps": 15, |
| "score": 0.725, |
| "total_reward": 8.7, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.914, |
| 0.962, |
| 0.3165, |
| 0.3455, |
| 0.3455, |
| 0.867, |
| 0.946, |
| 0.962, |
| 0.3455, |
| 0.867 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 6, |
| "steps": 15, |
| "score": 0.8118, |
| "total_reward": 8.1182, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.914, |
| 0.914, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.934, |
| 0.867, |
| 0.3206 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 7, |
| "steps": 15, |
| "score": 0.9334, |
| "total_reward": 9.334, |
| "completion_rate": 0.9, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.867, |
| 0.914, |
| 0.914, |
| 0.962, |
| 0.867, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 8, |
| "steps": 15, |
| "score": 0.8425, |
| "total_reward": 9.2675, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.369, |
| 0.962, |
| 0.914, |
| 0.962, |
| 0.962, |
| 0.3645, |
| 0.962, |
| 0.982, |
| 0.914 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 9, |
| "steps": 15, |
| "score": 0.7751, |
| "total_reward": 9.3011, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.982, |
| 0.962, |
| 0.962, |
| 0.867, |
| 0.3616, |
| 0.914, |
| 0.3645, |
| 0.02, |
| 0.982, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 10, |
| "steps": 15, |
| "score": 0.7653, |
| "total_reward": 8.418, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.982, |
| 0.914, |
| 0.982, |
| 0.962, |
| 0.02, |
| 0.3455, |
| 0.3645, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 11, |
| "steps": 15, |
| "score": 0.8199, |
| "total_reward": 9.8394, |
| "completion_rate": 0.9, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.867, |
| 0.962, |
| 0.962, |
| 0.3504, |
| 0.914, |
| 0.982, |
| 0.962, |
| 0.982, |
| 0.02, |
| 0.914, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 12, |
| "steps": 15, |
| "score": 0.6163, |
| "total_reward": 7.3956, |
| "completion_rate": 0.6, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.982, |
| 0.914, |
| 0.3455, |
| 0.962, |
| 0.898, |
| 0.962, |
| 0.914, |
| 0.02, |
| 0.3616, |
| 0.3455, |
| 0.3455 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 13, |
| "steps": 15, |
| "score": 0.7283, |
| "total_reward": 6.555, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.914, |
| 0.867, |
| 0.962, |
| 0.934, |
| 0.962, |
| 0.914, |
| 0.02 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 14, |
| "steps": 17, |
| "score": 0.8867, |
| "total_reward": 10.6405, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.867, |
| 0.914, |
| 0.914, |
| 0.3455, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.914 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 15, |
| "steps": 16, |
| "score": 0.6915, |
| "total_reward": 9.6809, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3504, |
| 0.962, |
| 0.962, |
| 0.867, |
| 0.3645, |
| 0.3645, |
| 0.982, |
| 0.3645, |
| 0.867, |
| 0.982, |
| 0.3455, |
| 0.3455, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 16, |
| "steps": 15, |
| "score": 0.7164, |
| "total_reward": 9.313, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.2975, |
| 0.02, |
| 0.982, |
| 0.02, |
| 0.3455, |
| 0.914, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 17, |
| "steps": 15, |
| "score": 0.6495, |
| "total_reward": 8.4439, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.02, |
| 0.3455, |
| 0.3136, |
| 0.867, |
| 0.982, |
| 0.962, |
| 0.3206, |
| 0.962, |
| 0.962, |
| 0.982, |
| 0.3826 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 18, |
| "steps": 15, |
| "score": 0.8235, |
| "total_reward": 9.8815, |
| "completion_rate": 0.9, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.02, |
| 0.962, |
| 0.982, |
| 0.3645, |
| 0.962, |
| 0.867, |
| 0.962, |
| 0.914, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 19, |
| "steps": 15, |
| "score": 0.7588, |
| "total_reward": 8.347, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.2975, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3455, |
| 0.02, |
| 0.93, |
| 0.982, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 20, |
| "steps": 15, |
| "score": 0.6444, |
| "total_reward": 7.7329, |
| "completion_rate": 0.6, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3525, |
| 0.867, |
| 0.867, |
| 0.962, |
| 0.962, |
| 0.3504, |
| 0.962, |
| 0.962, |
| 0.3504, |
| 0.3686, |
| 0.3645 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 21, |
| "steps": 15, |
| "score": 0.8756, |
| "total_reward": 9.6315, |
| "completion_rate": 0.9, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.914, |
| 0.914, |
| 0.962, |
| 0.3455, |
| 0.962, |
| 0.962, |
| 0.914, |
| 0.867 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 22, |
| "steps": 15, |
| "score": 0.6437, |
| "total_reward": 7.0809, |
| "completion_rate": 0.5, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3504, |
| 0.02, |
| 0.93, |
| 0.914, |
| 0.3645, |
| 0.3645, |
| 0.962, |
| 0.2975, |
| 0.914, |
| 0.982 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 23, |
| "steps": 15, |
| "score": 0.8985, |
| "total_reward": 10.7824, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.867, |
| 0.962, |
| 0.3504, |
| 0.982, |
| 0.962, |
| 0.962, |
| 0.867, |
| 0.962, |
| 0.962, |
| 0.982, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 24, |
| "steps": 16, |
| "score": 0.6933, |
| "total_reward": 7.6267, |
| "completion_rate": 0.6, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.2975, |
| 0.982, |
| 0.982, |
| 0.3066, |
| 0.934, |
| 0.914, |
| 0.962, |
| 0.3686, |
| 0.02, |
| 0.93 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 25, |
| "steps": 15, |
| "score": 0.8266, |
| "total_reward": 9.0928, |
| "completion_rate": 0.9, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.914, |
| 0.982, |
| 0.962, |
| 0.867, |
| 0.962, |
| 0.962, |
| 0.867, |
| 0.914, |
| 0.962, |
| 0.3504 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 26, |
| "steps": 15, |
| "score": 0.7833, |
| "total_reward": 7.8326, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.914, |
| 0.962, |
| 0.982, |
| 0.962, |
| 0.962, |
| 0.02, |
| 0.3206, |
| 0.914, |
| 0.898 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 27, |
| "steps": 16, |
| "score": 0.8311, |
| "total_reward": 9.1421, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.934, |
| 0.3455, |
| 0.3546, |
| 0.962, |
| 0.93, |
| 0.982, |
| 0.962, |
| 0.93, |
| 0.914, |
| 0.914 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 28, |
| "steps": 15, |
| "score": 0.7196, |
| "total_reward": 8.6356, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.914, |
| 0.962, |
| 0.962, |
| 0.914, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3826, |
| 0.02, |
| 0.3165 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task1", |
| "seed": 29, |
| "steps": 15, |
| "score": 0.5851, |
| "total_reward": 7.021, |
| "completion_rate": 0.6, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.914, |
| 0.3455, |
| 0.962, |
| 0.02, |
| 0.914, |
| 0.3165, |
| 0.02, |
| 0.867, |
| 0.962, |
| 0.369 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 0, |
| "steps": 13, |
| "score": 0.753, |
| "total_reward": 10.5415, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.982, |
| 0.982, |
| 0.982, |
| 0.3645, |
| 0.3645, |
| 0.982, |
| 0.3645, |
| 0.3645, |
| 0.962, |
| 0.962, |
| 0.3455, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 1, |
| "steps": 12, |
| "score": 0.7843, |
| "total_reward": 10.196, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.982, |
| 0.982, |
| 0.982, |
| 0.3645, |
| 0.3645, |
| 0.982, |
| 0.3645, |
| 0.3645, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 2, |
| "steps": 11, |
| "score": 0.8612, |
| "total_reward": 10.3345, |
| "completion_rate": 0.9, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.369, |
| 0.3455, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 3, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 4, |
| "steps": 11, |
| "score": 0.911, |
| "total_reward": 10.9324, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3504, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 5, |
| "steps": 16, |
| "score": 0.8266, |
| "total_reward": 7.439, |
| "completion_rate": 0.6, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 6, |
| "steps": 10, |
| "score": 0.962, |
| "total_reward": 10.582, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 7, |
| "steps": 16, |
| "score": 0.8166, |
| "total_reward": 9.7988, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.3686, |
| 0.962, |
| 0.3826, |
| 0.3896, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 8, |
| "steps": 16, |
| "score": 0.8399, |
| "total_reward": 8.3989, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.3504, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 9, |
| "steps": 16, |
| "score": 0.785, |
| "total_reward": 10.2052, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3756, |
| 0.962, |
| 0.3896, |
| 0.391, |
| 0.391, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 10, |
| "steps": 10, |
| "score": 0.962, |
| "total_reward": 10.582, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 11, |
| "steps": 16, |
| "score": 0.7843, |
| "total_reward": 10.196, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.982, |
| 0.982, |
| 0.982, |
| 0.3645, |
| 0.3645, |
| 0.982, |
| 0.3645, |
| 0.3645, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 12, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 13, |
| "steps": 16, |
| "score": 0.9003, |
| "total_reward": 9.0035, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.3455, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 14, |
| "steps": 13, |
| "score": 0.7534, |
| "total_reward": 10.5473, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.3686, |
| 0.3756, |
| 0.962, |
| 0.3896, |
| 0.391, |
| 0.3645, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 15, |
| "steps": 13, |
| "score": 0.8312, |
| "total_reward": 11.6374, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3546, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3504, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3504, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 16, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 17, |
| "steps": 16, |
| "score": 0.8943, |
| "total_reward": 8.0485, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 18, |
| "steps": 10, |
| "score": 0.962, |
| "total_reward": 10.582, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 19, |
| "steps": 12, |
| "score": 0.8675, |
| "total_reward": 11.2779, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3455, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3504, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 20, |
| "steps": 16, |
| "score": 0.7993, |
| "total_reward": 8.7927, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.3686, |
| 0.3756, |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 21, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 22, |
| "steps": 15, |
| "score": 0.772, |
| "total_reward": 12.3526, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.369, |
| 0.962, |
| 0.962, |
| 0.3504, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3504, |
| 0.962, |
| 0.3504, |
| 0.962, |
| 0.962, |
| 0.3504, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 23, |
| "steps": 11, |
| "score": 0.8606, |
| "total_reward": 10.3271, |
| "completion_rate": 0.9, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.3616, |
| 0.3455, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 24, |
| "steps": 16, |
| "score": 0.8161, |
| "total_reward": 9.7931, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.3616, |
| 0.982, |
| 0.369, |
| 0.982, |
| 0.3645, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 25, |
| "steps": 14, |
| "score": 0.6506, |
| "total_reward": 9.7585, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.982, |
| 0.982, |
| 0.982, |
| 0.3645, |
| 0.3645, |
| 0.982, |
| 0.946, |
| 0.369, |
| 0.982, |
| 0.3645, |
| 0.3645, |
| 0.982, |
| 0.3645, |
| 0.3645 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 26, |
| "steps": 16, |
| "score": 0.7054, |
| "total_reward": 11.2865, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.982, |
| 0.982, |
| 0.982, |
| 0.3645, |
| 0.946, |
| 0.982, |
| 0.3645, |
| 0.946, |
| 0.982, |
| 0.3645, |
| 0.3645, |
| 0.369, |
| 0.369, |
| 0.3645, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 27, |
| "steps": 15, |
| "score": 0.6937, |
| "total_reward": 9.0177, |
| "completion_rate": 0.6, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.3686, |
| 0.962, |
| 0.962, |
| 0.3896, |
| 0.391, |
| 0.391, |
| 0.391, |
| 0.962, |
| 0.3525, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 28, |
| "steps": 10, |
| "score": 0.962, |
| "total_reward": 10.582, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task1", |
| "seed": 29, |
| "steps": 11, |
| "score": 0.911, |
| "total_reward": 10.9324, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3504, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 0, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 1, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 2, |
| "steps": 16, |
| "score": 0.8266, |
| "total_reward": 7.439, |
| "completion_rate": 0.6, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3525, |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 3, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 4, |
| "steps": 16, |
| "score": 0.8943, |
| "total_reward": 8.0485, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 5, |
| "steps": 16, |
| "score": 0.8266, |
| "total_reward": 7.439, |
| "completion_rate": 0.6, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 6, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 7, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 8, |
| "steps": 16, |
| "score": 0.8266, |
| "total_reward": 7.439, |
| "completion_rate": 0.6, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 9, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 10, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 11, |
| "steps": 16, |
| "score": 0.8943, |
| "total_reward": 8.0485, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 12, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 13, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 14, |
| "steps": 16, |
| "score": 0.8943, |
| "total_reward": 8.0485, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 15, |
| "steps": 16, |
| "score": 0.8266, |
| "total_reward": 7.439, |
| "completion_rate": 0.6, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 16, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 17, |
| "steps": 16, |
| "score": 0.8943, |
| "total_reward": 8.0485, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 18, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 19, |
| "steps": 16, |
| "score": 0.8943, |
| "total_reward": 8.0485, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 20, |
| "steps": 16, |
| "score": 0.7588, |
| "total_reward": 6.8295, |
| "completion_rate": 0.5, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 21, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 22, |
| "steps": 16, |
| "score": 0.8266, |
| "total_reward": 7.439, |
| "completion_rate": 0.6, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 23, |
| "steps": 16, |
| "score": 0.8266, |
| "total_reward": 7.439, |
| "completion_rate": 0.6, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3525, |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 24, |
| "steps": 16, |
| "score": 0.8943, |
| "total_reward": 8.0485, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 25, |
| "steps": 16, |
| "score": 0.7588, |
| "total_reward": 6.8295, |
| "completion_rate": 0.6, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3525 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 26, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 27, |
| "steps": 16, |
| "score": 0.8943, |
| "total_reward": 8.0485, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.3525, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 28, |
| "steps": 16, |
| "score": 0.962, |
| "total_reward": 8.658, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task1", |
| "seed": 29, |
| "steps": 16, |
| "score": 0.8266, |
| "total_reward": 7.439, |
| "completion_rate": 0.7, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.0, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.962, |
| 0.3525 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 0, |
| "steps": 25, |
| "score": 0.5298, |
| "total_reward": 9.5365, |
| "completion_rate": 0.667, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.461, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9083, |
| 0.925, |
| 0.9167, |
| 0.02, |
| 0.8917, |
| 0.8833, |
| 0.8667, |
| 0.2233, |
| 0.02, |
| 0.02, |
| 0.725, |
| 0.8083, |
| 0.6917, |
| 0.775, |
| 0.1317, |
| 0.115, |
| 0.5949 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 1, |
| "steps": 31, |
| "score": 0.7252, |
| "total_reward": 13.054, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.569, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9833, |
| 0.8833, |
| 0.8583, |
| 0.875, |
| 0.7583, |
| 0.8417, |
| 0.165, |
| 0.7833, |
| 0.8, |
| 0.6833, |
| 0.735, |
| 0.1217, |
| 0.7167, |
| 0.7, |
| 0.7083, |
| 0.849 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 2, |
| "steps": 27, |
| "score": 0.6551, |
| "total_reward": 11.7913, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.579, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3517, |
| 0.262, |
| 0.9167, |
| 0.9083, |
| 0.8583, |
| 0.875, |
| 0.7583, |
| 0.7417, |
| 0.825, |
| 0.7083, |
| 0.8, |
| 0.6833, |
| 0.6667, |
| 0.75, |
| 0.0967, |
| 0.0983, |
| 0.766 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 3, |
| "steps": 28, |
| "score": 0.6154, |
| "total_reward": 12.9233, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.586, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.99, |
| 0.9833, |
| 0.925, |
| 0.2717, |
| 0.8, |
| 0.8917, |
| 0.9333, |
| 0.875, |
| 0.2217, |
| 0.2233, |
| 0.7417, |
| 0.8333, |
| 0.1483, |
| 0.7917, |
| 0.1383, |
| 0.7333, |
| 0.75, |
| 0.7083, |
| 0.123, |
| 0.7686 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 4, |
| "steps": 28, |
| "score": 0.6575, |
| "total_reward": 12.4928, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.458, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9833, |
| 0.925, |
| 0.2817, |
| 0.8667, |
| 0.8833, |
| 0.23, |
| 0.2113, |
| 0.8417, |
| 0.8, |
| 0.775, |
| 0.7917, |
| 0.775, |
| 0.7583, |
| 0.75, |
| 0.1067, |
| 0.1313, |
| 0.7235 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 5, |
| "steps": 26, |
| "score": 0.6206, |
| "total_reward": 12.4128, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.408, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9, |
| 0.9167, |
| 0.24, |
| 0.2467, |
| 0.2383, |
| 0.7667, |
| 0.9083, |
| 0.85, |
| 0.1967, |
| 0.725, |
| 0.8667, |
| 0.165, |
| 0.7917, |
| 0.7833, |
| 0.7667, |
| 0.02, |
| 0.65, |
| 0.7061 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 6, |
| "steps": 31, |
| "score": 0.7065, |
| "total_reward": 12.7163, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.576, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9083, |
| 0.8833, |
| 0.9, |
| 0.8917, |
| 0.8833, |
| 0.875, |
| 0.8833, |
| 0.7417, |
| 0.188, |
| 0.8667, |
| 0.13, |
| 0.7583, |
| 0.7667, |
| 0.7583, |
| 0.7083, |
| 0.7, |
| 0.8083 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 7, |
| "steps": 25, |
| "score": 0.7366, |
| "total_reward": 12.5218, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.776, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.8333, |
| 0.8917, |
| 0.875, |
| 0.8917, |
| 0.775, |
| 0.8583, |
| 0.85, |
| 0.8333, |
| 0.825, |
| 0.1817, |
| 0.8, |
| 0.6833, |
| 0.825, |
| 0.02, |
| 0.7583, |
| 0.8784 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 8, |
| "steps": 25, |
| "score": 0.7329, |
| "total_reward": 13.9253, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.425, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.335, |
| 0.9167, |
| 0.8667, |
| 0.8833, |
| 0.875, |
| 0.2767, |
| 0.8583, |
| 0.9, |
| 0.8, |
| 0.8167, |
| 0.7, |
| 0.1233, |
| 0.775, |
| 0.7667, |
| 0.8083, |
| 0.75, |
| 0.7987 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 9, |
| "steps": 27, |
| "score": 0.7062, |
| "total_reward": 12.712, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.597, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.99, |
| 0.9333, |
| 0.925, |
| 0.8083, |
| 0.278, |
| 0.8583, |
| 0.925, |
| 0.02, |
| 0.2233, |
| 0.8417, |
| 0.825, |
| 0.8167, |
| 0.735, |
| 0.6833, |
| 0.7333, |
| 0.65, |
| 0.8157 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 10, |
| "steps": 26, |
| "score": 0.5723, |
| "total_reward": 12.0174, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.855, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.925, |
| 0.9667, |
| 0.8667, |
| 0.2483, |
| 0.8667, |
| 0.02, |
| 0.205, |
| 0.8917, |
| 0.825, |
| 0.8167, |
| 0.775, |
| 0.1567, |
| 0.02, |
| 0.153, |
| 0.7667, |
| 0.8083, |
| 0.105, |
| 0.1067, |
| 0.8194 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 11, |
| "steps": 23, |
| "score": 0.7197, |
| "total_reward": 12.955, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.57, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.8333, |
| 0.925, |
| 0.9167, |
| 0.2703, |
| 0.8667, |
| 0.2483, |
| 0.8667, |
| 0.9083, |
| 0.02, |
| 0.8083, |
| 0.825, |
| 0.8167, |
| 0.7683, |
| 0.7917, |
| 0.7833, |
| 0.7417, |
| 0.8063 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 12, |
| "steps": 30, |
| "score": 0.6047, |
| "total_reward": 12.0935, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.344, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.99, |
| 0.9, |
| 0.2717, |
| 0.9083, |
| 0.8267, |
| 0.8833, |
| 0.8333, |
| 0.02, |
| 0.2197, |
| 0.1883, |
| 0.18, |
| 0.7833, |
| 0.7917, |
| 0.7417, |
| 0.7583, |
| 0.65, |
| 0.7, |
| 0.0633, |
| 0.6839 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 13, |
| "steps": 30, |
| "score": 0.6649, |
| "total_reward": 11.9681, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.271, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.8917, |
| 0.8, |
| 0.8833, |
| 0.2067, |
| 0.8583, |
| 0.8083, |
| 0.02, |
| 0.7833, |
| 0.6917, |
| 0.75, |
| 0.6583, |
| 0.75, |
| 0.7333, |
| 0.09, |
| 0.7, |
| 0.7014 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 14, |
| "steps": 26, |
| "score": 0.7146, |
| "total_reward": 13.5771, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.416, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.825, |
| 0.8833, |
| 0.8667, |
| 0.2383, |
| 0.875, |
| 0.8667, |
| 0.8583, |
| 0.85, |
| 0.8333, |
| 0.7833, |
| 0.8, |
| 0.1797, |
| 0.7833, |
| 0.775, |
| 0.1217, |
| 0.65, |
| 0.7957 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 15, |
| "steps": 27, |
| "score": 0.5573, |
| "total_reward": 12.2603, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.606, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3037, |
| 0.9333, |
| 0.925, |
| 0.8083, |
| 0.265, |
| 0.2567, |
| 0.8433, |
| 0.925, |
| 0.7583, |
| 0.215, |
| 0.1967, |
| 0.1883, |
| 0.8167, |
| 0.8, |
| 0.1567, |
| 0.675, |
| 0.7583, |
| 0.75, |
| 0.7417, |
| 0.0883, |
| 0.732 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 16, |
| "steps": 23, |
| "score": 0.5543, |
| "total_reward": 11.0864, |
| "completion_rate": 0.733, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.437, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9333, |
| 0.925, |
| 0.9167, |
| 0.9083, |
| 0.2217, |
| 0.02, |
| 0.925, |
| 0.02, |
| 0.2133, |
| 0.8167, |
| 0.825, |
| 0.1817, |
| 0.02, |
| 0.8, |
| 0.7583, |
| 0.775, |
| 0.1317, |
| 0.6298 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 17, |
| "steps": 27, |
| "score": 0.5694, |
| "total_reward": 11.9565, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.688, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.02, |
| 0.28, |
| 0.2613, |
| 0.7917, |
| 0.2483, |
| 0.875, |
| 0.2213, |
| 0.85, |
| 0.8417, |
| 0.7933, |
| 0.233, |
| 0.8667, |
| 0.7, |
| 0.7833, |
| 0.7667, |
| 0.7583, |
| 0.168, |
| 0.0703, |
| 0.7609 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 18, |
| "steps": 23, |
| "score": 0.6662, |
| "total_reward": 11.3256, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.602, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.925, |
| 0.02, |
| 0.9, |
| 0.2567, |
| 0.2483, |
| 0.875, |
| 0.7583, |
| 0.85, |
| 0.8083, |
| 0.825, |
| 0.8083, |
| 0.85, |
| 0.02, |
| 0.75, |
| 0.7306 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 19, |
| "steps": 27, |
| "score": 0.7003, |
| "total_reward": 12.6055, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.625, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.2633, |
| 0.9167, |
| 0.9083, |
| 0.8917, |
| 0.8833, |
| 0.23, |
| 0.02, |
| 0.825, |
| 0.8917, |
| 0.825, |
| 0.7083, |
| 0.8, |
| 0.6833, |
| 0.7417, |
| 0.7583, |
| 0.7083, |
| 0.8255 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 20, |
| "steps": 32, |
| "score": 0.5557, |
| "total_reward": 12.7811, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.636, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.2983, |
| 0.8167, |
| 0.8, |
| 0.8917, |
| 0.8833, |
| 0.237, |
| 0.8667, |
| 0.85, |
| 0.2037, |
| 0.2213, |
| 0.19, |
| 0.8083, |
| 0.8, |
| 0.1997, |
| 0.7833, |
| 0.775, |
| 0.6583, |
| 0.0787, |
| 0.725, |
| 0.1347, |
| 0.7083, |
| 0.7861 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 21, |
| "steps": 23, |
| "score": 0.7485, |
| "total_reward": 12.7247, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.523, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9333, |
| 0.8917, |
| 0.875, |
| 0.8917, |
| 0.2383, |
| 0.8667, |
| 0.8583, |
| 0.8167, |
| 0.725, |
| 0.8083, |
| 0.8, |
| 0.02, |
| 0.7833, |
| 0.7667, |
| 0.7897 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 22, |
| "steps": 31, |
| "score": 0.4891, |
| "total_reward": 10.2706, |
| "completion_rate": 0.667, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.544, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3037, |
| 0.02, |
| 0.8917, |
| 0.8667, |
| 0.2483, |
| 0.24, |
| 0.8667, |
| 0.18, |
| 0.8083, |
| 0.875, |
| 0.1817, |
| 0.165, |
| 0.7583, |
| 0.13, |
| 0.1217, |
| 0.75, |
| 0.0983, |
| 0.725, |
| 0.7167, |
| 0.6236 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 23, |
| "steps": 20, |
| "score": 0.7708, |
| "total_reward": 13.1031, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.755, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.8333, |
| 0.925, |
| 0.2787, |
| 0.9583, |
| 0.9, |
| 0.8917, |
| 0.775, |
| 0.8667, |
| 0.85, |
| 0.8917, |
| 0.825, |
| 0.8167, |
| 0.8083, |
| 0.8, |
| 0.7917, |
| 0.8711 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 24, |
| "steps": 31, |
| "score": 0.5498, |
| "total_reward": 9.8972, |
| "completion_rate": 0.667, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.809, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.2633, |
| 0.975, |
| 0.9667, |
| 0.243, |
| 0.9083, |
| 0.8333, |
| 0.85, |
| 0.2297, |
| 0.02, |
| 0.7917, |
| 0.02, |
| 0.1647, |
| 0.7417, |
| 0.09, |
| 0.7333, |
| 0.7, |
| 0.7165 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 25, |
| "steps": 23, |
| "score": 0.791, |
| "total_reward": 13.4466, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.623, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9, |
| 0.9667, |
| 0.9083, |
| 0.7917, |
| 0.8833, |
| 0.875, |
| 0.7583, |
| 0.8167, |
| 0.8333, |
| 0.187, |
| 0.8167, |
| 0.8083, |
| 0.6917, |
| 0.8333, |
| 0.775, |
| 0.8679 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 26, |
| "steps": 29, |
| "score": 0.5823, |
| "total_reward": 11.646, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.895, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9, |
| 0.9167, |
| 0.9583, |
| 0.9, |
| 0.8917, |
| 0.02, |
| 0.2297, |
| 0.8167, |
| 0.76, |
| 0.8083, |
| 0.7917, |
| 0.1813, |
| 0.13, |
| 0.0953, |
| 0.75, |
| 0.7917, |
| 0.1413, |
| 0.08, |
| 0.8333 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 27, |
| "steps": 29, |
| "score": 0.6402, |
| "total_reward": 12.1644, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.703, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9583, |
| 0.28, |
| 0.2847, |
| 0.9083, |
| 0.8667, |
| 0.2483, |
| 0.8667, |
| 0.825, |
| 0.8083, |
| 0.7917, |
| 0.825, |
| 0.02, |
| 0.75, |
| 0.8167, |
| 0.65, |
| 0.0703, |
| 0.725, |
| 0.8094 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 28, |
| "steps": 23, |
| "score": 0.7219, |
| "total_reward": 12.9944, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.671, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9083, |
| 0.9167, |
| 0.9083, |
| 0.8667, |
| 0.8833, |
| 0.875, |
| 0.8667, |
| 0.8583, |
| 0.258, |
| 0.02, |
| 0.165, |
| 0.8167, |
| 0.8, |
| 0.8417, |
| 0.7833, |
| 0.6667, |
| 0.8414 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task2", |
| "seed": 29, |
| "steps": 27, |
| "score": 0.5586, |
| "total_reward": 10.614, |
| "completion_rate": 0.733, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.478, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9, |
| 0.2717, |
| 0.9083, |
| 0.02, |
| 0.8583, |
| 0.8917, |
| 0.02, |
| 0.7417, |
| 0.8333, |
| 0.19, |
| 0.1817, |
| 0.7667, |
| 0.7833, |
| 0.7417, |
| 0.1133, |
| 0.0817, |
| 0.644 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 0, |
| "steps": 31, |
| "score": 0.6145, |
| "total_reward": 12.2902, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.72, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.99, |
| 0.9833, |
| 0.975, |
| 0.2817, |
| 0.2733, |
| 0.265, |
| 0.2467, |
| 0.875, |
| 0.8583, |
| 0.8417, |
| 0.19, |
| 0.8083, |
| 0.7917, |
| 0.775, |
| 0.7583, |
| 0.7417, |
| 0.09, |
| 0.0733, |
| 0.7719 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 1, |
| "steps": 17, |
| "score": 0.768, |
| "total_reward": 13.8236, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.282, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.99, |
| 0.9833, |
| 0.975, |
| 0.2817, |
| 0.2733, |
| 0.265, |
| 0.8917, |
| 0.8833, |
| 0.875, |
| 0.8667, |
| 0.8583, |
| 0.85, |
| 0.8417, |
| 0.8333, |
| 0.825, |
| 0.8167, |
| 0.7053 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 2, |
| "steps": 17, |
| "score": 0.7237, |
| "total_reward": 13.0266, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.284, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3517, |
| 0.2883, |
| 0.925, |
| 0.9167, |
| 0.9083, |
| 0.9, |
| 0.8917, |
| 0.8833, |
| 0.875, |
| 0.8667, |
| 0.8583, |
| 0.85, |
| 0.8417, |
| 0.8333, |
| 0.825, |
| 0.1787, |
| 0.6626 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 3, |
| "steps": 30, |
| "score": 0.7823, |
| "total_reward": 12.5171, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.844, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.1317, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9021 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 4, |
| "steps": 17, |
| "score": 0.7999, |
| "total_reward": 14.3981, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.426, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9333, |
| 0.925, |
| 0.9167, |
| 0.9083, |
| 0.9, |
| 0.2537, |
| 0.8833, |
| 0.875, |
| 0.8667, |
| 0.8583, |
| 0.85, |
| 0.8417, |
| 0.8333, |
| 0.825, |
| 0.1787, |
| 0.7991 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 5, |
| "steps": 30, |
| "score": 0.6545, |
| "total_reward": 10.4723, |
| "completion_rate": 0.733, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.816, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.2483, |
| 0.8667, |
| 0.215, |
| 0.8333, |
| 0.8167, |
| 0.165, |
| 0.7833, |
| 0.1317, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.7623 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 6, |
| "steps": 15, |
| "score": 0.8749, |
| "total_reward": 13.9981, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.28, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9333, |
| 0.925, |
| 0.9167, |
| 0.9083, |
| 0.9, |
| 0.8917, |
| 0.8833, |
| 0.875, |
| 0.8667, |
| 0.8583, |
| 0.85, |
| 0.8417, |
| 0.8333, |
| 0.7481 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 7, |
| "steps": 28, |
| "score": 0.7465, |
| "total_reward": 13.4373, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.833, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9333, |
| 0.313, |
| 0.9167, |
| 0.3163, |
| 0.318, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.8983 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 8, |
| "steps": 17, |
| "score": 0.7992, |
| "total_reward": 14.3856, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.39, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.2953, |
| 0.925, |
| 0.9167, |
| 0.9083, |
| 0.9, |
| 0.2537, |
| 0.8833, |
| 0.875, |
| 0.8667, |
| 0.8583, |
| 0.85, |
| 0.8417, |
| 0.8333, |
| 0.825, |
| 0.8167, |
| 0.7866 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 9, |
| "steps": 28, |
| "score": 0.7248, |
| "total_reward": 13.7712, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.834, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9333, |
| 0.925, |
| 0.3147, |
| 0.9083, |
| 0.318, |
| 0.3117, |
| 0.3033, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.8986 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 10, |
| "steps": 16, |
| "score": 0.797, |
| "total_reward": 13.5485, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.432, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9333, |
| 0.925, |
| 0.9167, |
| 0.9083, |
| 0.9, |
| 0.8917, |
| 0.8833, |
| 0.875, |
| 0.8667, |
| 0.8583, |
| 0.212, |
| 0.2037, |
| 0.8333, |
| 0.825, |
| 0.7578 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 11, |
| "steps": 30, |
| "score": 0.6763, |
| "total_reward": 12.8505, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.825, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.99, |
| 0.9833, |
| 0.975, |
| 0.2817, |
| 0.2733, |
| 0.265, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.0817, |
| 0.8522 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 12, |
| "steps": 30, |
| "score": 0.8246, |
| "total_reward": 13.1941, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9441 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 13, |
| "steps": 30, |
| "score": 0.7935, |
| "total_reward": 13.4903, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.839, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.2883, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9436 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 14, |
| "steps": 18, |
| "score": 0.7309, |
| "total_reward": 13.8869, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.264, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9333, |
| 0.313, |
| 0.3147, |
| 0.2733, |
| 0.9, |
| 0.8917, |
| 0.8833, |
| 0.875, |
| 0.8667, |
| 0.8583, |
| 0.85, |
| 0.8417, |
| 0.8333, |
| 0.187, |
| 0.8167, |
| 0.8083, |
| 0.6989 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 15, |
| "steps": 18, |
| "score": 0.7649, |
| "total_reward": 14.5326, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.33, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3097, |
| 0.9333, |
| 0.925, |
| 0.9167, |
| 0.2703, |
| 0.9, |
| 0.8917, |
| 0.8833, |
| 0.875, |
| 0.2287, |
| 0.8583, |
| 0.85, |
| 0.8417, |
| 0.8333, |
| 0.825, |
| 0.8167, |
| 0.8083, |
| 0.7656 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 16, |
| "steps": 30, |
| "score": 0.8246, |
| "total_reward": 13.1941, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9441 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 17, |
| "steps": 30, |
| "score": 0.7823, |
| "total_reward": 12.517, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.844, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.265, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.902 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 18, |
| "steps": 15, |
| "score": 0.8749, |
| "total_reward": 13.998, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.28, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9333, |
| 0.925, |
| 0.9167, |
| 0.9083, |
| 0.9, |
| 0.8917, |
| 0.8833, |
| 0.875, |
| 0.8667, |
| 0.8583, |
| 0.85, |
| 0.8417, |
| 0.8333, |
| 0.748 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 19, |
| "steps": 17, |
| "score": 0.7966, |
| "total_reward": 14.3395, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.279, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.2967, |
| 0.9333, |
| 0.925, |
| 0.9167, |
| 0.9083, |
| 0.262, |
| 0.8917, |
| 0.8833, |
| 0.875, |
| 0.8667, |
| 0.8583, |
| 0.85, |
| 0.8417, |
| 0.8333, |
| 0.825, |
| 0.8167, |
| 0.7475 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 20, |
| "steps": 30, |
| "score": 0.6558, |
| "total_reward": 11.8048, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.82, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9333, |
| 0.313, |
| 0.3147, |
| 0.9, |
| 0.8833, |
| 0.2317, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.1483, |
| 0.7667, |
| 0.75, |
| 0.0983, |
| 0.7167, |
| 0.8071 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 21, |
| "steps": 30, |
| "score": 0.8246, |
| "total_reward": 13.1941, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9441 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 22, |
| "steps": 22, |
| "score": 0.6604, |
| "total_reward": 15.1886, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.471, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3517, |
| 0.9333, |
| 0.925, |
| 0.2787, |
| 0.9083, |
| 0.9, |
| 0.8917, |
| 0.8833, |
| 0.237, |
| 0.8667, |
| 0.2203, |
| 0.85, |
| 0.8417, |
| 0.1953, |
| 0.825, |
| 0.1787, |
| 0.8083, |
| 0.8, |
| 0.1537, |
| 0.7833, |
| 0.775, |
| 0.8149 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 23, |
| "steps": 16, |
| "score": 0.793, |
| "total_reward": 13.4804, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.212, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.3113, |
| 0.28, |
| 0.9167, |
| 0.9083, |
| 0.9, |
| 0.8917, |
| 0.8833, |
| 0.875, |
| 0.8667, |
| 0.8583, |
| 0.85, |
| 0.8417, |
| 0.8333, |
| 0.825, |
| 0.6808 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 24, |
| "steps": 30, |
| "score": 0.6768, |
| "total_reward": 12.8598, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.824, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.3113, |
| 0.975, |
| 0.3267, |
| 0.9583, |
| 0.265, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.115, |
| 0.7333, |
| 0.7167, |
| 0.8518 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 25, |
| "steps": 21, |
| "score": 0.5985, |
| "total_reward": 13.1666, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.709, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.99, |
| 0.9833, |
| 0.975, |
| 0.2817, |
| 0.2733, |
| 0.265, |
| 0.8917, |
| 0.2383, |
| 0.875, |
| 0.8667, |
| 0.8583, |
| 0.85, |
| 0.8417, |
| 0.1883, |
| 0.18, |
| 0.8167, |
| 0.1633, |
| 0.155, |
| 0.1467, |
| 0.7833, |
| 0.7683 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 26, |
| "steps": 22, |
| "score": 0.5962, |
| "total_reward": 13.1159, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.324, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.99, |
| 0.9833, |
| 0.975, |
| 0.2817, |
| 0.8683, |
| 0.265, |
| 0.2567, |
| 0.9333, |
| 0.925, |
| 0.2317, |
| 0.2233, |
| 0.26, |
| 0.1983, |
| 0.825, |
| 0.8167, |
| 0.8083, |
| 0.162, |
| 0.7917, |
| 0.1453, |
| 0.775, |
| 0.6336 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 27, |
| "steps": 30, |
| "score": 0.6573, |
| "total_reward": 13.146, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.829, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9333, |
| 0.313, |
| 0.9167, |
| 0.9083, |
| 0.318, |
| 0.3117, |
| 0.3033, |
| 0.8667, |
| 0.85, |
| 0.1983, |
| 0.8167, |
| 0.8, |
| 0.1483, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.8533 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 28, |
| "steps": 15, |
| "score": 0.8749, |
| "total_reward": 13.9979, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.28, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9333, |
| 0.925, |
| 0.9167, |
| 0.9083, |
| 0.9, |
| 0.8917, |
| 0.8833, |
| 0.875, |
| 0.8667, |
| 0.8583, |
| 0.85, |
| 0.8417, |
| 0.8333, |
| 0.7479 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task2", |
| "seed": 29, |
| "steps": 17, |
| "score": 0.7998, |
| "total_reward": 14.3965, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.421, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9417, |
| 0.9333, |
| 0.925, |
| 0.9167, |
| 0.9083, |
| 0.9, |
| 0.8917, |
| 0.2453, |
| 0.875, |
| 0.8667, |
| 0.8583, |
| 0.85, |
| 0.2037, |
| 0.8333, |
| 0.825, |
| 0.8167, |
| 0.7975 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 0, |
| "steps": 30, |
| "score": 0.7823, |
| "total_reward": 12.5171, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.844, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.1317, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9021 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 1, |
| "steps": 30, |
| "score": 0.8246, |
| "total_reward": 13.1941, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9441 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 2, |
| "steps": 30, |
| "score": 0.7399, |
| "total_reward": 11.8385, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.843, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.2983, |
| 0.2817, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.8585 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 3, |
| "steps": 30, |
| "score": 0.7823, |
| "total_reward": 12.5171, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.844, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.1317, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9021 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 4, |
| "steps": 30, |
| "score": 0.7823, |
| "total_reward": 12.5171, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.844, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.1983, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9021 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 5, |
| "steps": 30, |
| "score": 0.6545, |
| "total_reward": 10.4723, |
| "completion_rate": 0.733, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.816, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.2483, |
| 0.8667, |
| 0.215, |
| 0.8333, |
| 0.8167, |
| 0.165, |
| 0.7833, |
| 0.1317, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.7623 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 6, |
| "steps": 30, |
| "score": 0.8246, |
| "total_reward": 13.1941, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9441 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 7, |
| "steps": 30, |
| "score": 0.8246, |
| "total_reward": 13.1942, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9442 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 8, |
| "steps": 30, |
| "score": 0.7399, |
| "total_reward": 11.8383, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.843, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.2817, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.1983, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.8583 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 9, |
| "steps": 30, |
| "score": 0.8246, |
| "total_reward": 13.1941, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9441 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 10, |
| "steps": 30, |
| "score": 0.7398, |
| "total_reward": 11.8373, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.115, |
| 0.0983, |
| 0.7167, |
| 0.8573 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 11, |
| "steps": 30, |
| "score": 0.7823, |
| "total_reward": 12.517, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.844, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.265, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.902 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 12, |
| "steps": 30, |
| "score": 0.8246, |
| "total_reward": 13.1941, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9441 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 13, |
| "steps": 30, |
| "score": 0.8246, |
| "total_reward": 13.1941, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9441 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 14, |
| "steps": 30, |
| "score": 0.7398, |
| "total_reward": 11.8376, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.841, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.2483, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.8576 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 15, |
| "steps": 30, |
| "score": 0.6973, |
| "total_reward": 11.1569, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.834, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.2983, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.2317, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.1483, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.8119 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 16, |
| "steps": 30, |
| "score": 0.8246, |
| "total_reward": 13.1941, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9441 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 17, |
| "steps": 30, |
| "score": 0.7823, |
| "total_reward": 12.517, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.844, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.265, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.902 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 18, |
| "steps": 30, |
| "score": 0.8246, |
| "total_reward": 13.1941, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9441 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 19, |
| "steps": 30, |
| "score": 0.7823, |
| "total_reward": 12.517, |
| "completion_rate": 0.933, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.844, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.215, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.902 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 20, |
| "steps": 30, |
| "score": 0.6117, |
| "total_reward": 9.7864, |
| "completion_rate": 0.667, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.795, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.2983, |
| 0.9167, |
| 0.9, |
| 0.2483, |
| 0.8667, |
| 0.85, |
| 0.1983, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.115, |
| 0.7333, |
| 0.7167, |
| 0.7114 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 21, |
| "steps": 30, |
| "score": 0.8246, |
| "total_reward": 13.1941, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9441 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 22, |
| "steps": 30, |
| "score": 0.6545, |
| "total_reward": 10.4728, |
| "completion_rate": 0.733, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.817, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.2983, |
| 0.9167, |
| 0.9, |
| 0.2483, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.1317, |
| 0.75, |
| 0.7333, |
| 0.0817, |
| 0.7628 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 23, |
| "steps": 30, |
| "score": 0.7399, |
| "total_reward": 11.8385, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.843, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.2983, |
| 0.2817, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.8585 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 24, |
| "steps": 30, |
| "score": 0.7398, |
| "total_reward": 11.8376, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.841, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.2483, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.8576 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 25, |
| "steps": 30, |
| "score": 0.7399, |
| "total_reward": 11.838, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.842, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.2817, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.1817, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.858 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 26, |
| "steps": 30, |
| "score": 0.7398, |
| "total_reward": 11.8373, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.115, |
| 0.0983, |
| 0.7167, |
| 0.8573 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 27, |
| "steps": 30, |
| "score": 0.6972, |
| "total_reward": 11.156, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.831, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.265, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.1317, |
| 0.75, |
| 0.7333, |
| 0.0817, |
| 0.811 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 28, |
| "steps": 30, |
| "score": 0.8246, |
| "total_reward": 13.1941, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.8167, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.7333, |
| 0.7167, |
| 0.9441 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task2", |
| "seed": 29, |
| "steps": 30, |
| "score": 0.7398, |
| "total_reward": 11.8373, |
| "completion_rate": 0.867, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.84, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9333, |
| 0.9167, |
| 0.9, |
| 0.8833, |
| 0.8667, |
| 0.85, |
| 0.8333, |
| 0.1817, |
| 0.8, |
| 0.7833, |
| 0.7667, |
| 0.75, |
| 0.0983, |
| 0.7167, |
| 0.8573 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 0, |
| "steps": 36, |
| "score": 0.6105, |
| "total_reward": 15.2622, |
| "completion_rate": 0.75, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.884, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 1, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.8956, |
| 0.9233, |
| 0.9211, |
| 0.02, |
| 0.9144, |
| 0.9122, |
| 0.9078, |
| 0.3556, |
| 0.02, |
| 0.02, |
| 0.8467, |
| 0.8922, |
| 0.8378, |
| 0.8833, |
| 0.9061, |
| 0.3067, |
| 0.02, |
| 0.8722, |
| 0.073, |
| 0.8306, |
| 0.9061, |
| 0.3397, |
| 0.3044, |
| 0.5035 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 1, |
| "steps": 40, |
| "score": 0.7205, |
| "total_reward": 18.0135, |
| "completion_rate": 0.9, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.662, |
| "adversarial_detections": 1, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9506, |
| 0.8889, |
| 0.8822, |
| 0.91, |
| 0.8556, |
| 0.9011, |
| 0.3167, |
| 0.8622, |
| 0.89, |
| 0.8356, |
| 0.8633, |
| 0.3011, |
| 0.8444, |
| 0.8378, |
| 0.8656, |
| 0.8261, |
| 0.8589, |
| 0.8961, |
| 0.02, |
| 0.3, |
| 0.8106, |
| 0.2933, |
| 0.7916 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 2, |
| "steps": 37, |
| "score": 0.7627, |
| "total_reward": 17.5411, |
| "completion_rate": 0.9, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.752, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3778, |
| 0.3203, |
| 0.9211, |
| 0.9189, |
| 0.8822, |
| 0.91, |
| 0.8556, |
| 0.8511, |
| 0.8967, |
| 0.8422, |
| 0.89, |
| 0.8356, |
| 0.8311, |
| 0.8767, |
| 0.2944, |
| 0.8972, |
| 0.87, |
| 0.8656, |
| 0.8789, |
| 0.02, |
| 0.8722, |
| 0.8207 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 3, |
| "steps": 36, |
| "score": 0.6303, |
| "total_reward": 16.3887, |
| "completion_rate": 0.75, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.798, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 1, |
| "status": "failed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9528, |
| 0.9506, |
| 0.9233, |
| 0.3411, |
| 0.8667, |
| 0.9144, |
| 0.9372, |
| 0.91, |
| 0.3278, |
| 0.3556, |
| 0.8511, |
| 0.8989, |
| 0.2922, |
| 0.8878, |
| 0.3056, |
| 0.8489, |
| 0.8767, |
| 0.8922, |
| 0.333, |
| 0.2878, |
| 0.8283, |
| 0.8589, |
| 0.3297, |
| 0.3552, |
| 0.01 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 4, |
| "steps": 41, |
| "score": 0.6894, |
| "total_reward": 18.6138, |
| "completion_rate": 0.85, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.405, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9506, |
| 0.9233, |
| 0.3711, |
| 0.8844, |
| 0.9122, |
| 0.33, |
| 0.3186, |
| 0.9011, |
| 0.8667, |
| 0.86, |
| 0.8878, |
| 0.8833, |
| 0.8789, |
| 0.8767, |
| 0.8994, |
| 0.3352, |
| 0.8678, |
| 0.3033, |
| 0.8239, |
| 0.8744, |
| 0.8678, |
| 0.2656, |
| 0.2933, |
| 0.2911, |
| 0.7076 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 5, |
| "steps": 31, |
| "score": 0.6062, |
| "total_reward": 15.1538, |
| "completion_rate": 0.75, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.816, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 2, |
| "status": "failed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.8933, |
| 0.9211, |
| 0.3367, |
| 0.3344, |
| 0.3322, |
| 0.8578, |
| 0.9306, |
| 0.9033, |
| 0.3211, |
| 0.8467, |
| 0.9194, |
| 0.32, |
| 0.8878, |
| 0.8856, |
| 0.8811, |
| 0.02, |
| 0.8394, |
| 0.0752, |
| 0.87, |
| 0.8678, |
| 0.02, |
| 0.8883, |
| 0.01 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 6, |
| "steps": 39, |
| "score": 0.6337, |
| "total_reward": 15.8429, |
| "completion_rate": 0.8, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.872, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 2, |
| "status": "failed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.8956, |
| 0.8889, |
| 0.9167, |
| 0.9144, |
| 0.9122, |
| 0.91, |
| 0.9006, |
| 0.8511, |
| 0.3197, |
| 0.9194, |
| 0.28, |
| 0.8556, |
| 0.8811, |
| 0.8789, |
| 0.8422, |
| 0.8856, |
| 0.3311, |
| 0.8589, |
| 0.0597, |
| 0.3222, |
| 0.27, |
| 0.8728, |
| 0.02, |
| 0.01 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 7, |
| "steps": 32, |
| "score": 0.7179, |
| "total_reward": 15.793, |
| "completion_rate": 0.85, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.869, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 1, |
| "status": "failed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.8756, |
| 0.8911, |
| 0.8867, |
| 0.9144, |
| 0.86, |
| 0.9056, |
| 0.9033, |
| 0.8989, |
| 0.8967, |
| 0.3444, |
| 0.89, |
| 0.8356, |
| 0.9083, |
| 0.02, |
| 0.8789, |
| 0.8744, |
| 0.87, |
| 0.8928, |
| 0.8633, |
| 0.3111, |
| 0.01 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 8, |
| "steps": 31, |
| "score": 0.7087, |
| "total_reward": 16.3004, |
| "completion_rate": 0.85, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.701, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 2, |
| "status": "failed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.3733, |
| 0.9211, |
| 0.8844, |
| 0.9122, |
| 0.91, |
| 0.3578, |
| 0.9056, |
| 0.9283, |
| 0.8667, |
| 0.8944, |
| 0.84, |
| 0.3456, |
| 0.8833, |
| 0.8811, |
| 0.8589, |
| 0.8767, |
| 0.0774, |
| 0.835, |
| 0.8856, |
| 0.8633, |
| 0.01 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 9, |
| "steps": 37, |
| "score": 0.7151, |
| "total_reward": 17.877, |
| "completion_rate": 0.9, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.558, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9528, |
| 0.9256, |
| 0.9233, |
| 0.8689, |
| 0.3597, |
| 0.8822, |
| 0.935, |
| 0.02, |
| 0.3556, |
| 0.9011, |
| 0.8967, |
| 0.8944, |
| 0.84, |
| 0.8356, |
| 0.8489, |
| 0.8244, |
| 0.835, |
| 0.3178, |
| 0.8656, |
| 0.8261, |
| 0.8217, |
| 0.3044, |
| 0.85, |
| 0.7724 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 10, |
| "steps": 38, |
| "score": 0.6037, |
| "total_reward": 17.5072, |
| "completion_rate": 0.75, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.772, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9233, |
| 0.9461, |
| 0.8844, |
| 0.3622, |
| 0.9078, |
| 0.02, |
| 0.3233, |
| 0.9261, |
| 0.8967, |
| 0.8944, |
| 0.86, |
| 0.3378, |
| 0.02, |
| 0.3263, |
| 0.8811, |
| 0.3289, |
| 0.2967, |
| 0.8994, |
| 0.8722, |
| 0.8678, |
| 0.3386, |
| 0.3463, |
| 0.02, |
| 0.3089, |
| 0.8544, |
| 0.355, |
| 0.7709 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 11, |
| "steps": 32, |
| "score": 0.5881, |
| "total_reward": 14.703, |
| "completion_rate": 0.7, |
| "detection_rate": 0.3333, |
| "trust_calibration": 0.743, |
| "adversarial_detections": 1, |
| "adversarial_poisonings": 2, |
| "status": "failed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.8756, |
| 0.9233, |
| 0.9211, |
| 0.3459, |
| 0.8844, |
| 0.3622, |
| 0.9078, |
| 0.9306, |
| 0.02, |
| 0.8689, |
| 0.8967, |
| 0.8944, |
| 0.8722, |
| 0.8878, |
| 0.8856, |
| 0.9228, |
| 0.0819, |
| 0.2967, |
| 0.3244, |
| 0.8722, |
| 0.02, |
| 0.3356, |
| 0.3011, |
| 0.01 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 12, |
| "steps": 36, |
| "score": 0.6249, |
| "total_reward": 14.9974, |
| "completion_rate": 0.75, |
| "detection_rate": 0.3333, |
| "trust_calibration": 0.716, |
| "adversarial_detections": 1, |
| "adversarial_poisonings": 2, |
| "status": "failed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9528, |
| 0.8933, |
| 0.3411, |
| 0.9189, |
| 0.8644, |
| 0.9122, |
| 0.8756, |
| 0.02, |
| 0.3441, |
| 0.3189, |
| 0.3167, |
| 0.8622, |
| 0.8878, |
| 0.8511, |
| 0.8789, |
| 0.8244, |
| 0.8878, |
| 0.2856, |
| 0.0663, |
| 0.8589, |
| 0.8961, |
| 0.8772, |
| 0.01 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 13, |
| "steps": 38, |
| "score": 0.7872, |
| "total_reward": 18.1053, |
| "completion_rate": 0.95, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.64, |
| "adversarial_detections": 1, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.8911, |
| 0.8667, |
| 0.9122, |
| 0.3278, |
| 0.9056, |
| 0.8689, |
| 0.02, |
| 0.8622, |
| 0.8378, |
| 0.8533, |
| 0.8289, |
| 0.8767, |
| 0.8722, |
| 0.895, |
| 0.8856, |
| 0.8633, |
| 0.8611, |
| 0.9017, |
| 0.8939, |
| 0.3, |
| 0.8081 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 14, |
| "steps": 35, |
| "score": 0.6988, |
| "total_reward": 18.1679, |
| "completion_rate": 0.95, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.661, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 2, |
| "status": "failed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.8733, |
| 0.8889, |
| 0.8844, |
| 0.3322, |
| 0.91, |
| 0.9078, |
| 0.9056, |
| 0.9033, |
| 0.8989, |
| 0.8622, |
| 0.89, |
| 0.3408, |
| 0.8856, |
| 0.8833, |
| 0.3011, |
| 0.8267, |
| 0.8372, |
| 0.87, |
| 0.0708, |
| 0.8833, |
| 0.2811, |
| 0.8839, |
| 0.8544, |
| 0.01 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 15, |
| "steps": 38, |
| "score": 0.68, |
| "total_reward": 19.0388, |
| "completion_rate": 0.85, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.774, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3548, |
| 0.9256, |
| 0.9233, |
| 0.8689, |
| 0.3667, |
| 0.3644, |
| 0.8922, |
| 0.935, |
| 0.8556, |
| 0.3533, |
| 0.3211, |
| 0.3189, |
| 0.8944, |
| 0.89, |
| 0.3378, |
| 0.8333, |
| 0.8789, |
| 0.8767, |
| 0.8744, |
| 0.2922, |
| 0.333, |
| 0.8656, |
| 0.8261, |
| 0.8567, |
| 0.3, |
| 0.8478, |
| 0.8066 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 16, |
| "steps": 33, |
| "score": 0.5966, |
| "total_reward": 15.511, |
| "completion_rate": 0.75, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.691, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 1, |
| "status": "failed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9256, |
| 0.9233, |
| 0.9211, |
| 0.9189, |
| 0.3044, |
| 0.02, |
| 0.935, |
| 0.02, |
| 0.3256, |
| 0.8711, |
| 0.8967, |
| 0.3444, |
| 0.02, |
| 0.89, |
| 0.8556, |
| 0.8833, |
| 0.3311, |
| 0.3289, |
| 0.8744, |
| 0.8878, |
| 0.8633, |
| 0.3141, |
| 0.8589, |
| 0.01 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 17, |
| "steps": 35, |
| "score": 0.5891, |
| "total_reward": 16.4939, |
| "completion_rate": 0.75, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.795, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 1, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.02, |
| 0.3433, |
| 0.3319, |
| 0.8644, |
| 0.3622, |
| 0.91, |
| 0.3286, |
| 0.9033, |
| 0.9011, |
| 0.8789, |
| 0.3697, |
| 0.9194, |
| 0.84, |
| 0.8856, |
| 0.8811, |
| 0.8789, |
| 0.3597, |
| 0.2692, |
| 0.87, |
| 0.2878, |
| 0.8656, |
| 0.0663, |
| 0.8239, |
| 0.8817, |
| 0.02, |
| 0.4835 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 18, |
| "steps": 35, |
| "score": 0.6548, |
| "total_reward": 16.3705, |
| "completion_rate": 0.75, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.573, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9233, |
| 0.02, |
| 0.9167, |
| 0.3644, |
| 0.3622, |
| 0.91, |
| 0.8556, |
| 0.9033, |
| 0.8689, |
| 0.8967, |
| 0.8922, |
| 0.915, |
| 0.02, |
| 0.8533, |
| 0.8789, |
| 0.2967, |
| 0.3422, |
| 0.3078, |
| 0.8656, |
| 0.8611, |
| 0.2789, |
| 0.3297, |
| 0.7281 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 19, |
| "steps": 38, |
| "score": 0.6912, |
| "total_reward": 17.2799, |
| "completion_rate": 0.9, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.834, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 1, |
| "status": "failed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3156, |
| 0.9211, |
| 0.9189, |
| 0.9144, |
| 0.9122, |
| 0.33, |
| 0.02, |
| 0.9133, |
| 0.9261, |
| 0.8967, |
| 0.8422, |
| 0.89, |
| 0.8356, |
| 0.8511, |
| 0.8789, |
| 0.8922, |
| 0.87, |
| 0.3178, |
| 0.8811, |
| 0.8589, |
| 0.8544, |
| 0.87, |
| 0.3108, |
| 0.01 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 20, |
| "steps": 44, |
| "score": 0.6149, |
| "total_reward": 19.0606, |
| "completion_rate": 0.85, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.859, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 2, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3556, |
| 0.8711, |
| 0.8667, |
| 0.9144, |
| 0.9122, |
| 0.337, |
| 0.9078, |
| 0.9033, |
| 0.3281, |
| 0.3519, |
| 0.3467, |
| 0.8922, |
| 0.89, |
| 0.3608, |
| 0.8856, |
| 0.8833, |
| 0.8289, |
| 0.2714, |
| 0.87, |
| 0.3508, |
| 0.8656, |
| 0.3089, |
| 0.0597, |
| 0.8172, |
| 0.053, |
| 0.8728, |
| 0.8083, |
| 0.3439, |
| 0.2567, |
| 0.5146 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 21, |
| "steps": 30, |
| "score": 0.7401, |
| "total_reward": 15.543, |
| "completion_rate": 0.85, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.709, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 1, |
| "status": "failed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9256, |
| 0.8911, |
| 0.8867, |
| 0.9144, |
| 0.3322, |
| 0.9078, |
| 0.9056, |
| 0.8711, |
| 0.8467, |
| 0.8922, |
| 0.89, |
| 0.02, |
| 0.8856, |
| 0.8811, |
| 0.8589, |
| 0.8744, |
| 0.87, |
| 0.8856, |
| 0.01 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 22, |
| "steps": 45, |
| "score": 0.5529, |
| "total_reward": 16.5871, |
| "completion_rate": 0.7, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.709, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 2, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3548, |
| 0.02, |
| 0.9311, |
| 0.8844, |
| 0.3622, |
| 0.36, |
| 0.9078, |
| 0.2933, |
| 0.8689, |
| 0.9217, |
| 0.3444, |
| 0.32, |
| 0.8556, |
| 0.3033, |
| 0.3011, |
| 0.8767, |
| 0.3022, |
| 0.87, |
| 0.8678, |
| 0.8811, |
| 0.0619, |
| 0.8544, |
| 0.3022, |
| 0.8478, |
| 0.8083, |
| 0.3119, |
| 0.0397, |
| 0.8322, |
| 0.4222 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 23, |
| "steps": 31, |
| "score": 0.7921, |
| "total_reward": 18.2193, |
| "completion_rate": 0.95, |
| "detection_rate": 0.5, |
| "trust_calibration": 0.847, |
| "adversarial_detections": 1, |
| "adversarial_poisonings": 1, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.8756, |
| 0.9233, |
| 0.3481, |
| 0.9439, |
| 0.9167, |
| 0.9144, |
| 0.86, |
| 0.9078, |
| 0.9033, |
| 0.9261, |
| 0.8967, |
| 0.8944, |
| 0.8922, |
| 0.89, |
| 0.8878, |
| 0.02, |
| 0.9228, |
| 0.0819, |
| 0.8744, |
| 0.87, |
| 0.8833, |
| 0.7254 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 24, |
| "steps": 45, |
| "score": 0.6293, |
| "total_reward": 16.3622, |
| "completion_rate": 0.75, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.813, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3156, |
| 0.9483, |
| 0.9461, |
| 0.3197, |
| 0.9072, |
| 0.8756, |
| 0.9033, |
| 0.3541, |
| 0.02, |
| 0.9044, |
| 0.02, |
| 0.3608, |
| 0.8511, |
| 0.2967, |
| 0.8722, |
| 0.8356, |
| 0.8111, |
| 0.2867, |
| 0.8544, |
| 0.845, |
| 0.3156, |
| 0.8061, |
| 0.8367, |
| 0.7972, |
| 0.7658 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 25, |
| "steps": 34, |
| "score": 0.6745, |
| "total_reward": 16.8613, |
| "completion_rate": 0.85, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.809, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 2, |
| "status": "failed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.8933, |
| 0.9461, |
| 0.9189, |
| 0.8644, |
| 0.9122, |
| 0.91, |
| 0.8556, |
| 0.8711, |
| 0.8989, |
| 0.3237, |
| 0.8944, |
| 0.8922, |
| 0.8378, |
| 0.3356, |
| 0.8833, |
| 0.8489, |
| 0.0797, |
| 0.3244, |
| 0.3222, |
| 0.8878, |
| 0.8906, |
| 0.9061, |
| 0.2967, |
| 0.01 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 26, |
| "steps": 43, |
| "score": 0.5813, |
| "total_reward": 17.4397, |
| "completion_rate": 0.75, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.815, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 1, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.8933, |
| 0.9211, |
| 0.9439, |
| 0.9167, |
| 0.9144, |
| 0.02, |
| 0.3308, |
| 0.8711, |
| 0.8467, |
| 0.8922, |
| 0.8878, |
| 0.3486, |
| 0.3033, |
| 0.2759, |
| 0.8767, |
| 0.3244, |
| 0.3452, |
| 0.29, |
| 0.8156, |
| 0.8633, |
| 0.2889, |
| 0.0597, |
| 0.8544, |
| 0.3372, |
| 0.8478, |
| 0.2956, |
| 0.2811, |
| 0.2889, |
| 0.4707 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 27, |
| "steps": 40, |
| "score": 0.5674, |
| "total_reward": 15.3205, |
| "completion_rate": 0.7, |
| "detection_rate": 0.25, |
| "trust_calibration": 0.816, |
| "adversarial_detections": 1, |
| "adversarial_poisonings": 3, |
| "status": "failed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9206, |
| 0.3433, |
| 0.3541, |
| 0.9189, |
| 0.9244, |
| 0.3622, |
| 0.9078, |
| 0.9133, |
| 0.8689, |
| 0.8644, |
| 0.885, |
| 0.02, |
| 0.8533, |
| 0.9061, |
| 0.8267, |
| 0.9139, |
| 0.073, |
| 0.3356, |
| 0.3441, |
| 0.0619, |
| 0.8994, |
| 0.2722, |
| 0.323, |
| 0.3308, |
| 0.8433, |
| 0.01 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 28, |
| "steps": 32, |
| "score": 0.6685, |
| "total_reward": 16.0443, |
| "completion_rate": 0.8, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.793, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 1, |
| "status": "failed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.8956, |
| 0.9211, |
| 0.9189, |
| 0.8844, |
| 0.9122, |
| 0.91, |
| 0.9078, |
| 0.9056, |
| 0.3763, |
| 0.02, |
| 0.3167, |
| 0.8944, |
| 0.89, |
| 0.9128, |
| 0.8856, |
| 0.8461, |
| 0.8589, |
| 0.3267, |
| 0.8372, |
| 0.9128, |
| 0.2933, |
| 0.3461, |
| 0.01 |
| ] |
| }, |
| { |
| "policy": "random", |
| "task_type": "task3", |
| "seed": 29, |
| "steps": 40, |
| "score": 0.6868, |
| "total_reward": 17.8577, |
| "completion_rate": 0.85, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.907, |
| "adversarial_detections": 1, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.8933, |
| 0.3411, |
| 0.9189, |
| 0.02, |
| 0.8822, |
| 0.9028, |
| 0.02, |
| 0.8511, |
| 0.8989, |
| 0.3467, |
| 0.3444, |
| 0.8578, |
| 0.8856, |
| 0.8511, |
| 0.2989, |
| 0.9094, |
| 0.87, |
| 0.8856, |
| 0.3641, |
| 0.8567, |
| 0.3572, |
| 0.8895, |
| 0.8083, |
| 0.8353 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 0, |
| "steps": 43, |
| "score": 0.7114, |
| "total_reward": 18.4969, |
| "completion_rate": 0.85, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.729, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9528, |
| 0.9506, |
| 0.9483, |
| 0.3711, |
| 0.3689, |
| 0.3667, |
| 0.3344, |
| 0.91, |
| 0.9056, |
| 0.9011, |
| 0.3267, |
| 0.8922, |
| 0.8878, |
| 0.8833, |
| 0.8789, |
| 0.8744, |
| 0.3, |
| 0.2956, |
| 0.8611, |
| 0.2867, |
| 0.8522, |
| 0.8478, |
| 0.8433, |
| 0.8389, |
| 0.7841 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 1, |
| "steps": 29, |
| "score": 0.7083, |
| "total_reward": 17.707, |
| "completion_rate": 0.85, |
| "detection_rate": 0.5, |
| "trust_calibration": 0.721, |
| "adversarial_detections": 4, |
| "adversarial_poisonings": 4, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9528, |
| 0.9506, |
| 0.9483, |
| 0.3711, |
| 0.3689, |
| 0.3667, |
| 0.9144, |
| 0.9122, |
| 0.91, |
| 0.9078, |
| 0.9056, |
| 0.9033, |
| 0.9011, |
| 0.8989, |
| 0.8967, |
| 0.0974, |
| 0.0952, |
| 0.093, |
| 0.0908, |
| 0.925, |
| 0.9206, |
| 0.9161, |
| 0.9117, |
| 0.6632 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 2, |
| "steps": 29, |
| "score": 0.6919, |
| "total_reward": 17.2983, |
| "completion_rate": 0.85, |
| "detection_rate": 0.4444, |
| "trust_calibration": 0.561, |
| "adversarial_detections": 4, |
| "adversarial_poisonings": 5, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3778, |
| 0.3456, |
| 0.9233, |
| 0.9211, |
| 0.9189, |
| 0.9167, |
| 0.9144, |
| 0.9122, |
| 0.91, |
| 0.9078, |
| 0.9056, |
| 0.9033, |
| 0.9011, |
| 0.8989, |
| 0.0997, |
| 0.0974, |
| 0.0952, |
| 0.093, |
| 0.0908, |
| 0.925, |
| 0.9206, |
| 0.9161, |
| 0.9117, |
| 0.6065 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 3, |
| "steps": 42, |
| "score": 0.8546, |
| "total_reward": 18.8008, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.843, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.3111, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.8589, |
| 0.8544, |
| 0.85, |
| 0.8456, |
| 0.8411, |
| 0.8675 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 4, |
| "steps": 29, |
| "score": 0.7165, |
| "total_reward": 17.9128, |
| "completion_rate": 0.9, |
| "detection_rate": 0.4444, |
| "trust_calibration": 0.721, |
| "adversarial_detections": 4, |
| "adversarial_poisonings": 5, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9256, |
| 0.9233, |
| 0.9211, |
| 0.9189, |
| 0.9167, |
| 0.3414, |
| 0.9122, |
| 0.91, |
| 0.9078, |
| 0.9056, |
| 0.9033, |
| 0.9011, |
| 0.8989, |
| 0.0997, |
| 0.0974, |
| 0.0952, |
| 0.093, |
| 0.0908, |
| 0.925, |
| 0.9206, |
| 0.9161, |
| 0.9117, |
| 0.664 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 5, |
| "steps": 46, |
| "score": 0.7558, |
| "total_reward": 18.1385, |
| "completion_rate": 0.9, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.832, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.3422, |
| 0.9078, |
| 0.3333, |
| 0.8989, |
| 0.8944, |
| 0.32, |
| 0.8856, |
| 0.3111, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.8589, |
| 0.8544, |
| 0.85, |
| 0.8456, |
| 0.2711, |
| 0.8367, |
| 0.8322, |
| 0.8229 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 6, |
| "steps": 27, |
| "score": 0.6991, |
| "total_reward": 16.778, |
| "completion_rate": 0.85, |
| "detection_rate": 0.4, |
| "trust_calibration": 0.725, |
| "adversarial_detections": 4, |
| "adversarial_poisonings": 6, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9256, |
| 0.9233, |
| 0.9211, |
| 0.9189, |
| 0.9167, |
| 0.9144, |
| 0.9122, |
| 0.91, |
| 0.9078, |
| 0.9056, |
| 0.9033, |
| 0.9011, |
| 0.1019, |
| 0.0997, |
| 0.0974, |
| 0.0952, |
| 0.093, |
| 0.0908, |
| 0.925, |
| 0.9206, |
| 0.9161, |
| 0.6387 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 7, |
| "steps": 42, |
| "score": 0.7756, |
| "total_reward": 19.3902, |
| "completion_rate": 0.95, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.835, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9256, |
| 0.3763, |
| 0.9211, |
| 0.3919, |
| 0.3997, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.2933, |
| 0.8589, |
| 0.8544, |
| 0.28, |
| 0.8456, |
| 0.8411, |
| 0.8478 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 8, |
| "steps": 44, |
| "score": 0.809, |
| "total_reward": 19.4157, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.853, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.3526, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.3333, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.8589, |
| 0.8544, |
| 0.85, |
| 0.2756, |
| 0.8411, |
| 0.8367, |
| 0.8654 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 9, |
| "steps": 40, |
| "score": 0.782, |
| "total_reward": 19.5499, |
| "completion_rate": 0.95, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.837, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9256, |
| 0.9233, |
| 0.3841, |
| 0.9189, |
| 0.3997, |
| 0.3994, |
| 0.3972, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.2889, |
| 0.8544, |
| 0.85, |
| 0.8456, |
| 0.8528 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 10, |
| "steps": 31, |
| "score": 0.712, |
| "total_reward": 17.8008, |
| "completion_rate": 0.85, |
| "detection_rate": 0.625, |
| "trust_calibration": 0.448, |
| "adversarial_detections": 5, |
| "adversarial_poisonings": 3, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9256, |
| 0.9233, |
| 0.9211, |
| 0.9189, |
| 0.9167, |
| 0.9144, |
| 0.9122, |
| 0.91, |
| 0.9078, |
| 0.9056, |
| 0.3303, |
| 0.3281, |
| 0.8989, |
| 0.0997, |
| 0.0974, |
| 0.0952, |
| 0.9295, |
| 0.925, |
| 0.9206, |
| 0.9161, |
| 0.9117, |
| 0.3356, |
| 0.6281 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 11, |
| "steps": 40, |
| "score": 0.7732, |
| "total_reward": 18.5566, |
| "completion_rate": 0.9, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.835, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9528, |
| 0.9506, |
| 0.9483, |
| 0.3711, |
| 0.3689, |
| 0.3667, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.2978, |
| 0.2933, |
| 0.8589, |
| 0.8544, |
| 0.85, |
| 0.8456, |
| 0.8349 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 12, |
| "steps": 42, |
| "score": 0.8546, |
| "total_reward": 18.8009, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.844, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.2889, |
| 0.8544, |
| 0.85, |
| 0.8456, |
| 0.8411, |
| 0.8676 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 13, |
| "steps": 39, |
| "score": 0.833, |
| "total_reward": 18.3252, |
| "completion_rate": 0.95, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.811, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.3456, |
| 0.3433, |
| 0.9189, |
| 0.9144, |
| 0.91, |
| 0.9056, |
| 0.9011, |
| 0.8967, |
| 0.8922, |
| 0.8878, |
| 0.8833, |
| 0.8789, |
| 0.8744, |
| 0.87, |
| 0.8656, |
| 0.8611, |
| 0.8567, |
| 0.8522, |
| 0.8478, |
| 0.8485 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 14, |
| "steps": 29, |
| "score": 0.6889, |
| "total_reward": 17.9127, |
| "completion_rate": 0.85, |
| "detection_rate": 0.5, |
| "trust_calibration": 0.609, |
| "adversarial_detections": 4, |
| "adversarial_poisonings": 4, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9256, |
| 0.3763, |
| 0.3841, |
| 0.3689, |
| 0.9167, |
| 0.9144, |
| 0.9122, |
| 0.91, |
| 0.9078, |
| 0.9056, |
| 0.9033, |
| 0.9011, |
| 0.8989, |
| 0.3237, |
| 0.8944, |
| 0.8922, |
| 0.093, |
| 0.0908, |
| 0.0886, |
| 0.0863, |
| 0.9206, |
| 0.9161, |
| 0.9117, |
| 0.6353 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 15, |
| "steps": 30, |
| "score": 0.6847, |
| "total_reward": 18.4869, |
| "completion_rate": 0.9, |
| "detection_rate": 0.4444, |
| "trust_calibration": 0.635, |
| "adversarial_detections": 4, |
| "adversarial_poisonings": 5, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3608, |
| 0.9256, |
| 0.9233, |
| 0.9211, |
| 0.3459, |
| 0.9167, |
| 0.9144, |
| 0.9122, |
| 0.91, |
| 0.3348, |
| 0.9056, |
| 0.9033, |
| 0.9011, |
| 0.8989, |
| 0.8967, |
| 0.8944, |
| 0.8922, |
| 0.093, |
| 0.0908, |
| 0.0886, |
| 0.0863, |
| 0.0841, |
| 0.9184, |
| 0.9139, |
| 0.9095, |
| 0.6404 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 16, |
| "steps": 42, |
| "score": 0.8546, |
| "total_reward": 18.8009, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.844, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.2889, |
| 0.8544, |
| 0.85, |
| 0.8456, |
| 0.8411, |
| 0.8676 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 17, |
| "steps": 46, |
| "score": 0.8048, |
| "total_reward": 19.316, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.842, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.3467, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.2889, |
| 0.8544, |
| 0.28, |
| 0.8456, |
| 0.8411, |
| 0.8367, |
| 0.8322, |
| 0.8605 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 18, |
| "steps": 26, |
| "score": 0.6967, |
| "total_reward": 16.7213, |
| "completion_rate": 0.85, |
| "detection_rate": 0.3333, |
| "trust_calibration": 0.701, |
| "adversarial_detections": 3, |
| "adversarial_poisonings": 6, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9256, |
| 0.9233, |
| 0.9211, |
| 0.9189, |
| 0.9167, |
| 0.9144, |
| 0.9122, |
| 0.91, |
| 0.9078, |
| 0.9056, |
| 0.9033, |
| 0.9011, |
| 0.8989, |
| 0.0997, |
| 0.0974, |
| 0.0952, |
| 0.093, |
| 0.0908, |
| 0.0886, |
| 0.9228, |
| 0.9184, |
| 0.6149 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 19, |
| "steps": 20, |
| "score": 0.6193, |
| "total_reward": 13.0053, |
| "completion_rate": 0.65, |
| "detection_rate": 0.0, |
| "trust_calibration": 0.576, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 5, |
| "status": "failed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3478, |
| 0.9256, |
| 0.9233, |
| 0.9211, |
| 0.9189, |
| 0.3437, |
| 0.9144, |
| 0.9122, |
| 0.91, |
| 0.9078, |
| 0.9056, |
| 0.9033, |
| 0.9011, |
| 0.8989, |
| 0.8967, |
| 0.0974, |
| 0.0952, |
| 0.093, |
| 0.0908, |
| 0.01 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 20, |
| "steps": 46, |
| "score": 0.7498, |
| "total_reward": 19.4938, |
| "completion_rate": 0.95, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.835, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9256, |
| 0.3763, |
| 0.3841, |
| 0.9167, |
| 0.9122, |
| 0.3378, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.3156, |
| 0.8811, |
| 0.8767, |
| 0.3022, |
| 0.8678, |
| 0.8633, |
| 0.8589, |
| 0.2844, |
| 0.85, |
| 0.8456, |
| 0.8411, |
| 0.8367, |
| 0.8322, |
| 0.8412 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 21, |
| "steps": 42, |
| "score": 0.8546, |
| "total_reward": 18.8009, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.844, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.8589, |
| 0.8544, |
| 0.85, |
| 0.2756, |
| 0.8411, |
| 0.8676 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 22, |
| "steps": 36, |
| "score": 0.7334, |
| "total_reward": 21.2675, |
| "completion_rate": 1.0, |
| "detection_rate": 0.8, |
| "trust_calibration": 0.747, |
| "adversarial_detections": 4, |
| "adversarial_poisonings": 1, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3778, |
| 0.9256, |
| 0.9233, |
| 0.3481, |
| 0.9189, |
| 0.9167, |
| 0.9144, |
| 0.9122, |
| 0.337, |
| 0.9078, |
| 0.3326, |
| 0.9033, |
| 0.9011, |
| 0.3259, |
| 0.8967, |
| 0.3214, |
| 0.8922, |
| 0.89, |
| 0.3148, |
| 0.8856, |
| 0.0863, |
| 0.9206, |
| 0.9161, |
| 0.9117, |
| 0.9073, |
| 0.8789, |
| 0.8544, |
| 0.7968 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 23, |
| "steps": 28, |
| "score": 0.6922, |
| "total_reward": 17.3057, |
| "completion_rate": 0.85, |
| "detection_rate": 0.4444, |
| "trust_calibration": 0.645, |
| "adversarial_detections": 4, |
| "adversarial_poisonings": 5, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.3686, |
| 0.3433, |
| 0.9211, |
| 0.9189, |
| 0.9167, |
| 0.9144, |
| 0.9122, |
| 0.91, |
| 0.9078, |
| 0.9056, |
| 0.9033, |
| 0.9011, |
| 0.8989, |
| 0.8967, |
| 0.0974, |
| 0.0952, |
| 0.093, |
| 0.0908, |
| 0.0886, |
| 0.9228, |
| 0.9184, |
| 0.9139, |
| 0.6298 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 24, |
| "steps": 46, |
| "score": 0.7725, |
| "total_reward": 20.0838, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.836, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.3686, |
| 0.9483, |
| 0.3711, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.3022, |
| 0.8678, |
| 0.8633, |
| 0.2889, |
| 0.8544, |
| 0.85, |
| 0.8456, |
| 0.2711, |
| 0.8367, |
| 0.8322, |
| 0.8591 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 25, |
| "steps": 34, |
| "score": 0.6755, |
| "total_reward": 18.9148, |
| "completion_rate": 0.8, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.71, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9528, |
| 0.9506, |
| 0.9483, |
| 0.3711, |
| 0.3689, |
| 0.3667, |
| 0.9144, |
| 0.3322, |
| 0.91, |
| 0.9078, |
| 0.9056, |
| 0.9033, |
| 0.9011, |
| 0.3189, |
| 0.3167, |
| 0.8944, |
| 0.3122, |
| 0.31, |
| 0.3078, |
| 0.8856, |
| 0.9011, |
| 0.8967, |
| 0.8922, |
| 0.3078, |
| 0.3033, |
| 0.8789, |
| 0.782 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 26, |
| "steps": 34, |
| "score": 0.6561, |
| "total_reward": 19.0282, |
| "completion_rate": 0.8, |
| "detection_rate": 0.6667, |
| "trust_calibration": 0.467, |
| "adversarial_detections": 2, |
| "adversarial_poisonings": 1, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9528, |
| 0.9506, |
| 0.9483, |
| 0.3711, |
| 0.8989, |
| 0.3667, |
| 0.3644, |
| 0.9372, |
| 0.935, |
| 0.3578, |
| 0.3556, |
| 0.3533, |
| 0.3289, |
| 0.8967, |
| 0.8944, |
| 0.8922, |
| 0.317, |
| 0.8878, |
| 0.3126, |
| 0.8833, |
| 0.0841, |
| 0.9184, |
| 0.9139, |
| 0.8878, |
| 0.8656, |
| 0.3163, |
| 0.3419, |
| 0.6213 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 27, |
| "steps": 46, |
| "score": 0.7256, |
| "total_reward": 20.3155, |
| "completion_rate": 0.95, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.828, |
| "adversarial_detections": 0, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9256, |
| 0.3763, |
| 0.9211, |
| 0.9189, |
| 0.3997, |
| 0.3994, |
| 0.3972, |
| 0.9078, |
| 0.9033, |
| 0.3289, |
| 0.8944, |
| 0.89, |
| 0.3156, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.8589, |
| 0.2844, |
| 0.85, |
| 0.2756, |
| 0.8411, |
| 0.8367, |
| 0.8322, |
| 0.8395 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 28, |
| "steps": 28, |
| "score": 0.7355, |
| "total_reward": 17.6509, |
| "completion_rate": 0.9, |
| "detection_rate": 0.5, |
| "trust_calibration": 0.725, |
| "adversarial_detections": 5, |
| "adversarial_poisonings": 5, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9256, |
| 0.9233, |
| 0.9211, |
| 0.9189, |
| 0.9167, |
| 0.9144, |
| 0.9122, |
| 0.91, |
| 0.9078, |
| 0.9056, |
| 0.9033, |
| 0.9011, |
| 0.1019, |
| 0.0997, |
| 0.0974, |
| 0.0952, |
| 0.093, |
| 0.9273, |
| 0.9228, |
| 0.9184, |
| 0.9139, |
| 0.6841 |
| ] |
| }, |
| { |
| "policy": "heuristic", |
| "task_type": "task3", |
| "seed": 29, |
| "steps": 28, |
| "score": 0.7258, |
| "total_reward": 18.144, |
| "completion_rate": 0.9, |
| "detection_rate": 0.5, |
| "trust_calibration": 0.709, |
| "adversarial_detections": 4, |
| "adversarial_poisonings": 4, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9278, |
| 0.9256, |
| 0.9233, |
| 0.9211, |
| 0.9189, |
| 0.9167, |
| 0.9144, |
| 0.3392, |
| 0.91, |
| 0.9078, |
| 0.9056, |
| 0.9033, |
| 0.3281, |
| 0.8989, |
| 0.8967, |
| 0.8944, |
| 0.0952, |
| 0.093, |
| 0.0908, |
| 0.0886, |
| 0.9228, |
| 0.9184, |
| 0.9139, |
| 0.6801 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 0, |
| "steps": 42, |
| "score": 0.867, |
| "total_reward": 19.0739, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.935, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.3111, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8904 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 1, |
| "steps": 40, |
| "score": 0.8977, |
| "total_reward": 18.8524, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.931, |
| "adversarial_detections": 7, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.9095, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8938 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 2, |
| "steps": 42, |
| "score": 0.8422, |
| "total_reward": 18.5276, |
| "completion_rate": 0.95, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.933, |
| "adversarial_detections": 7, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3556, |
| 0.3511, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8724 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 3, |
| "steps": 42, |
| "score": 0.8689, |
| "total_reward": 19.1153, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.934, |
| "adversarial_detections": 7, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.3111, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8901 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 4, |
| "steps": 42, |
| "score": 0.8689, |
| "total_reward": 19.1154, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.934, |
| "adversarial_detections": 7, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.3289, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8901 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 5, |
| "steps": 46, |
| "score": 0.7916, |
| "total_reward": 18.9976, |
| "completion_rate": 0.95, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.917, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.3422, |
| 0.9078, |
| 0.3333, |
| 0.8989, |
| 0.8944, |
| 0.32, |
| 0.8856, |
| 0.3111, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.8589, |
| 0.8544, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8784, |
| 0.8739, |
| 0.8618 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 6, |
| "steps": 40, |
| "score": 0.8977, |
| "total_reward": 18.8523, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.931, |
| "adversarial_detections": 7, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.9095, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8938 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 7, |
| "steps": 40, |
| "score": 0.8958, |
| "total_reward": 18.8108, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.931, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.894 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 8, |
| "steps": 44, |
| "score": 0.8405, |
| "total_reward": 19.3315, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.934, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.3511, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.3289, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.8589, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8784, |
| 0.8857 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 9, |
| "steps": 40, |
| "score": 0.8958, |
| "total_reward": 18.8108, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.931, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.894 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 10, |
| "steps": 42, |
| "score": 0.8421, |
| "total_reward": 18.5263, |
| "completion_rate": 0.95, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.928, |
| "adversarial_detections": 7, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.3067, |
| 0.3022, |
| 0.8678, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.871 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 11, |
| "steps": 42, |
| "score": 0.8689, |
| "total_reward": 19.1153, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.934, |
| "adversarial_detections": 7, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.3467, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8901 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 12, |
| "steps": 40, |
| "score": 0.8977, |
| "total_reward": 18.8524, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.931, |
| "adversarial_detections": 7, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.9095, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8938 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 13, |
| "steps": 40, |
| "score": 0.8958, |
| "total_reward": 18.8108, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.931, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.894 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 14, |
| "steps": 44, |
| "score": 0.8405, |
| "total_reward": 19.3311, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.932, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.3422, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.2933, |
| 0.8589, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8784, |
| 0.8853 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 15, |
| "steps": 46, |
| "score": 0.8162, |
| "total_reward": 19.5883, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.93, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3556, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.3378, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.3156, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.8589, |
| 0.8544, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8784, |
| 0.8739, |
| 0.8825 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 16, |
| "steps": 40, |
| "score": 0.8977, |
| "total_reward": 18.8523, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.931, |
| "adversarial_detections": 7, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.9095, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8938 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 17, |
| "steps": 42, |
| "score": 0.867, |
| "total_reward": 19.0739, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.935, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.3467, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8903 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 18, |
| "steps": 40, |
| "score": 0.8958, |
| "total_reward": 18.8108, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.931, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.894 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 19, |
| "steps": 42, |
| "score": 0.8689, |
| "total_reward": 19.1153, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.934, |
| "adversarial_detections": 7, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.3333, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8901 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 20, |
| "steps": 46, |
| "score": 0.7653, |
| "total_reward": 18.3663, |
| "completion_rate": 0.9, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.909, |
| "adversarial_detections": 5, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3556, |
| 0.9211, |
| 0.9167, |
| 0.3422, |
| 0.9078, |
| 0.9033, |
| 0.3289, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.3067, |
| 0.8722, |
| 0.8678, |
| 0.2933, |
| 0.8589, |
| 0.8544, |
| 0.85, |
| 0.8873, |
| 0.8828, |
| 0.8784, |
| 0.8739, |
| 0.8423 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 21, |
| "steps": 40, |
| "score": 0.8958, |
| "total_reward": 18.8109, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.932, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.894 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 22, |
| "steps": 46, |
| "score": 0.7652, |
| "total_reward": 18.3659, |
| "completion_rate": 0.9, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.908, |
| "adversarial_detections": 5, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3556, |
| 0.9211, |
| 0.9167, |
| 0.3422, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.3111, |
| 0.8767, |
| 0.8722, |
| 0.2978, |
| 0.8633, |
| 0.2889, |
| 0.8544, |
| 0.85, |
| 0.8873, |
| 0.8828, |
| 0.8784, |
| 0.8739, |
| 0.8419 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 23, |
| "steps": 42, |
| "score": 0.8403, |
| "total_reward": 18.4862, |
| "completion_rate": 0.95, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.934, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.3556, |
| 0.3511, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8727 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 24, |
| "steps": 44, |
| "score": 0.8405, |
| "total_reward": 19.3311, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.933, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.3422, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.2933, |
| 0.8589, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8784, |
| 0.8854 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 25, |
| "steps": 44, |
| "score": 0.8405, |
| "total_reward": 19.3314, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.934, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.3511, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.3244, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.8678, |
| 0.8633, |
| 0.8589, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8784, |
| 0.8857 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 26, |
| "steps": 42, |
| "score": 0.8403, |
| "total_reward": 18.4855, |
| "completion_rate": 0.95, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.931, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.3067, |
| 0.3022, |
| 0.8678, |
| 0.8633, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.872 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 27, |
| "steps": 46, |
| "score": 0.8179, |
| "total_reward": 19.6285, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.924, |
| "adversarial_detections": 7, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.3467, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.3111, |
| 0.8767, |
| 0.8722, |
| 0.2978, |
| 0.8633, |
| 0.8589, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8784, |
| 0.8739, |
| 0.881 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 28, |
| "steps": 40, |
| "score": 0.8977, |
| "total_reward": 18.8524, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.931, |
| "adversarial_detections": 7, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.8944, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.8722, |
| 0.9095, |
| 0.905, |
| 0.9006, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8938 |
| ] |
| }, |
| { |
| "policy": "oracle_lite", |
| "task_type": "task3", |
| "seed": 29, |
| "steps": 44, |
| "score": 0.8405, |
| "total_reward": 19.3311, |
| "completion_rate": 1.0, |
| "detection_rate": 1.0, |
| "trust_calibration": 0.933, |
| "adversarial_detections": 6, |
| "adversarial_poisonings": 0, |
| "status": "completed", |
| "difficulty_profile": { |
| "adaptive": false, |
| "episodes_seen": 0, |
| "rolling_detection_rate": 0.0, |
| "adversarial_threshold": 0.7, |
| "high_stakes_ratio": 0.35, |
| "verify_budget_penalty": 0, |
| "adversary_benign_confidence": 0.88, |
| "adversary_poison_confidence": 0.92 |
| }, |
| "rewards": [ |
| 0.9256, |
| 0.9211, |
| 0.9167, |
| 0.9122, |
| 0.9078, |
| 0.9033, |
| 0.8989, |
| 0.3244, |
| 0.89, |
| 0.8856, |
| 0.8811, |
| 0.8767, |
| 0.3022, |
| 0.8678, |
| 0.8633, |
| 0.8589, |
| 0.8961, |
| 0.8917, |
| 0.8873, |
| 0.8828, |
| 0.8784, |
| 0.8854 |
| ] |
| } |
| ] |
| } |
|
|