{ "task": "all", "tasks": [ "task1", "task2", "task3" ], "episodes_per_policy": 30, "adaptive": false, "difficulty_controller": { "adaptive": true, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "difficulty_controller_by_task_policy": { "task1": { "random": {}, "heuristic": {}, "oracle_lite": {} }, "task2": { "random": {}, "heuristic": {}, "oracle_lite": {} }, "task3": { "random": {}, "heuristic": {}, "oracle_lite": {} } }, "summary": { "random": { "episodes": 90, "avg_score": 0.6904, "avg_completion_rate": 0.8131, "avg_detection_rate": 0.7935, "avg_trust_calibration": 0.4453, "avg_steps": 26.2111 }, "heuristic": { "episodes": 90, "avg_score": 0.7817, "avg_completion_rate": 0.8918, "avg_detection_rate": 0.9178, "avg_trust_calibration": 0.4373, "avg_steps": 24.4 }, "oracle_lite": { "episodes": 90, "avg_score": 0.8405, "avg_completion_rate": 0.8687, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.5892, "avg_steps": 29.4444 } }, "by_task": { "task1": { "random": { "episodes": 30, "avg_score": 0.7635, "avg_completion_rate": 0.76, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.0, "avg_steps": 15.1333 }, "heuristic": { "episodes": 30, "avg_score": 0.8504, "avg_completion_rate": 0.84, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.0, "avg_steps": 13.8333 }, "oracle_lite": { "episodes": 30, "avg_score": 0.9011, "avg_completion_rate": 0.7167, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.0, "avg_steps": 16.0 } }, "task2": { "random": { "episodes": 30, "avg_score": 0.6472, "avg_completion_rate": 0.8644, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.5829, "avg_steps": 26.7667 }, "heuristic": { "episodes": 30, "avg_score": 0.7497, "avg_completion_rate": 0.9288, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.5737, "avg_steps": 23.2333 }, "oracle_lite": { "episodes": 30, "avg_score": 0.7638, "avg_completion_rate": 0.9045, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.8377, "avg_steps": 30.0 } }, "task3": { "random": { "episodes": 30, "avg_score": 0.6606, "avg_completion_rate": 0.815, "avg_detection_rate": 0.3806, "avg_trust_calibration": 0.7531, "avg_steps": 36.7333 }, "heuristic": { "episodes": 30, "avg_score": 0.7449, "avg_completion_rate": 0.9067, "avg_detection_rate": 0.7534, "avg_trust_calibration": 0.7383, "avg_steps": 36.1333 }, "oracle_lite": { "episodes": 30, "avg_score": 0.8567, "avg_completion_rate": 0.985, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.9299, "avg_steps": 42.3333 } } }, "episodes": [ { "policy": "random", "task_type": "task1", "seed": 0, "steps": 15, "score": 0.6569, "total_reward": 7.8825, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.914, 0.962, 0.962, 0.02, 0.962, 0.962, 0.962, 0.3645, 0.02, 0.02, 0.867 ] }, { "policy": "random", "task_type": "task1", "seed": 1, "steps": 15, "score": 0.7996, "total_reward": 7.196, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.982, 0.914, 0.914, 0.962, 0.867, 0.962, 0.3165 ] }, { "policy": "random", "task_type": "task1", "seed": 2, "steps": 15, "score": 0.8129, "total_reward": 8.1294, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.369, 0.3024, 0.962, 0.962, 0.914, 0.962, 0.867, 0.867, 0.962 ] }, { "policy": "random", "task_type": "task1", "seed": 3, "steps": 14, "score": 0.8084, "total_reward": 10.5095, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.982, 0.982, 0.962, 0.3455, 0.867, 0.962, 0.946, 0.962, 0.3455, 0.3645, 0.867, 0.962 ] }, { "policy": "random", "task_type": "task1", "seed": 4, "steps": 15, "score": 0.7814, "total_reward": 8.5956, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.982, 0.962, 0.3645, 0.914, 0.962, 0.3455, 0.3136, 0.962, 0.914 ] }, { "policy": "random", "task_type": "task1", "seed": 5, "steps": 15, "score": 0.725, "total_reward": 8.7, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.914, 0.962, 0.3165, 0.3455, 0.3455, 0.867, 0.946, 0.962, 0.3455, 0.867 ] }, { "policy": "random", "task_type": "task1", "seed": 6, "steps": 15, "score": 0.8118, "total_reward": 8.1182, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.914, 0.914, 0.962, 0.962, 0.962, 0.962, 0.934, 0.867, 0.3206 ] }, { "policy": "random", "task_type": "task1", "seed": 7, "steps": 15, "score": 0.9334, "total_reward": 9.334, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.867, 0.914, 0.914, 0.962, 0.867, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "random", "task_type": "task1", "seed": 8, "steps": 15, "score": 0.8425, "total_reward": 9.2675, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.369, 0.962, 0.914, 0.962, 0.962, 0.3645, 0.962, 0.982, 0.914 ] }, { "policy": "random", "task_type": "task1", "seed": 9, "steps": 15, "score": 0.7751, "total_reward": 9.3011, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.982, 0.962, 0.962, 0.867, 0.3616, 0.914, 0.3645, 0.02, 0.982, 0.962, 0.962 ] }, { "policy": "random", "task_type": "task1", "seed": 10, "steps": 15, "score": 0.7653, "total_reward": 8.418, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.982, 0.914, 0.982, 0.962, 0.02, 0.3455, 0.3645, 0.962 ] }, { "policy": "random", "task_type": "task1", "seed": 11, "steps": 15, "score": 0.8199, "total_reward": 9.8394, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.867, 0.962, 0.962, 0.3504, 0.914, 0.982, 0.962, 0.982, 0.02, 0.914, 0.962 ] }, { "policy": "random", "task_type": "task1", "seed": 12, "steps": 15, "score": 0.6163, "total_reward": 7.3956, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.982, 0.914, 0.3455, 0.962, 0.898, 0.962, 0.914, 0.02, 0.3616, 0.3455, 0.3455 ] }, { "policy": "random", "task_type": "task1", "seed": 13, "steps": 15, "score": 0.7283, "total_reward": 6.555, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.914, 0.867, 0.962, 0.934, 0.962, 0.914, 0.02 ] }, { "policy": "random", "task_type": "task1", "seed": 14, "steps": 17, "score": 0.8867, "total_reward": 10.6405, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.867, 0.914, 0.914, 0.3455, 0.962, 0.962, 0.962, 0.962, 0.962, 0.914 ] }, { "policy": "random", "task_type": "task1", "seed": 15, "steps": 16, "score": 0.6915, "total_reward": 9.6809, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3504, 0.962, 0.962, 0.867, 0.3645, 0.3645, 0.982, 0.3645, 0.867, 0.982, 0.3455, 0.3455, 0.962 ] }, { "policy": "random", "task_type": "task1", "seed": 16, "steps": 15, "score": 0.7164, "total_reward": 9.313, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.2975, 0.02, 0.982, 0.02, 0.3455, 0.914, 0.962 ] }, { "policy": "random", "task_type": "task1", "seed": 17, "steps": 15, "score": 0.6495, "total_reward": 8.4439, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.02, 0.3455, 0.3136, 0.867, 0.982, 0.962, 0.3206, 0.962, 0.962, 0.982, 0.3826 ] }, { "policy": "random", "task_type": "task1", "seed": 18, "steps": 15, "score": 0.8235, "total_reward": 9.8815, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.02, 0.962, 0.982, 0.3645, 0.962, 0.867, 0.962, 0.914, 0.962 ] }, { "policy": "random", "task_type": "task1", "seed": 19, "steps": 15, "score": 0.7588, "total_reward": 8.347, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.2975, 0.962, 0.962, 0.962, 0.962, 0.3455, 0.02, 0.93, 0.982, 0.962 ] }, { "policy": "random", "task_type": "task1", "seed": 20, "steps": 15, "score": 0.6444, "total_reward": 7.7329, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3525, 0.867, 0.867, 0.962, 0.962, 0.3504, 0.962, 0.962, 0.3504, 0.3686, 0.3645 ] }, { "policy": "random", "task_type": "task1", "seed": 21, "steps": 15, "score": 0.8756, "total_reward": 9.6315, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.914, 0.914, 0.962, 0.3455, 0.962, 0.962, 0.914, 0.867 ] }, { "policy": "random", "task_type": "task1", "seed": 22, "steps": 15, "score": 0.6437, "total_reward": 7.0809, "completion_rate": 0.5, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3504, 0.02, 0.93, 0.914, 0.3645, 0.3645, 0.962, 0.2975, 0.914, 0.982 ] }, { "policy": "random", "task_type": "task1", "seed": 23, "steps": 15, "score": 0.8985, "total_reward": 10.7824, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.867, 0.962, 0.3504, 0.982, 0.962, 0.962, 0.867, 0.962, 0.962, 0.982, 0.962 ] }, { "policy": "random", "task_type": "task1", "seed": 24, "steps": 16, "score": 0.6933, "total_reward": 7.6267, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.2975, 0.982, 0.982, 0.3066, 0.934, 0.914, 0.962, 0.3686, 0.02, 0.93 ] }, { "policy": "random", "task_type": "task1", "seed": 25, "steps": 15, "score": 0.8266, "total_reward": 9.0928, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.914, 0.982, 0.962, 0.867, 0.962, 0.962, 0.867, 0.914, 0.962, 0.3504 ] }, { "policy": "random", "task_type": "task1", "seed": 26, "steps": 15, "score": 0.7833, "total_reward": 7.8326, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.914, 0.962, 0.982, 0.962, 0.962, 0.02, 0.3206, 0.914, 0.898 ] }, { "policy": "random", "task_type": "task1", "seed": 27, "steps": 16, "score": 0.8311, "total_reward": 9.1421, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.934, 0.3455, 0.3546, 0.962, 0.93, 0.982, 0.962, 0.93, 0.914, 0.914 ] }, { "policy": "random", "task_type": "task1", "seed": 28, "steps": 15, "score": 0.7196, "total_reward": 8.6356, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.914, 0.962, 0.962, 0.914, 0.962, 0.962, 0.962, 0.962, 0.3826, 0.02, 0.3165 ] }, { "policy": "random", "task_type": "task1", "seed": 29, "steps": 15, "score": 0.5851, "total_reward": 7.021, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.914, 0.3455, 0.962, 0.02, 0.914, 0.3165, 0.02, 0.867, 0.962, 0.369 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 0, "steps": 13, "score": 0.753, "total_reward": 10.5415, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.982, 0.982, 0.982, 0.3645, 0.3645, 0.982, 0.3645, 0.3645, 0.962, 0.962, 0.3455, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 1, "steps": 12, "score": 0.7843, "total_reward": 10.196, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.982, 0.982, 0.982, 0.3645, 0.3645, 0.982, 0.3645, 0.3645, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 2, "steps": 11, "score": 0.8612, "total_reward": 10.3345, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.369, 0.3455, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 3, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 4, "steps": 11, "score": 0.911, "total_reward": 10.9324, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.3504, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 5, "steps": 16, "score": 0.8266, "total_reward": 7.439, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.3525, 0.962, 0.3525, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 6, "steps": 10, "score": 0.962, "total_reward": 10.582, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 7, "steps": 16, "score": 0.8166, "total_reward": 9.7988, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.3686, 0.962, 0.3826, 0.3896, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 8, "steps": 16, "score": 0.8399, "total_reward": 8.3989, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.3504, 0.962, 0.962, 0.962, 0.962, 0.3525, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 9, "steps": 16, "score": 0.785, "total_reward": 10.2052, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.3756, 0.962, 0.3896, 0.391, 0.391, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 10, "steps": 10, "score": 0.962, "total_reward": 10.582, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 11, "steps": 16, "score": 0.7843, "total_reward": 10.196, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.982, 0.982, 0.982, 0.3645, 0.3645, 0.982, 0.3645, 0.3645, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 12, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 13, "steps": 16, "score": 0.9003, "total_reward": 9.0035, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.3455, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 14, "steps": 13, "score": 0.7534, "total_reward": 10.5473, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.3686, 0.3756, 0.962, 0.3896, 0.391, 0.3645, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 15, "steps": 13, "score": 0.8312, "total_reward": 11.6374, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3546, 0.962, 0.962, 0.962, 0.3504, 0.962, 0.962, 0.962, 0.962, 0.3504, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 16, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 17, "steps": 16, "score": 0.8943, "total_reward": 8.0485, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.3525, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 18, "steps": 10, "score": 0.962, "total_reward": 10.582, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 19, "steps": 12, "score": 0.8675, "total_reward": 11.2779, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3455, 0.962, 0.962, 0.962, 0.962, 0.3504, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 20, "steps": 16, "score": 0.7993, "total_reward": 8.7927, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.3686, 0.3756, 0.962, 0.962, 0.3525, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 21, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 22, "steps": 15, "score": 0.772, "total_reward": 12.3526, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.369, 0.962, 0.962, 0.3504, 0.962, 0.962, 0.962, 0.962, 0.3504, 0.962, 0.3504, 0.962, 0.962, 0.3504, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 23, "steps": 11, "score": 0.8606, "total_reward": 10.3271, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.3616, 0.3455, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 24, "steps": 16, "score": 0.8161, "total_reward": 9.7931, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.3616, 0.982, 0.369, 0.982, 0.3645, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 25, "steps": 14, "score": 0.6506, "total_reward": 9.7585, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.982, 0.982, 0.982, 0.3645, 0.3645, 0.982, 0.946, 0.369, 0.982, 0.3645, 0.3645, 0.982, 0.3645, 0.3645 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 26, "steps": 16, "score": 0.7054, "total_reward": 11.2865, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.982, 0.982, 0.982, 0.3645, 0.946, 0.982, 0.3645, 0.946, 0.982, 0.3645, 0.3645, 0.369, 0.369, 0.3645, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 27, "steps": 15, "score": 0.6937, "total_reward": 9.0177, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.3686, 0.962, 0.962, 0.3896, 0.391, 0.391, 0.391, 0.962, 0.3525, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 28, "steps": 10, "score": 0.962, "total_reward": 10.582, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "heuristic", "task_type": "task1", "seed": 29, "steps": 11, "score": 0.911, "total_reward": 10.9324, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.3504, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 0, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 1, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 2, "steps": 16, "score": 0.8266, "total_reward": 7.439, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3525, 0.3525, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 3, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 4, "steps": 16, "score": 0.8943, "total_reward": 8.0485, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.3525, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 5, "steps": 16, "score": 0.8266, "total_reward": 7.439, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.3525, 0.962, 0.3525, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 6, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 7, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 8, "steps": 16, "score": 0.8266, "total_reward": 7.439, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.3525, 0.962, 0.962, 0.962, 0.962, 0.3525, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 9, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 10, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 11, "steps": 16, "score": 0.8943, "total_reward": 8.0485, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.3525, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 12, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 13, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 14, "steps": 16, "score": 0.8943, "total_reward": 8.0485, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.3525, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 15, "steps": 16, "score": 0.8266, "total_reward": 7.439, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3525, 0.962, 0.962, 0.962, 0.3525, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 16, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 17, "steps": 16, "score": 0.8943, "total_reward": 8.0485, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.3525, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 18, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 19, "steps": 16, "score": 0.8943, "total_reward": 8.0485, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.3525, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 20, "steps": 16, "score": 0.7588, "total_reward": 6.8295, "completion_rate": 0.5, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3525, 0.962, 0.962, 0.3525, 0.962, 0.962, 0.3525, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 21, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 22, "steps": 16, "score": 0.8266, "total_reward": 7.439, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3525, 0.962, 0.962, 0.3525, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 23, "steps": 16, "score": 0.8266, "total_reward": 7.439, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3525, 0.3525, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 24, "steps": 16, "score": 0.8943, "total_reward": 8.0485, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.3525, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 25, "steps": 16, "score": 0.7588, "total_reward": 6.8295, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.3525, 0.962, 0.962, 0.962, 0.962, 0.962, 0.3525 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 26, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 27, "steps": 16, "score": 0.8943, "total_reward": 8.0485, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.3525, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 28, "steps": 16, "score": 0.962, "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962 ] }, { "policy": "oracle_lite", "task_type": "task1", "seed": 29, "steps": 16, "score": 0.8266, "total_reward": 7.439, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.962, 0.3525 ] }, { "policy": "random", "task_type": "task2", "seed": 0, "steps": 25, "score": 0.5298, "total_reward": 9.5365, "completion_rate": 0.667, "detection_rate": 1.0, "trust_calibration": 0.461, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9083, 0.925, 0.9167, 0.02, 0.8917, 0.8833, 0.8667, 0.2233, 0.02, 0.02, 0.725, 0.8083, 0.6917, 0.775, 0.1317, 0.115, 0.5949 ] }, { "policy": "random", "task_type": "task2", "seed": 1, "steps": 31, "score": 0.7252, "total_reward": 13.054, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.569, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9833, 0.8833, 0.8583, 0.875, 0.7583, 0.8417, 0.165, 0.7833, 0.8, 0.6833, 0.735, 0.1217, 0.7167, 0.7, 0.7083, 0.849 ] }, { "policy": "random", "task_type": "task2", "seed": 2, "steps": 27, "score": 0.6551, "total_reward": 11.7913, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.579, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3517, 0.262, 0.9167, 0.9083, 0.8583, 0.875, 0.7583, 0.7417, 0.825, 0.7083, 0.8, 0.6833, 0.6667, 0.75, 0.0967, 0.0983, 0.766 ] }, { "policy": "random", "task_type": "task2", "seed": 3, "steps": 28, "score": 0.6154, "total_reward": 12.9233, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.586, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.99, 0.9833, 0.925, 0.2717, 0.8, 0.8917, 0.9333, 0.875, 0.2217, 0.2233, 0.7417, 0.8333, 0.1483, 0.7917, 0.1383, 0.7333, 0.75, 0.7083, 0.123, 0.7686 ] }, { "policy": "random", "task_type": "task2", "seed": 4, "steps": 28, "score": 0.6575, "total_reward": 12.4928, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.458, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9833, 0.925, 0.2817, 0.8667, 0.8833, 0.23, 0.2113, 0.8417, 0.8, 0.775, 0.7917, 0.775, 0.7583, 0.75, 0.1067, 0.1313, 0.7235 ] }, { "policy": "random", "task_type": "task2", "seed": 5, "steps": 26, "score": 0.6206, "total_reward": 12.4128, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.408, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9, 0.9167, 0.24, 0.2467, 0.2383, 0.7667, 0.9083, 0.85, 0.1967, 0.725, 0.8667, 0.165, 0.7917, 0.7833, 0.7667, 0.02, 0.65, 0.7061 ] }, { "policy": "random", "task_type": "task2", "seed": 6, "steps": 31, "score": 0.7065, "total_reward": 12.7163, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.576, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9083, 0.8833, 0.9, 0.8917, 0.8833, 0.875, 0.8833, 0.7417, 0.188, 0.8667, 0.13, 0.7583, 0.7667, 0.7583, 0.7083, 0.7, 0.8083 ] }, { "policy": "random", "task_type": "task2", "seed": 7, "steps": 25, "score": 0.7366, "total_reward": 12.5218, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.776, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8333, 0.8917, 0.875, 0.8917, 0.775, 0.8583, 0.85, 0.8333, 0.825, 0.1817, 0.8, 0.6833, 0.825, 0.02, 0.7583, 0.8784 ] }, { "policy": "random", "task_type": "task2", "seed": 8, "steps": 25, "score": 0.7329, "total_reward": 13.9253, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.425, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.335, 0.9167, 0.8667, 0.8833, 0.875, 0.2767, 0.8583, 0.9, 0.8, 0.8167, 0.7, 0.1233, 0.775, 0.7667, 0.8083, 0.75, 0.7987 ] }, { "policy": "random", "task_type": "task2", "seed": 9, "steps": 27, "score": 0.7062, "total_reward": 12.712, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.597, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.99, 0.9333, 0.925, 0.8083, 0.278, 0.8583, 0.925, 0.02, 0.2233, 0.8417, 0.825, 0.8167, 0.735, 0.6833, 0.7333, 0.65, 0.8157 ] }, { "policy": "random", "task_type": "task2", "seed": 10, "steps": 26, "score": 0.5723, "total_reward": 12.0174, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.855, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.925, 0.9667, 0.8667, 0.2483, 0.8667, 0.02, 0.205, 0.8917, 0.825, 0.8167, 0.775, 0.1567, 0.02, 0.153, 0.7667, 0.8083, 0.105, 0.1067, 0.8194 ] }, { "policy": "random", "task_type": "task2", "seed": 11, "steps": 23, "score": 0.7197, "total_reward": 12.955, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.57, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8333, 0.925, 0.9167, 0.2703, 0.8667, 0.2483, 0.8667, 0.9083, 0.02, 0.8083, 0.825, 0.8167, 0.7683, 0.7917, 0.7833, 0.7417, 0.8063 ] }, { "policy": "random", "task_type": "task2", "seed": 12, "steps": 30, "score": 0.6047, "total_reward": 12.0935, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.344, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.99, 0.9, 0.2717, 0.9083, 0.8267, 0.8833, 0.8333, 0.02, 0.2197, 0.1883, 0.18, 0.7833, 0.7917, 0.7417, 0.7583, 0.65, 0.7, 0.0633, 0.6839 ] }, { "policy": "random", "task_type": "task2", "seed": 13, "steps": 30, "score": 0.6649, "total_reward": 11.9681, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.271, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.8917, 0.8, 0.8833, 0.2067, 0.8583, 0.8083, 0.02, 0.7833, 0.6917, 0.75, 0.6583, 0.75, 0.7333, 0.09, 0.7, 0.7014 ] }, { "policy": "random", "task_type": "task2", "seed": 14, "steps": 26, "score": 0.7146, "total_reward": 13.5771, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.416, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.825, 0.8833, 0.8667, 0.2383, 0.875, 0.8667, 0.8583, 0.85, 0.8333, 0.7833, 0.8, 0.1797, 0.7833, 0.775, 0.1217, 0.65, 0.7957 ] }, { "policy": "random", "task_type": "task2", "seed": 15, "steps": 27, "score": 0.5573, "total_reward": 12.2603, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.606, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3037, 0.9333, 0.925, 0.8083, 0.265, 0.2567, 0.8433, 0.925, 0.7583, 0.215, 0.1967, 0.1883, 0.8167, 0.8, 0.1567, 0.675, 0.7583, 0.75, 0.7417, 0.0883, 0.732 ] }, { "policy": "random", "task_type": "task2", "seed": 16, "steps": 23, "score": 0.5543, "total_reward": 11.0864, "completion_rate": 0.733, "detection_rate": 1.0, "trust_calibration": 0.437, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9333, 0.925, 0.9167, 0.9083, 0.2217, 0.02, 0.925, 0.02, 0.2133, 0.8167, 0.825, 0.1817, 0.02, 0.8, 0.7583, 0.775, 0.1317, 0.6298 ] }, { "policy": "random", "task_type": "task2", "seed": 17, "steps": 27, "score": 0.5694, "total_reward": 11.9565, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.688, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.02, 0.28, 0.2613, 0.7917, 0.2483, 0.875, 0.2213, 0.85, 0.8417, 0.7933, 0.233, 0.8667, 0.7, 0.7833, 0.7667, 0.7583, 0.168, 0.0703, 0.7609 ] }, { "policy": "random", "task_type": "task2", "seed": 18, "steps": 23, "score": 0.6662, "total_reward": 11.3256, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.602, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.925, 0.02, 0.9, 0.2567, 0.2483, 0.875, 0.7583, 0.85, 0.8083, 0.825, 0.8083, 0.85, 0.02, 0.75, 0.7306 ] }, { "policy": "random", "task_type": "task2", "seed": 19, "steps": 27, "score": 0.7003, "total_reward": 12.6055, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.625, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.2633, 0.9167, 0.9083, 0.8917, 0.8833, 0.23, 0.02, 0.825, 0.8917, 0.825, 0.7083, 0.8, 0.6833, 0.7417, 0.7583, 0.7083, 0.8255 ] }, { "policy": "random", "task_type": "task2", "seed": 20, "steps": 32, "score": 0.5557, "total_reward": 12.7811, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.636, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.2983, 0.8167, 0.8, 0.8917, 0.8833, 0.237, 0.8667, 0.85, 0.2037, 0.2213, 0.19, 0.8083, 0.8, 0.1997, 0.7833, 0.775, 0.6583, 0.0787, 0.725, 0.1347, 0.7083, 0.7861 ] }, { "policy": "random", "task_type": "task2", "seed": 21, "steps": 23, "score": 0.7485, "total_reward": 12.7247, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.523, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9333, 0.8917, 0.875, 0.8917, 0.2383, 0.8667, 0.8583, 0.8167, 0.725, 0.8083, 0.8, 0.02, 0.7833, 0.7667, 0.7897 ] }, { "policy": "random", "task_type": "task2", "seed": 22, "steps": 31, "score": 0.4891, "total_reward": 10.2706, "completion_rate": 0.667, "detection_rate": 1.0, "trust_calibration": 0.544, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3037, 0.02, 0.8917, 0.8667, 0.2483, 0.24, 0.8667, 0.18, 0.8083, 0.875, 0.1817, 0.165, 0.7583, 0.13, 0.1217, 0.75, 0.0983, 0.725, 0.7167, 0.6236 ] }, { "policy": "random", "task_type": "task2", "seed": 23, "steps": 20, "score": 0.7708, "total_reward": 13.1031, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.755, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8333, 0.925, 0.2787, 0.9583, 0.9, 0.8917, 0.775, 0.8667, 0.85, 0.8917, 0.825, 0.8167, 0.8083, 0.8, 0.7917, 0.8711 ] }, { "policy": "random", "task_type": "task2", "seed": 24, "steps": 31, "score": 0.5498, "total_reward": 9.8972, "completion_rate": 0.667, "detection_rate": 1.0, "trust_calibration": 0.809, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.2633, 0.975, 0.9667, 0.243, 0.9083, 0.8333, 0.85, 0.2297, 0.02, 0.7917, 0.02, 0.1647, 0.7417, 0.09, 0.7333, 0.7, 0.7165 ] }, { "policy": "random", "task_type": "task2", "seed": 25, "steps": 23, "score": 0.791, "total_reward": 13.4466, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.623, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9, 0.9667, 0.9083, 0.7917, 0.8833, 0.875, 0.7583, 0.8167, 0.8333, 0.187, 0.8167, 0.8083, 0.6917, 0.8333, 0.775, 0.8679 ] }, { "policy": "random", "task_type": "task2", "seed": 26, "steps": 29, "score": 0.5823, "total_reward": 11.646, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.895, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9, 0.9167, 0.9583, 0.9, 0.8917, 0.02, 0.2297, 0.8167, 0.76, 0.8083, 0.7917, 0.1813, 0.13, 0.0953, 0.75, 0.7917, 0.1413, 0.08, 0.8333 ] }, { "policy": "random", "task_type": "task2", "seed": 27, "steps": 29, "score": 0.6402, "total_reward": 12.1644, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.703, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9583, 0.28, 0.2847, 0.9083, 0.8667, 0.2483, 0.8667, 0.825, 0.8083, 0.7917, 0.825, 0.02, 0.75, 0.8167, 0.65, 0.0703, 0.725, 0.8094 ] }, { "policy": "random", "task_type": "task2", "seed": 28, "steps": 23, "score": 0.7219, "total_reward": 12.9944, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.671, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9083, 0.9167, 0.9083, 0.8667, 0.8833, 0.875, 0.8667, 0.8583, 0.258, 0.02, 0.165, 0.8167, 0.8, 0.8417, 0.7833, 0.6667, 0.8414 ] }, { "policy": "random", "task_type": "task2", "seed": 29, "steps": 27, "score": 0.5586, "total_reward": 10.614, "completion_rate": 0.733, "detection_rate": 1.0, "trust_calibration": 0.478, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9, 0.2717, 0.9083, 0.02, 0.8583, 0.8917, 0.02, 0.7417, 0.8333, 0.19, 0.1817, 0.7667, 0.7833, 0.7417, 0.1133, 0.0817, 0.644 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 0, "steps": 31, "score": 0.6145, "total_reward": 12.2902, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.72, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.99, 0.9833, 0.975, 0.2817, 0.2733, 0.265, 0.2467, 0.875, 0.8583, 0.8417, 0.19, 0.8083, 0.7917, 0.775, 0.7583, 0.7417, 0.09, 0.0733, 0.7719 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 1, "steps": 17, "score": 0.768, "total_reward": 13.8236, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.282, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.99, 0.9833, 0.975, 0.2817, 0.2733, 0.265, 0.8917, 0.8833, 0.875, 0.8667, 0.8583, 0.85, 0.8417, 0.8333, 0.825, 0.8167, 0.7053 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 2, "steps": 17, "score": 0.7237, "total_reward": 13.0266, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.284, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3517, 0.2883, 0.925, 0.9167, 0.9083, 0.9, 0.8917, 0.8833, 0.875, 0.8667, 0.8583, 0.85, 0.8417, 0.8333, 0.825, 0.1787, 0.6626 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 3, "steps": 30, "score": 0.7823, "total_reward": 12.5171, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.1317, 0.75, 0.7333, 0.7167, 0.9021 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 4, "steps": 17, "score": 0.7999, "total_reward": 14.3981, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.426, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9333, 0.925, 0.9167, 0.9083, 0.9, 0.2537, 0.8833, 0.875, 0.8667, 0.8583, 0.85, 0.8417, 0.8333, 0.825, 0.1787, 0.7991 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 5, "steps": 30, "score": 0.6545, "total_reward": 10.4723, "completion_rate": 0.733, "detection_rate": 1.0, "trust_calibration": 0.816, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.2483, 0.8667, 0.215, 0.8333, 0.8167, 0.165, 0.7833, 0.1317, 0.75, 0.7333, 0.7167, 0.7623 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 6, "steps": 15, "score": 0.8749, "total_reward": 13.9981, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.28, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9333, 0.925, 0.9167, 0.9083, 0.9, 0.8917, 0.8833, 0.875, 0.8667, 0.8583, 0.85, 0.8417, 0.8333, 0.7481 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 7, "steps": 28, "score": 0.7465, "total_reward": 13.4373, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.833, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9333, 0.313, 0.9167, 0.3163, 0.318, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.8983 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 8, "steps": 17, "score": 0.7992, "total_reward": 14.3856, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.39, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.2953, 0.925, 0.9167, 0.9083, 0.9, 0.2537, 0.8833, 0.875, 0.8667, 0.8583, 0.85, 0.8417, 0.8333, 0.825, 0.8167, 0.7866 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 9, "steps": 28, "score": 0.7248, "total_reward": 13.7712, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.834, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9333, 0.925, 0.3147, 0.9083, 0.318, 0.3117, 0.3033, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.8986 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 10, "steps": 16, "score": 0.797, "total_reward": 13.5485, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.432, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9333, 0.925, 0.9167, 0.9083, 0.9, 0.8917, 0.8833, 0.875, 0.8667, 0.8583, 0.212, 0.2037, 0.8333, 0.825, 0.7578 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 11, "steps": 30, "score": 0.6763, "total_reward": 12.8505, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.825, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.99, 0.9833, 0.975, 0.2817, 0.2733, 0.265, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.0817, 0.8522 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 12, "steps": 30, "score": 0.8246, "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.9441 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 13, "steps": 30, "score": 0.7935, "total_reward": 13.4903, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.839, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.2883, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.9436 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 14, "steps": 18, "score": 0.7309, "total_reward": 13.8869, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.264, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9333, 0.313, 0.3147, 0.2733, 0.9, 0.8917, 0.8833, 0.875, 0.8667, 0.8583, 0.85, 0.8417, 0.8333, 0.187, 0.8167, 0.8083, 0.6989 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 15, "steps": 18, "score": 0.7649, "total_reward": 14.5326, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.33, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3097, 0.9333, 0.925, 0.9167, 0.2703, 0.9, 0.8917, 0.8833, 0.875, 0.2287, 0.8583, 0.85, 0.8417, 0.8333, 0.825, 0.8167, 0.8083, 0.7656 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 16, "steps": 30, "score": 0.8246, "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.9441 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 17, "steps": 30, "score": 0.7823, "total_reward": 12.517, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.265, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.902 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 18, "steps": 15, "score": 0.8749, "total_reward": 13.998, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.28, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9333, 0.925, 0.9167, 0.9083, 0.9, 0.8917, 0.8833, 0.875, 0.8667, 0.8583, 0.85, 0.8417, 0.8333, 0.748 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 19, "steps": 17, "score": 0.7966, "total_reward": 14.3395, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.279, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.2967, 0.9333, 0.925, 0.9167, 0.9083, 0.262, 0.8917, 0.8833, 0.875, 0.8667, 0.8583, 0.85, 0.8417, 0.8333, 0.825, 0.8167, 0.7475 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 20, "steps": 30, "score": 0.6558, "total_reward": 11.8048, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.82, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9333, 0.313, 0.3147, 0.9, 0.8833, 0.2317, 0.85, 0.8333, 0.8167, 0.8, 0.1483, 0.7667, 0.75, 0.0983, 0.7167, 0.8071 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 21, "steps": 30, "score": 0.8246, "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.9441 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 22, "steps": 22, "score": 0.6604, "total_reward": 15.1886, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.471, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3517, 0.9333, 0.925, 0.2787, 0.9083, 0.9, 0.8917, 0.8833, 0.237, 0.8667, 0.2203, 0.85, 0.8417, 0.1953, 0.825, 0.1787, 0.8083, 0.8, 0.1537, 0.7833, 0.775, 0.8149 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 23, "steps": 16, "score": 0.793, "total_reward": 13.4804, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.212, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.3113, 0.28, 0.9167, 0.9083, 0.9, 0.8917, 0.8833, 0.875, 0.8667, 0.8583, 0.85, 0.8417, 0.8333, 0.825, 0.6808 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 24, "steps": 30, "score": 0.6768, "total_reward": 12.8598, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.824, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.3113, 0.975, 0.3267, 0.9583, 0.265, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.115, 0.7333, 0.7167, 0.8518 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 25, "steps": 21, "score": 0.5985, "total_reward": 13.1666, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.709, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.99, 0.9833, 0.975, 0.2817, 0.2733, 0.265, 0.8917, 0.2383, 0.875, 0.8667, 0.8583, 0.85, 0.8417, 0.1883, 0.18, 0.8167, 0.1633, 0.155, 0.1467, 0.7833, 0.7683 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 26, "steps": 22, "score": 0.5962, "total_reward": 13.1159, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.324, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.99, 0.9833, 0.975, 0.2817, 0.8683, 0.265, 0.2567, 0.9333, 0.925, 0.2317, 0.2233, 0.26, 0.1983, 0.825, 0.8167, 0.8083, 0.162, 0.7917, 0.1453, 0.775, 0.6336 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 27, "steps": 30, "score": 0.6573, "total_reward": 13.146, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.829, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9333, 0.313, 0.9167, 0.9083, 0.318, 0.3117, 0.3033, 0.8667, 0.85, 0.1983, 0.8167, 0.8, 0.1483, 0.7667, 0.75, 0.7333, 0.7167, 0.8533 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 28, "steps": 15, "score": 0.8749, "total_reward": 13.9979, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.28, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9333, 0.925, 0.9167, 0.9083, 0.9, 0.8917, 0.8833, 0.875, 0.8667, 0.8583, 0.85, 0.8417, 0.8333, 0.7479 ] }, { "policy": "heuristic", "task_type": "task2", "seed": 29, "steps": 17, "score": 0.7998, "total_reward": 14.3965, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.421, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9417, 0.9333, 0.925, 0.9167, 0.9083, 0.9, 0.8917, 0.2453, 0.875, 0.8667, 0.8583, 0.85, 0.2037, 0.8333, 0.825, 0.8167, 0.7975 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 0, "steps": 30, "score": 0.7823, "total_reward": 12.5171, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.1317, 0.75, 0.7333, 0.7167, 0.9021 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 1, "steps": 30, "score": 0.8246, "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.9441 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 2, "steps": 30, "score": 0.7399, "total_reward": 11.8385, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.843, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.2983, 0.2817, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.8585 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 3, "steps": 30, "score": 0.7823, "total_reward": 12.5171, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.1317, 0.75, 0.7333, 0.7167, 0.9021 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 4, "steps": 30, "score": 0.7823, "total_reward": 12.5171, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.1983, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.9021 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 5, "steps": 30, "score": 0.6545, "total_reward": 10.4723, "completion_rate": 0.733, "detection_rate": 1.0, "trust_calibration": 0.816, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.2483, 0.8667, 0.215, 0.8333, 0.8167, 0.165, 0.7833, 0.1317, 0.75, 0.7333, 0.7167, 0.7623 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 6, "steps": 30, "score": 0.8246, "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.9441 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 7, "steps": 30, "score": 0.8246, "total_reward": 13.1942, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.9442 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 8, "steps": 30, "score": 0.7399, "total_reward": 11.8383, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.843, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.2817, 0.9, 0.8833, 0.8667, 0.85, 0.1983, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.8583 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 9, "steps": 30, "score": 0.8246, "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.9441 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 10, "steps": 30, "score": 0.7398, "total_reward": 11.8373, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.115, 0.0983, 0.7167, 0.8573 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 11, "steps": 30, "score": 0.7823, "total_reward": 12.517, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.265, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.902 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 12, "steps": 30, "score": 0.8246, "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.9441 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 13, "steps": 30, "score": 0.8246, "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.9441 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 14, "steps": 30, "score": 0.7398, "total_reward": 11.8376, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.841, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.2483, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.8576 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 15, "steps": 30, "score": 0.6973, "total_reward": 11.1569, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.834, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.2983, 0.9167, 0.9, 0.8833, 0.2317, 0.85, 0.8333, 0.8167, 0.8, 0.1483, 0.7667, 0.75, 0.7333, 0.7167, 0.8119 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 16, "steps": 30, "score": 0.8246, "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.9441 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 17, "steps": 30, "score": 0.7823, "total_reward": 12.517, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.265, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.902 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 18, "steps": 30, "score": 0.8246, "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.9441 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 19, "steps": 30, "score": 0.7823, "total_reward": 12.517, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.215, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.902 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 20, "steps": 30, "score": 0.6117, "total_reward": 9.7864, "completion_rate": 0.667, "detection_rate": 1.0, "trust_calibration": 0.795, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.2983, 0.9167, 0.9, 0.2483, 0.8667, 0.85, 0.1983, 0.8167, 0.8, 0.7833, 0.7667, 0.115, 0.7333, 0.7167, 0.7114 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 21, "steps": 30, "score": 0.8246, "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.9441 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 22, "steps": 30, "score": 0.6545, "total_reward": 10.4728, "completion_rate": 0.733, "detection_rate": 1.0, "trust_calibration": 0.817, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.2983, 0.9167, 0.9, 0.2483, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.1317, 0.75, 0.7333, 0.0817, 0.7628 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 23, "steps": 30, "score": 0.7399, "total_reward": 11.8385, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.843, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.2983, 0.2817, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.8585 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 24, "steps": 30, "score": 0.7398, "total_reward": 11.8376, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.841, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.2483, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.8576 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 25, "steps": 30, "score": 0.7399, "total_reward": 11.838, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.842, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.2817, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.1817, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.858 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 26, "steps": 30, "score": 0.7398, "total_reward": 11.8373, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.115, 0.0983, 0.7167, 0.8573 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 27, "steps": 30, "score": 0.6972, "total_reward": 11.156, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.831, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.265, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.1317, 0.75, 0.7333, 0.0817, 0.811 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 28, "steps": 30, "score": 0.8246, "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.8167, 0.8, 0.7833, 0.7667, 0.75, 0.7333, 0.7167, 0.9441 ] }, { "policy": "oracle_lite", "task_type": "task2", "seed": 29, "steps": 30, "score": 0.7398, "total_reward": 11.8373, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9333, 0.9167, 0.9, 0.8833, 0.8667, 0.85, 0.8333, 0.1817, 0.8, 0.7833, 0.7667, 0.75, 0.0983, 0.7167, 0.8573 ] }, { "policy": "random", "task_type": "task3", "seed": 0, "steps": 36, "score": 0.6105, "total_reward": 15.2622, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.884, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8956, 0.9233, 0.9211, 0.02, 0.9144, 0.9122, 0.9078, 0.3556, 0.02, 0.02, 0.8467, 0.8922, 0.8378, 0.8833, 0.9061, 0.3067, 0.02, 0.8722, 0.073, 0.8306, 0.9061, 0.3397, 0.3044, 0.5035 ] }, { "policy": "random", "task_type": "task3", "seed": 1, "steps": 40, "score": 0.7205, "total_reward": 18.0135, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.662, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9506, 0.8889, 0.8822, 0.91, 0.8556, 0.9011, 0.3167, 0.8622, 0.89, 0.8356, 0.8633, 0.3011, 0.8444, 0.8378, 0.8656, 0.8261, 0.8589, 0.8961, 0.02, 0.3, 0.8106, 0.2933, 0.7916 ] }, { "policy": "random", "task_type": "task3", "seed": 2, "steps": 37, "score": 0.7627, "total_reward": 17.5411, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.752, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3778, 0.3203, 0.9211, 0.9189, 0.8822, 0.91, 0.8556, 0.8511, 0.8967, 0.8422, 0.89, 0.8356, 0.8311, 0.8767, 0.2944, 0.8972, 0.87, 0.8656, 0.8789, 0.02, 0.8722, 0.8207 ] }, { "policy": "random", "task_type": "task3", "seed": 3, "steps": 36, "score": 0.6303, "total_reward": 16.3887, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.798, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9233, 0.3411, 0.8667, 0.9144, 0.9372, 0.91, 0.3278, 0.3556, 0.8511, 0.8989, 0.2922, 0.8878, 0.3056, 0.8489, 0.8767, 0.8922, 0.333, 0.2878, 0.8283, 0.8589, 0.3297, 0.3552, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 4, "steps": 41, "score": 0.6894, "total_reward": 18.6138, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.405, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9506, 0.9233, 0.3711, 0.8844, 0.9122, 0.33, 0.3186, 0.9011, 0.8667, 0.86, 0.8878, 0.8833, 0.8789, 0.8767, 0.8994, 0.3352, 0.8678, 0.3033, 0.8239, 0.8744, 0.8678, 0.2656, 0.2933, 0.2911, 0.7076 ] }, { "policy": "random", "task_type": "task3", "seed": 5, "steps": 31, "score": 0.6062, "total_reward": 15.1538, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.816, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8933, 0.9211, 0.3367, 0.3344, 0.3322, 0.8578, 0.9306, 0.9033, 0.3211, 0.8467, 0.9194, 0.32, 0.8878, 0.8856, 0.8811, 0.02, 0.8394, 0.0752, 0.87, 0.8678, 0.02, 0.8883, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 6, "steps": 39, "score": 0.6337, "total_reward": 15.8429, "completion_rate": 0.8, "detection_rate": 0.0, "trust_calibration": 0.872, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8956, 0.8889, 0.9167, 0.9144, 0.9122, 0.91, 0.9006, 0.8511, 0.3197, 0.9194, 0.28, 0.8556, 0.8811, 0.8789, 0.8422, 0.8856, 0.3311, 0.8589, 0.0597, 0.3222, 0.27, 0.8728, 0.02, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 7, "steps": 32, "score": 0.7179, "total_reward": 15.793, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.869, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8756, 0.8911, 0.8867, 0.9144, 0.86, 0.9056, 0.9033, 0.8989, 0.8967, 0.3444, 0.89, 0.8356, 0.9083, 0.02, 0.8789, 0.8744, 0.87, 0.8928, 0.8633, 0.3111, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 8, "steps": 31, "score": 0.7087, "total_reward": 16.3004, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.701, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.3733, 0.9211, 0.8844, 0.9122, 0.91, 0.3578, 0.9056, 0.9283, 0.8667, 0.8944, 0.84, 0.3456, 0.8833, 0.8811, 0.8589, 0.8767, 0.0774, 0.835, 0.8856, 0.8633, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 9, "steps": 37, "score": 0.7151, "total_reward": 17.877, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.558, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9256, 0.9233, 0.8689, 0.3597, 0.8822, 0.935, 0.02, 0.3556, 0.9011, 0.8967, 0.8944, 0.84, 0.8356, 0.8489, 0.8244, 0.835, 0.3178, 0.8656, 0.8261, 0.8217, 0.3044, 0.85, 0.7724 ] }, { "policy": "random", "task_type": "task3", "seed": 10, "steps": 38, "score": 0.6037, "total_reward": 17.5072, "completion_rate": 0.75, "detection_rate": 1.0, "trust_calibration": 0.772, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9233, 0.9461, 0.8844, 0.3622, 0.9078, 0.02, 0.3233, 0.9261, 0.8967, 0.8944, 0.86, 0.3378, 0.02, 0.3263, 0.8811, 0.3289, 0.2967, 0.8994, 0.8722, 0.8678, 0.3386, 0.3463, 0.02, 0.3089, 0.8544, 0.355, 0.7709 ] }, { "policy": "random", "task_type": "task3", "seed": 11, "steps": 32, "score": 0.5881, "total_reward": 14.703, "completion_rate": 0.7, "detection_rate": 0.3333, "trust_calibration": 0.743, "adversarial_detections": 1, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8756, 0.9233, 0.9211, 0.3459, 0.8844, 0.3622, 0.9078, 0.9306, 0.02, 0.8689, 0.8967, 0.8944, 0.8722, 0.8878, 0.8856, 0.9228, 0.0819, 0.2967, 0.3244, 0.8722, 0.02, 0.3356, 0.3011, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 12, "steps": 36, "score": 0.6249, "total_reward": 14.9974, "completion_rate": 0.75, "detection_rate": 0.3333, "trust_calibration": 0.716, "adversarial_detections": 1, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.8933, 0.3411, 0.9189, 0.8644, 0.9122, 0.8756, 0.02, 0.3441, 0.3189, 0.3167, 0.8622, 0.8878, 0.8511, 0.8789, 0.8244, 0.8878, 0.2856, 0.0663, 0.8589, 0.8961, 0.8772, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 13, "steps": 38, "score": 0.7872, "total_reward": 18.1053, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.64, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8911, 0.8667, 0.9122, 0.3278, 0.9056, 0.8689, 0.02, 0.8622, 0.8378, 0.8533, 0.8289, 0.8767, 0.8722, 0.895, 0.8856, 0.8633, 0.8611, 0.9017, 0.8939, 0.3, 0.8081 ] }, { "policy": "random", "task_type": "task3", "seed": 14, "steps": 35, "score": 0.6988, "total_reward": 18.1679, "completion_rate": 0.95, "detection_rate": 0.0, "trust_calibration": 0.661, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8733, 0.8889, 0.8844, 0.3322, 0.91, 0.9078, 0.9056, 0.9033, 0.8989, 0.8622, 0.89, 0.3408, 0.8856, 0.8833, 0.3011, 0.8267, 0.8372, 0.87, 0.0708, 0.8833, 0.2811, 0.8839, 0.8544, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 15, "steps": 38, "score": 0.68, "total_reward": 19.0388, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.774, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3548, 0.9256, 0.9233, 0.8689, 0.3667, 0.3644, 0.8922, 0.935, 0.8556, 0.3533, 0.3211, 0.3189, 0.8944, 0.89, 0.3378, 0.8333, 0.8789, 0.8767, 0.8744, 0.2922, 0.333, 0.8656, 0.8261, 0.8567, 0.3, 0.8478, 0.8066 ] }, { "policy": "random", "task_type": "task3", "seed": 16, "steps": 33, "score": 0.5966, "total_reward": 15.511, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.691, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.3044, 0.02, 0.935, 0.02, 0.3256, 0.8711, 0.8967, 0.3444, 0.02, 0.89, 0.8556, 0.8833, 0.3311, 0.3289, 0.8744, 0.8878, 0.8633, 0.3141, 0.8589, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 17, "steps": 35, "score": 0.5891, "total_reward": 16.4939, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.795, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.02, 0.3433, 0.3319, 0.8644, 0.3622, 0.91, 0.3286, 0.9033, 0.9011, 0.8789, 0.3697, 0.9194, 0.84, 0.8856, 0.8811, 0.8789, 0.3597, 0.2692, 0.87, 0.2878, 0.8656, 0.0663, 0.8239, 0.8817, 0.02, 0.4835 ] }, { "policy": "random", "task_type": "task3", "seed": 18, "steps": 35, "score": 0.6548, "total_reward": 16.3705, "completion_rate": 0.75, "detection_rate": 1.0, "trust_calibration": 0.573, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9233, 0.02, 0.9167, 0.3644, 0.3622, 0.91, 0.8556, 0.9033, 0.8689, 0.8967, 0.8922, 0.915, 0.02, 0.8533, 0.8789, 0.2967, 0.3422, 0.3078, 0.8656, 0.8611, 0.2789, 0.3297, 0.7281 ] }, { "policy": "random", "task_type": "task3", "seed": 19, "steps": 38, "score": 0.6912, "total_reward": 17.2799, "completion_rate": 0.9, "detection_rate": 0.0, "trust_calibration": 0.834, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3156, 0.9211, 0.9189, 0.9144, 0.9122, 0.33, 0.02, 0.9133, 0.9261, 0.8967, 0.8422, 0.89, 0.8356, 0.8511, 0.8789, 0.8922, 0.87, 0.3178, 0.8811, 0.8589, 0.8544, 0.87, 0.3108, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 20, "steps": 44, "score": 0.6149, "total_reward": 19.0606, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.859, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.8711, 0.8667, 0.9144, 0.9122, 0.337, 0.9078, 0.9033, 0.3281, 0.3519, 0.3467, 0.8922, 0.89, 0.3608, 0.8856, 0.8833, 0.8289, 0.2714, 0.87, 0.3508, 0.8656, 0.3089, 0.0597, 0.8172, 0.053, 0.8728, 0.8083, 0.3439, 0.2567, 0.5146 ] }, { "policy": "random", "task_type": "task3", "seed": 21, "steps": 30, "score": 0.7401, "total_reward": 15.543, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.709, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.8911, 0.8867, 0.9144, 0.3322, 0.9078, 0.9056, 0.8711, 0.8467, 0.8922, 0.89, 0.02, 0.8856, 0.8811, 0.8589, 0.8744, 0.87, 0.8856, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 22, "steps": 45, "score": 0.5529, "total_reward": 16.5871, "completion_rate": 0.7, "detection_rate": 0.0, "trust_calibration": 0.709, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3548, 0.02, 0.9311, 0.8844, 0.3622, 0.36, 0.9078, 0.2933, 0.8689, 0.9217, 0.3444, 0.32, 0.8556, 0.3033, 0.3011, 0.8767, 0.3022, 0.87, 0.8678, 0.8811, 0.0619, 0.8544, 0.3022, 0.8478, 0.8083, 0.3119, 0.0397, 0.8322, 0.4222 ] }, { "policy": "random", "task_type": "task3", "seed": 23, "steps": 31, "score": 0.7921, "total_reward": 18.2193, "completion_rate": 0.95, "detection_rate": 0.5, "trust_calibration": 0.847, "adversarial_detections": 1, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8756, 0.9233, 0.3481, 0.9439, 0.9167, 0.9144, 0.86, 0.9078, 0.9033, 0.9261, 0.8967, 0.8944, 0.8922, 0.89, 0.8878, 0.02, 0.9228, 0.0819, 0.8744, 0.87, 0.8833, 0.7254 ] }, { "policy": "random", "task_type": "task3", "seed": 24, "steps": 45, "score": 0.6293, "total_reward": 16.3622, "completion_rate": 0.75, "detection_rate": 1.0, "trust_calibration": 0.813, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3156, 0.9483, 0.9461, 0.3197, 0.9072, 0.8756, 0.9033, 0.3541, 0.02, 0.9044, 0.02, 0.3608, 0.8511, 0.2967, 0.8722, 0.8356, 0.8111, 0.2867, 0.8544, 0.845, 0.3156, 0.8061, 0.8367, 0.7972, 0.7658 ] }, { "policy": "random", "task_type": "task3", "seed": 25, "steps": 34, "score": 0.6745, "total_reward": 16.8613, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.809, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8933, 0.9461, 0.9189, 0.8644, 0.9122, 0.91, 0.8556, 0.8711, 0.8989, 0.3237, 0.8944, 0.8922, 0.8378, 0.3356, 0.8833, 0.8489, 0.0797, 0.3244, 0.3222, 0.8878, 0.8906, 0.9061, 0.2967, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 26, "steps": 43, "score": 0.5813, "total_reward": 17.4397, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.815, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8933, 0.9211, 0.9439, 0.9167, 0.9144, 0.02, 0.3308, 0.8711, 0.8467, 0.8922, 0.8878, 0.3486, 0.3033, 0.2759, 0.8767, 0.3244, 0.3452, 0.29, 0.8156, 0.8633, 0.2889, 0.0597, 0.8544, 0.3372, 0.8478, 0.2956, 0.2811, 0.2889, 0.4707 ] }, { "policy": "random", "task_type": "task3", "seed": 27, "steps": 40, "score": 0.5674, "total_reward": 15.3205, "completion_rate": 0.7, "detection_rate": 0.25, "trust_calibration": 0.816, "adversarial_detections": 1, "adversarial_poisonings": 3, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9206, 0.3433, 0.3541, 0.9189, 0.9244, 0.3622, 0.9078, 0.9133, 0.8689, 0.8644, 0.885, 0.02, 0.8533, 0.9061, 0.8267, 0.9139, 0.073, 0.3356, 0.3441, 0.0619, 0.8994, 0.2722, 0.323, 0.3308, 0.8433, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 28, "steps": 32, "score": 0.6685, "total_reward": 16.0443, "completion_rate": 0.8, "detection_rate": 0.0, "trust_calibration": 0.793, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8956, 0.9211, 0.9189, 0.8844, 0.9122, 0.91, 0.9078, 0.9056, 0.3763, 0.02, 0.3167, 0.8944, 0.89, 0.9128, 0.8856, 0.8461, 0.8589, 0.3267, 0.8372, 0.9128, 0.2933, 0.3461, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 29, "steps": 40, "score": 0.6868, "total_reward": 17.8577, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.907, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8933, 0.3411, 0.9189, 0.02, 0.8822, 0.9028, 0.02, 0.8511, 0.8989, 0.3467, 0.3444, 0.8578, 0.8856, 0.8511, 0.2989, 0.9094, 0.87, 0.8856, 0.3641, 0.8567, 0.3572, 0.8895, 0.8083, 0.8353 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 0, "steps": 43, "score": 0.7114, "total_reward": 18.4969, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.729, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.3344, 0.91, 0.9056, 0.9011, 0.3267, 0.8922, 0.8878, 0.8833, 0.8789, 0.8744, 0.3, 0.2956, 0.8611, 0.2867, 0.8522, 0.8478, 0.8433, 0.8389, 0.7841 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 1, "steps": 29, "score": 0.7083, "total_reward": 17.707, "completion_rate": 0.85, "detection_rate": 0.5, "trust_calibration": 0.721, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.6632 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 2, "steps": 29, "score": 0.6919, "total_reward": 17.2983, "completion_rate": 0.85, "detection_rate": 0.4444, "trust_calibration": 0.561, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3778, 0.3456, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.0997, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.6065 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 3, "steps": 42, "score": 0.8546, "total_reward": 18.8008, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.843, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.3111, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.8456, 0.8411, 0.8675 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 4, "steps": 29, "score": 0.7165, "total_reward": 17.9128, "completion_rate": 0.9, "detection_rate": 0.4444, "trust_calibration": 0.721, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.3414, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.0997, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.664 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 5, "steps": 46, "score": 0.7558, "total_reward": 18.1385, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.832, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.3333, 0.8989, 0.8944, 0.32, 0.8856, 0.3111, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.8456, 0.2711, 0.8367, 0.8322, 0.8229 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 6, "steps": 27, "score": 0.6991, "total_reward": 16.778, "completion_rate": 0.85, "detection_rate": 0.4, "trust_calibration": 0.725, "adversarial_detections": 4, "adversarial_poisonings": 6, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.1019, 0.0997, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.6387 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 7, "steps": 42, "score": 0.7756, "total_reward": 19.3902, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.835, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3763, 0.9211, 0.3919, 0.3997, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.2933, 0.8589, 0.8544, 0.28, 0.8456, 0.8411, 0.8478 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 8, "steps": 44, "score": 0.809, "total_reward": 19.4157, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.853, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3526, 0.9211, 0.9167, 0.9122, 0.9078, 0.3333, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.2756, 0.8411, 0.8367, 0.8654 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 9, "steps": 40, "score": 0.782, "total_reward": 19.5499, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.837, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.3841, 0.9189, 0.3997, 0.3994, 0.3972, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.2889, 0.8544, 0.85, 0.8456, 0.8528 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 10, "steps": 31, "score": 0.712, "total_reward": 17.8008, "completion_rate": 0.85, "detection_rate": 0.625, "trust_calibration": 0.448, "adversarial_detections": 5, "adversarial_poisonings": 3, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.3303, 0.3281, 0.8989, 0.0997, 0.0974, 0.0952, 0.9295, 0.925, 0.9206, 0.9161, 0.9117, 0.3356, 0.6281 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 11, "steps": 40, "score": 0.7732, "total_reward": 18.5566, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.835, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.2978, 0.2933, 0.8589, 0.8544, 0.85, 0.8456, 0.8349 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 12, "steps": 42, "score": 0.8546, "total_reward": 18.8009, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.2889, 0.8544, 0.85, 0.8456, 0.8411, 0.8676 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 13, "steps": 39, "score": 0.833, "total_reward": 18.3252, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.811, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3456, 0.3433, 0.9189, 0.9144, 0.91, 0.9056, 0.9011, 0.8967, 0.8922, 0.8878, 0.8833, 0.8789, 0.8744, 0.87, 0.8656, 0.8611, 0.8567, 0.8522, 0.8478, 0.8485 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 14, "steps": 29, "score": 0.6889, "total_reward": 17.9127, "completion_rate": 0.85, "detection_rate": 0.5, "trust_calibration": 0.609, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3763, 0.3841, 0.3689, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.3237, 0.8944, 0.8922, 0.093, 0.0908, 0.0886, 0.0863, 0.9206, 0.9161, 0.9117, 0.6353 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 15, "steps": 30, "score": 0.6847, "total_reward": 18.4869, "completion_rate": 0.9, "detection_rate": 0.4444, "trust_calibration": 0.635, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3608, 0.9256, 0.9233, 0.9211, 0.3459, 0.9167, 0.9144, 0.9122, 0.91, 0.3348, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.8944, 0.8922, 0.093, 0.0908, 0.0886, 0.0863, 0.0841, 0.9184, 0.9139, 0.9095, 0.6404 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 16, "steps": 42, "score": 0.8546, "total_reward": 18.8009, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.2889, 0.8544, 0.85, 0.8456, 0.8411, 0.8676 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 17, "steps": 46, "score": 0.8048, "total_reward": 19.316, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.842, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.2889, 0.8544, 0.28, 0.8456, 0.8411, 0.8367, 0.8322, 0.8605 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 18, "steps": 26, "score": 0.6967, "total_reward": 16.7213, "completion_rate": 0.85, "detection_rate": 0.3333, "trust_calibration": 0.701, "adversarial_detections": 3, "adversarial_poisonings": 6, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.0997, 0.0974, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.6149 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 19, "steps": 20, "score": 0.6193, "total_reward": 13.0053, "completion_rate": 0.65, "detection_rate": 0.0, "trust_calibration": 0.576, "adversarial_detections": 0, "adversarial_poisonings": 5, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3478, 0.9256, 0.9233, 0.9211, 0.9189, 0.3437, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.0974, 0.0952, 0.093, 0.0908, 0.01 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 20, "steps": 46, "score": 0.7498, "total_reward": 19.4938, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.835, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3763, 0.3841, 0.9167, 0.9122, 0.3378, 0.9033, 0.8989, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.3022, 0.8678, 0.8633, 0.8589, 0.2844, 0.85, 0.8456, 0.8411, 0.8367, 0.8322, 0.8412 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 21, "steps": 42, "score": 0.8546, "total_reward": 18.8009, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.2756, 0.8411, 0.8676 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 22, "steps": 36, "score": 0.7334, "total_reward": 21.2675, "completion_rate": 1.0, "detection_rate": 0.8, "trust_calibration": 0.747, "adversarial_detections": 4, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3778, 0.9256, 0.9233, 0.3481, 0.9189, 0.9167, 0.9144, 0.9122, 0.337, 0.9078, 0.3326, 0.9033, 0.9011, 0.3259, 0.8967, 0.3214, 0.8922, 0.89, 0.3148, 0.8856, 0.0863, 0.9206, 0.9161, 0.9117, 0.9073, 0.8789, 0.8544, 0.7968 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 23, "steps": 28, "score": 0.6922, "total_reward": 17.3057, "completion_rate": 0.85, "detection_rate": 0.4444, "trust_calibration": 0.645, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3686, 0.3433, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.0974, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.9139, 0.6298 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 24, "steps": 46, "score": 0.7725, "total_reward": 20.0838, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.836, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3686, 0.9483, 0.3711, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.3022, 0.8678, 0.8633, 0.2889, 0.8544, 0.85, 0.8456, 0.2711, 0.8367, 0.8322, 0.8591 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 25, "steps": 34, "score": 0.6755, "total_reward": 18.9148, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.71, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.9144, 0.3322, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.3189, 0.3167, 0.8944, 0.3122, 0.31, 0.3078, 0.8856, 0.9011, 0.8967, 0.8922, 0.3078, 0.3033, 0.8789, 0.782 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 26, "steps": 34, "score": 0.6561, "total_reward": 19.0282, "completion_rate": 0.8, "detection_rate": 0.6667, "trust_calibration": 0.467, "adversarial_detections": 2, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.8989, 0.3667, 0.3644, 0.9372, 0.935, 0.3578, 0.3556, 0.3533, 0.3289, 0.8967, 0.8944, 0.8922, 0.317, 0.8878, 0.3126, 0.8833, 0.0841, 0.9184, 0.9139, 0.8878, 0.8656, 0.3163, 0.3419, 0.6213 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 27, "steps": 46, "score": 0.7256, "total_reward": 20.3155, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.828, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3763, 0.9211, 0.9189, 0.3997, 0.3994, 0.3972, 0.9078, 0.9033, 0.3289, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.2844, 0.85, 0.2756, 0.8411, 0.8367, 0.8322, 0.8395 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 28, "steps": 28, "score": 0.7355, "total_reward": 17.6509, "completion_rate": 0.9, "detection_rate": 0.5, "trust_calibration": 0.725, "adversarial_detections": 5, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.1019, 0.0997, 0.0974, 0.0952, 0.093, 0.9273, 0.9228, 0.9184, 0.9139, 0.6841 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 29, "steps": 28, "score": 0.7258, "total_reward": 18.144, "completion_rate": 0.9, "detection_rate": 0.5, "trust_calibration": 0.709, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.3392, 0.91, 0.9078, 0.9056, 0.9033, 0.3281, 0.8989, 0.8967, 0.8944, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.9139, 0.6801 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 0, "steps": 42, "score": 0.867, "total_reward": 19.0739, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.935, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.3111, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8904 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 1, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 2, "steps": 42, "score": 0.8422, "total_reward": 18.5276, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.933, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.3511, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8724 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 3, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.3111, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 4, "steps": 42, "score": 0.8689, "total_reward": 19.1154, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.3289, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 5, "steps": 46, "score": 0.7916, "total_reward": 18.9976, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.917, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.3333, 0.8989, 0.8944, 0.32, 0.8856, 0.3111, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, 0.8618 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 6, "steps": 40, "score": 0.8977, "total_reward": 18.8523, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 7, "steps": 40, "score": 0.8958, "total_reward": 18.8108, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 8, "steps": 44, "score": 0.8405, "total_reward": 19.3315, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.3511, 0.9167, 0.9122, 0.9078, 0.9033, 0.3289, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8857 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 9, "steps": 40, "score": 0.8958, "total_reward": 18.8108, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 10, "steps": 42, "score": 0.8421, "total_reward": 18.5263, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.928, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.3067, 0.3022, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.871 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 11, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 12, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 13, "steps": 40, "score": 0.8958, "total_reward": 18.8108, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 14, "steps": 44, "score": 0.8405, "total_reward": 19.3311, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.932, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.2933, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8853 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 15, "steps": 46, "score": 0.8162, "total_reward": 19.5883, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.93, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.9211, 0.9167, 0.9122, 0.3378, 0.9033, 0.8989, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, 0.8825 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 16, "steps": 40, "score": 0.8977, "total_reward": 18.8523, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 17, "steps": 42, "score": 0.867, "total_reward": 19.0739, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.935, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8903 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 18, "steps": 40, "score": 0.8958, "total_reward": 18.8108, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 19, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.3333, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 20, "steps": 46, "score": 0.7653, "total_reward": 18.3663, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.909, "adversarial_detections": 5, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.3289, 0.8944, 0.89, 0.8856, 0.8811, 0.3067, 0.8722, 0.8678, 0.2933, 0.8589, 0.8544, 0.85, 0.8873, 0.8828, 0.8784, 0.8739, 0.8423 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 21, "steps": 40, "score": 0.8958, "total_reward": 18.8109, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.932, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 22, "steps": 46, "score": 0.7652, "total_reward": 18.3659, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.908, "adversarial_detections": 5, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.3111, 0.8767, 0.8722, 0.2978, 0.8633, 0.2889, 0.8544, 0.85, 0.8873, 0.8828, 0.8784, 0.8739, 0.8419 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 23, "steps": 42, "score": 0.8403, "total_reward": 18.4862, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.3511, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8727 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 24, "steps": 44, "score": 0.8405, "total_reward": 19.3311, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.933, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.2933, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8854 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 25, "steps": 44, "score": 0.8405, "total_reward": 19.3314, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.3511, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.3244, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8857 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 26, "steps": 42, "score": 0.8403, "total_reward": 18.4855, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.3067, 0.3022, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.872 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 27, "steps": 46, "score": 0.8179, "total_reward": 19.6285, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.924, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.3111, 0.8767, 0.8722, 0.2978, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, 0.881 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 28, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 29, "steps": 44, "score": 0.8405, "total_reward": 19.3311, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.933, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.3244, 0.89, 0.8856, 0.8811, 0.8767, 0.3022, 0.8678, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8854 ] } ] }