diff --git "a/outputs/evaluation_results.json" "b/outputs/evaluation_results.json" --- "a/outputs/evaluation_results.json" +++ "b/outputs/evaluation_results.json" @@ -1,11 +1,9 @@ { - "task": "all", + "task": "task3", "tasks": [ - "task1", - "task2", "task3" ], - "episodes_per_policy": 20, + "episodes_per_policy": 100, "adaptive": false, "difficulty_controller": { "adaptive": true, @@ -18,16 +16,6 @@ "adversary_poison_confidence": 0.92 }, "difficulty_controller_by_task_policy": { - "task1": { - "random": {}, - "heuristic": {}, - "oracle_lite": {} - }, - "task2": { - "random": {}, - "heuristic": {}, - "oracle_lite": {} - }, "task3": { "random": {}, "heuristic": {}, @@ -36,123 +24,71 @@ }, "summary": { "random": { - "episodes": 60, - "avg_score": 0.6954, - "avg_completion_rate": 0.8222, - "avg_detection_rate": 0.8111, - "avg_trust_calibration": 0.4227, - "avg_steps": 25.9167 + "episodes": 100, + "avg_score": 0.6601, + "avg_completion_rate": 0.8165, + "avg_detection_rate": 0.375, + "avg_trust_calibration": 0.7349, + "avg_steps": 36.13 }, "heuristic": { - "episodes": 60, - "avg_score": 0.796, - "avg_completion_rate": 0.8958, - "avg_detection_rate": 0.9115, - "avg_trust_calibration": 0.4381, - "avg_steps": 24.25 + "episodes": 100, + "avg_score": 0.7314, + "avg_completion_rate": 0.8935, + "avg_detection_rate": 0.7621, + "avg_trust_calibration": 0.74, + "avg_steps": 35.54 }, "oracle_lite": { - "episodes": 60, - "avg_score": 0.8553, - "avg_completion_rate": 0.8858, + "episodes": 100, + "avg_score": 0.8562, + "avg_completion_rate": 0.991, "avg_detection_rate": 1.0, - "avg_trust_calibration": 0.5905, - "avg_steps": 29.2667 + "avg_trust_calibration": 0.9304, + "avg_steps": 42.62 } }, "by_task": { - "task1": { - "random": { - "episodes": 20, - "avg_score": 0.7702, - "avg_completion_rate": 0.77, - "avg_detection_rate": 1.0, - "avg_trust_calibration": 0.0, - "avg_steps": 15.1 - }, - "heuristic": { - "episodes": 20, - "avg_score": 0.869, - "avg_completion_rate": 0.845, - "avg_detection_rate": 1.0, - "avg_trust_calibration": 0.0, - "avg_steps": 13.75 - }, - "oracle_lite": { - "episodes": 20, - "avg_score": 0.918, - "avg_completion_rate": 0.735, - "avg_detection_rate": 1.0, - "avg_trust_calibration": 0.0, - "avg_steps": 16.0 - } - }, - "task2": { - "random": { - "episodes": 20, - "avg_score": 0.6505, - "avg_completion_rate": 0.8767, - "avg_detection_rate": 1.0, - "avg_trust_calibration": 0.5424, - "avg_steps": 26.75 - }, - "heuristic": { - "episodes": 20, - "avg_score": 0.7677, - "avg_completion_rate": 0.9399, - "avg_detection_rate": 1.0, - "avg_trust_calibration": 0.5741, - "avg_steps": 23.2 - }, - "oracle_lite": { - "episodes": 20, - "avg_score": 0.7801, - "avg_completion_rate": 0.93, - "avg_detection_rate": 1.0, - "avg_trust_calibration": 0.84, - "avg_steps": 30.0 - } - }, "task3": { "random": { - "episodes": 20, - "avg_score": 0.6655, - "avg_completion_rate": 0.82, - "avg_detection_rate": 0.4333, - "avg_trust_calibration": 0.7258, - "avg_steps": 35.9 + "episodes": 100, + "avg_score": 0.6601, + "avg_completion_rate": 0.8165, + "avg_detection_rate": 0.375, + "avg_trust_calibration": 0.7349, + "avg_steps": 36.13 }, "heuristic": { - "episodes": 20, - "avg_score": 0.7513, - "avg_completion_rate": 0.9025, - "avg_detection_rate": 0.7346, - "avg_trust_calibration": 0.7401, - "avg_steps": 35.8 + "episodes": 100, + "avg_score": 0.7314, + "avg_completion_rate": 0.8935, + "avg_detection_rate": 0.7621, + "avg_trust_calibration": 0.74, + "avg_steps": 35.54 }, "oracle_lite": { - "episodes": 20, - "avg_score": 0.8678, - "avg_completion_rate": 0.9925, + "episodes": 100, + "avg_score": 0.8562, + "avg_completion_rate": 0.991, "avg_detection_rate": 1.0, - "avg_trust_calibration": 0.9314, - "avg_steps": 41.8 + "avg_trust_calibration": 0.9304, + "avg_steps": 42.62 } } }, "episodes": [ { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 0, - "steps": 15, - "score": 0.6569, - "total_reward": 7.8825, - "completion_rate": 0.7, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "steps": 36, + "score": 0.6105, + "total_reward": 15.2622, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.884, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -165,30 +101,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.914, - 0.962, - 0.962, + 0.8956, + 0.9233, + 0.9211, + 0.02, + 0.9144, + 0.9122, + 0.9078, + 0.3556, 0.02, - 0.962, - 0.962, - 0.962, - 0.3645, 0.02, + 0.8467, + 0.8922, + 0.8378, + 0.8833, + 0.9061, + 0.3067, 0.02, - 0.867 + 0.8722, + 0.073, + 0.8306, + 0.9061, + 0.3397, + 0.3044, + 0.5035 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 1, - "steps": 15, - "score": 0.7996, - "total_reward": 7.196, - "completion_rate": 0.7, + "steps": 40, + "score": 0.7205, + "total_reward": 18.0135, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, + "trust_calibration": 0.662, + "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -202,26 +151,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.982, - 0.914, - 0.914, - 0.962, - 0.867, - 0.962, - 0.3165 + 0.9278, + 0.9506, + 0.8889, + 0.8822, + 0.91, + 0.8556, + 0.9011, + 0.3167, + 0.8622, + 0.89, + 0.8356, + 0.8633, + 0.3011, + 0.8444, + 0.8378, + 0.8656, + 0.8261, + 0.8589, + 0.8961, + 0.02, + 0.3, + 0.8106, + 0.2933, + 0.7916 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 2, - "steps": 15, - "score": 0.8129, - "total_reward": 8.1294, - "completion_rate": 0.7, + "steps": 37, + "score": 0.7627, + "total_reward": 17.5411, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.0, + "trust_calibration": 0.752, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -236,30 +201,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.369, - 0.3024, - 0.962, - 0.962, - 0.914, - 0.962, - 0.867, - 0.867, - 0.962 + 0.3778, + 0.3203, + 0.9211, + 0.9189, + 0.8822, + 0.91, + 0.8556, + 0.8511, + 0.8967, + 0.8422, + 0.89, + 0.8356, + 0.8311, + 0.8767, + 0.2944, + 0.8972, + 0.87, + 0.8656, + 0.8789, + 0.02, + 0.8722, + 0.8207 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 3, - "steps": 14, - "score": 0.8084, - "total_reward": 10.5095, - "completion_rate": 0.9, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "steps": 36, + "score": 0.6303, + "total_reward": 16.3887, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.798, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -271,30 +249,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.982, - 0.982, - 0.962, - 0.3455, - 0.867, - 0.962, - 0.946, - 0.962, - 0.3455, - 0.3645, - 0.867, - 0.962 + 0.9528, + 0.9506, + 0.9233, + 0.3411, + 0.8667, + 0.9144, + 0.9372, + 0.91, + 0.3278, + 0.3556, + 0.8511, + 0.8989, + 0.2922, + 0.8878, + 0.3056, + 0.8489, + 0.8767, + 0.8922, + 0.333, + 0.2878, + 0.8283, + 0.8589, + 0.3297, + 0.3552, + 0.01 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 4, - "steps": 15, - "score": 0.7814, - "total_reward": 8.5956, - "completion_rate": 0.7, + "steps": 41, + "score": 0.6894, + "total_reward": 18.6138, + "completion_rate": 0.85, "detection_rate": 1.0, - "trust_calibration": 0.0, + "trust_calibration": 0.405, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -309,31 +300,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.982, - 0.962, - 0.3645, - 0.914, - 0.962, - 0.3455, - 0.3136, - 0.962, - 0.914 + 0.9278, + 0.9506, + 0.9233, + 0.3711, + 0.8844, + 0.9122, + 0.33, + 0.3186, + 0.9011, + 0.8667, + 0.86, + 0.8878, + 0.8833, + 0.8789, + 0.8767, + 0.8994, + 0.3352, + 0.8678, + 0.3033, + 0.8239, + 0.8744, + 0.8678, + 0.2656, + 0.2933, + 0.2911, + 0.7076 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 5, - "steps": 15, - "score": 0.725, - "total_reward": 8.7, - "completion_rate": 0.7, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "steps": 31, + "score": 0.6062, + "total_reward": 15.1538, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.816, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -345,32 +352,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.914, - 0.962, - 0.3165, - 0.3455, - 0.3455, - 0.867, - 0.946, - 0.962, - 0.3455, - 0.867 + 0.9278, + 0.8933, + 0.9211, + 0.3367, + 0.3344, + 0.3322, + 0.8578, + 0.9306, + 0.9033, + 0.3211, + 0.8467, + 0.9194, + 0.32, + 0.8878, + 0.8856, + 0.8811, + 0.02, + 0.8394, + 0.0752, + 0.87, + 0.8678, + 0.02, + 0.8883, + 0.01 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 6, - "steps": 15, - "score": 0.8118, - "total_reward": 8.1182, + "steps": 39, + "score": 0.6337, + "total_reward": 15.8429, "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "detection_rate": 0.0, + "trust_calibration": 0.872, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -382,65 +402,92 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.914, - 0.914, - 0.962, - 0.962, - 0.962, - 0.962, - 0.934, - 0.867, - 0.3206 - ] - }, - { - "policy": "random", - "task_type": "task1", - "seed": 7, - "steps": 15, - "score": 0.9334, - "total_reward": 9.334, - "completion_rate": 0.9, - "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", - "difficulty_profile": { - "adaptive": false, - "episodes_seen": 0, - "rolling_detection_rate": 0.0, - "adversarial_threshold": 0.7, + 0.8956, + 0.8889, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9006, + 0.8511, + 0.3197, + 0.9194, + 0.28, + 0.8556, + 0.8811, + 0.8789, + 0.8422, + 0.8856, + 0.3311, + 0.8589, + 0.0597, + 0.3222, + 0.27, + 0.8728, + 0.02, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 7, + "steps": 32, + "score": 0.7179, + "total_reward": 15.793, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.869, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.867, - 0.914, - 0.914, - 0.962, - 0.867, - 0.962, - 0.962, - 0.962, - 0.962 + 0.8756, + 0.8911, + 0.8867, + 0.9144, + 0.86, + 0.9056, + 0.9033, + 0.8989, + 0.8967, + 0.3444, + 0.89, + 0.8356, + 0.9083, + 0.02, + 0.8789, + 0.8744, + 0.87, + 0.8928, + 0.8633, + 0.3111, + 0.01 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 8, - "steps": 15, - "score": 0.8425, - "total_reward": 9.2675, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "steps": 31, + "score": 0.7087, + "total_reward": 16.3004, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.701, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -452,28 +499,40 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.369, - 0.962, - 0.914, - 0.962, - 0.962, - 0.3645, - 0.962, - 0.982, - 0.914 + 0.9256, + 0.3733, + 0.9211, + 0.8844, + 0.9122, + 0.91, + 0.3578, + 0.9056, + 0.9283, + 0.8667, + 0.8944, + 0.84, + 0.3456, + 0.8833, + 0.8811, + 0.8589, + 0.8767, + 0.0774, + 0.835, + 0.8856, + 0.8633, + 0.01 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 9, - "steps": 15, - "score": 0.7751, - "total_reward": 9.3011, - "completion_rate": 0.8, + "steps": 37, + "score": 0.7151, + "total_reward": 17.877, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.0, + "trust_calibration": 0.558, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -488,29 +547,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.982, - 0.962, - 0.962, - 0.867, - 0.3616, - 0.914, - 0.3645, + 0.9528, + 0.9256, + 0.9233, + 0.8689, + 0.3597, + 0.8822, + 0.935, 0.02, - 0.982, - 0.962, - 0.962 + 0.3556, + 0.9011, + 0.8967, + 0.8944, + 0.84, + 0.8356, + 0.8489, + 0.8244, + 0.835, + 0.3178, + 0.8656, + 0.8261, + 0.8217, + 0.3044, + 0.85, + 0.7724 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 10, - "steps": 15, - "score": 0.7653, - "total_reward": 8.418, - "completion_rate": 0.7, + "steps": 38, + "score": 0.6037, + "total_reward": 17.5072, + "completion_rate": 0.75, "detection_rate": 1.0, - "trust_calibration": 0.0, + "trust_calibration": 0.772, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -525,31 +597,49 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.982, - 0.914, - 0.982, - 0.962, + 0.9278, + 0.9233, + 0.9461, + 0.8844, + 0.3622, + 0.9078, + 0.02, + 0.3233, + 0.9261, + 0.8967, + 0.8944, + 0.86, + 0.3378, + 0.02, + 0.3263, + 0.8811, + 0.3289, + 0.2967, + 0.8994, + 0.8722, + 0.8678, + 0.3386, + 0.3463, 0.02, - 0.3455, - 0.3645, - 0.962 + 0.3089, + 0.8544, + 0.355, + 0.7709 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 11, - "steps": 15, - "score": 0.8199, - "total_reward": 9.8394, - "completion_rate": 0.9, - "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "steps": 32, + "score": 0.5881, + "total_reward": 14.703, + "completion_rate": 0.7, + "detection_rate": 0.3333, + "trust_calibration": 0.743, + "adversarial_detections": 1, + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -561,32 +651,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.867, - 0.962, - 0.962, - 0.3504, - 0.914, - 0.982, - 0.962, - 0.982, + 0.8756, + 0.9233, + 0.9211, + 0.3459, + 0.8844, + 0.3622, + 0.9078, + 0.9306, + 0.02, + 0.8689, + 0.8967, + 0.8944, + 0.8722, + 0.8878, + 0.8856, + 0.9228, + 0.0819, + 0.2967, + 0.3244, + 0.8722, 0.02, - 0.914, - 0.962 + 0.3356, + 0.3011, + 0.01 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 12, - "steps": 15, - "score": 0.6163, - "total_reward": 7.3956, - "completion_rate": 0.6, - "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "steps": 36, + "score": 0.6249, + "total_reward": 14.9974, + "completion_rate": 0.75, + "detection_rate": 0.3333, + "trust_calibration": 0.716, + "adversarial_detections": 1, + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -598,30 +701,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.982, - 0.914, - 0.3455, - 0.962, - 0.898, - 0.962, - 0.914, + 0.9528, + 0.8933, + 0.3411, + 0.9189, + 0.8644, + 0.9122, + 0.8756, 0.02, - 0.3616, - 0.3455, - 0.3455 + 0.3441, + 0.3189, + 0.3167, + 0.8622, + 0.8878, + 0.8511, + 0.8789, + 0.8244, + 0.8878, + 0.2856, + 0.0663, + 0.8589, + 0.8961, + 0.8772, + 0.01 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 13, - "steps": 15, - "score": 0.7283, - "total_reward": 6.555, - "completion_rate": 0.7, + "steps": 38, + "score": 0.7872, + "total_reward": 18.1053, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, + "trust_calibration": 0.64, + "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -635,29 +750,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.914, - 0.867, - 0.962, - 0.934, - 0.962, - 0.914, - 0.02 - ] - }, - { - "policy": "random", - "task_type": "task1", - "seed": 14, - "steps": 17, - "score": 0.8867, - "total_reward": 10.6405, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.0, + 0.9278, + 0.8911, + 0.8667, + 0.9122, + 0.3278, + 0.9056, + 0.8689, + 0.02, + 0.8622, + 0.8378, + 0.8533, + 0.8289, + 0.8767, + 0.8722, + 0.895, + 0.8856, + 0.8633, + 0.8611, + 0.9017, + 0.8939, + 0.3, + 0.8081 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 14, + "steps": 35, + "score": 0.6988, + "total_reward": 18.1679, + "completion_rate": 0.95, + "detection_rate": 0.0, + "trust_calibration": 0.661, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -669,29 +798,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.867, - 0.914, - 0.914, - 0.3455, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.914 + 0.9278, + 0.8733, + 0.8889, + 0.8844, + 0.3322, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.8989, + 0.8622, + 0.89, + 0.3408, + 0.8856, + 0.8833, + 0.3011, + 0.8267, + 0.8372, + 0.87, + 0.0708, + 0.8833, + 0.2811, + 0.8839, + 0.8544, + 0.01 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 15, - "steps": 16, - "score": 0.6915, - "total_reward": 9.6809, - "completion_rate": 0.7, + "steps": 38, + "score": 0.68, + "total_reward": 19.0388, + "completion_rate": 0.85, "detection_rate": 1.0, - "trust_calibration": 0.0, + "trust_calibration": 0.774, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -706,34 +849,48 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3504, - 0.962, - 0.962, - 0.867, - 0.3645, - 0.3645, - 0.982, - 0.3645, - 0.867, - 0.982, - 0.3455, - 0.3455, - 0.962 + 0.3548, + 0.9256, + 0.9233, + 0.8689, + 0.3667, + 0.3644, + 0.8922, + 0.935, + 0.8556, + 0.3533, + 0.3211, + 0.3189, + 0.8944, + 0.89, + 0.3378, + 0.8333, + 0.8789, + 0.8767, + 0.8744, + 0.2922, + 0.333, + 0.8656, + 0.8261, + 0.8567, + 0.3, + 0.8478, + 0.8066 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 16, - "steps": 15, - "score": 0.7164, - "total_reward": 9.313, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "steps": 33, + "score": 0.5966, + "total_reward": 15.511, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.691, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -745,32 +902,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.2975, + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.3044, + 0.02, + 0.935, 0.02, - 0.982, + 0.3256, + 0.8711, + 0.8967, + 0.3444, 0.02, - 0.3455, - 0.914, - 0.962 + 0.89, + 0.8556, + 0.8833, + 0.3311, + 0.3289, + 0.8744, + 0.8878, + 0.8633, + 0.3141, + 0.8589, + 0.01 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 17, - "steps": 15, - "score": 0.6495, - "total_reward": 8.4439, - "completion_rate": 0.7, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "steps": 35, + "score": 0.5891, + "total_reward": 16.4939, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.795, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -783,30 +953,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, + 0.9278, + 0.02, + 0.3433, + 0.3319, + 0.8644, + 0.3622, + 0.91, + 0.3286, + 0.9033, + 0.9011, + 0.8789, + 0.3697, + 0.9194, + 0.84, + 0.8856, + 0.8811, + 0.8789, + 0.3597, + 0.2692, + 0.87, + 0.2878, + 0.8656, + 0.0663, + 0.8239, + 0.8817, 0.02, - 0.3455, - 0.3136, - 0.867, - 0.982, - 0.962, - 0.3206, - 0.962, - 0.962, - 0.982, - 0.3826 + 0.4835 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 18, - "steps": 15, - "score": 0.8235, - "total_reward": 9.8815, - "completion_rate": 0.9, + "steps": 35, + "score": 0.6548, + "total_reward": 16.3705, + "completion_rate": 0.75, "detection_rate": 1.0, - "trust_calibration": 0.0, + "trust_calibration": 0.573, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -821,32 +1006,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, + 0.9278, + 0.9233, + 0.02, + 0.9167, + 0.3644, + 0.3622, + 0.91, + 0.8556, + 0.9033, + 0.8689, + 0.8967, + 0.8922, + 0.915, 0.02, - 0.962, - 0.982, - 0.3645, - 0.962, - 0.867, - 0.962, - 0.914, - 0.962 + 0.8533, + 0.8789, + 0.2967, + 0.3422, + 0.3078, + 0.8656, + 0.8611, + 0.2789, + 0.3297, + 0.7281 ] }, { "policy": "random", - "task_type": "task1", + "task_type": "task3", "seed": 19, - "steps": 15, - "score": 0.7588, - "total_reward": 8.347, - "completion_rate": 0.7, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "steps": 38, + "score": 0.6912, + "total_reward": 17.2799, + "completion_rate": 0.9, + "detection_rate": 0.0, + "trust_calibration": 0.834, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -858,30 +1056,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.2975, - 0.962, - 0.962, - 0.962, - 0.962, - 0.3455, + 0.3156, + 0.9211, + 0.9189, + 0.9144, + 0.9122, + 0.33, 0.02, - 0.93, - 0.982, - 0.962 + 0.9133, + 0.9261, + 0.8967, + 0.8422, + 0.89, + 0.8356, + 0.8511, + 0.8789, + 0.8922, + 0.87, + 0.3178, + 0.8811, + 0.8589, + 0.8544, + 0.87, + 0.3108, + 0.01 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 0, - "steps": 13, - "score": 0.753, - "total_reward": 10.5415, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 20, + "steps": 44, + "score": 0.6149, + "total_reward": 19.0606, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.859, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -894,34 +1106,51 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.982, - 0.982, - 0.982, - 0.3645, - 0.3645, - 0.982, - 0.3645, - 0.3645, - 0.962, - 0.962, - 0.3455, - 0.962, - 0.962 - ] - }, - { - "policy": "heuristic", - "task_type": "task1", - "seed": 1, - "steps": 12, - "score": 0.7843, - "total_reward": 10.196, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, + 0.3556, + 0.8711, + 0.8667, + 0.9144, + 0.9122, + 0.337, + 0.9078, + 0.9033, + 0.3281, + 0.3519, + 0.3467, + 0.8922, + 0.89, + 0.3608, + 0.8856, + 0.8833, + 0.8289, + 0.2714, + 0.87, + 0.3508, + 0.8656, + 0.3089, + 0.0597, + 0.8172, + 0.053, + 0.8728, + 0.8083, + 0.3439, + 0.2567, + 0.5146 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 21, + "steps": 30, + "score": 0.7401, + "total_reward": 15.543, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.709, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -933,32 +1162,40 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.982, - 0.982, - 0.982, - 0.3645, - 0.3645, - 0.982, - 0.3645, - 0.3645, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9278, + 0.9256, + 0.8911, + 0.8867, + 0.9144, + 0.3322, + 0.9078, + 0.9056, + 0.8711, + 0.8467, + 0.8922, + 0.89, + 0.02, + 0.8856, + 0.8811, + 0.8589, + 0.8744, + 0.87, + 0.8856, + 0.01 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 2, - "steps": 11, - "score": 0.8612, - "total_reward": 10.3345, - "completion_rate": 0.9, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 22, + "steps": 45, + "score": 0.5529, + "total_reward": 16.5871, + "completion_rate": 0.7, + "detection_rate": 0.0, + "trust_calibration": 0.709, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -971,31 +1208,49 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.369, - 0.3455, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.3548, + 0.02, + 0.9311, + 0.8844, + 0.3622, + 0.36, + 0.9078, + 0.2933, + 0.8689, + 0.9217, + 0.3444, + 0.32, + 0.8556, + 0.3033, + 0.3011, + 0.8767, + 0.3022, + 0.87, + 0.8678, + 0.8811, + 0.0619, + 0.8544, + 0.3022, + 0.8478, + 0.8083, + 0.3119, + 0.0397, + 0.8322, + 0.4222 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 3, - "steps": 16, - "score": 0.962, - "total_reward": 8.658, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, - "adversarial_poisonings": 0, + "policy": "random", + "task_type": "task3", + "seed": 23, + "steps": 31, + "score": 0.7921, + "total_reward": 18.2193, + "completion_rate": 0.95, + "detection_rate": 0.5, + "trust_calibration": 0.847, + "adversarial_detections": 1, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1008,26 +1263,40 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.8756, + 0.9233, + 0.3481, + 0.9439, + 0.9167, + 0.9144, + 0.86, + 0.9078, + 0.9033, + 0.9261, + 0.8967, + 0.8944, + 0.8922, + 0.89, + 0.8878, + 0.02, + 0.9228, + 0.0819, + 0.8744, + 0.87, + 0.8833, + 0.7254 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 4, - "steps": 11, - "score": 0.911, - "total_reward": 10.9324, - "completion_rate": 1.0, + "policy": "random", + "task_type": "task3", + "seed": 24, + "steps": 45, + "score": 0.6293, + "total_reward": 16.3622, + "completion_rate": 0.75, "detection_rate": 1.0, - "trust_calibration": 0.0, + "trust_calibration": 0.813, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -1042,32 +1311,46 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.3504, - 0.962, - 0.962, - 0.962, - 0.962 + 0.3156, + 0.9483, + 0.9461, + 0.3197, + 0.9072, + 0.8756, + 0.9033, + 0.3541, + 0.02, + 0.9044, + 0.02, + 0.3608, + 0.8511, + 0.2967, + 0.8722, + 0.8356, + 0.8111, + 0.2867, + 0.8544, + 0.845, + 0.3156, + 0.8061, + 0.8367, + 0.7972, + 0.7658 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 5, - "steps": 16, - "score": 0.8266, - "total_reward": 7.439, - "completion_rate": 0.6, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 25, + "steps": 34, + "score": 0.6745, + "total_reward": 16.8613, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.809, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1079,28 +1362,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.3525, - 0.962, - 0.3525, - 0.962, - 0.962 + 0.8933, + 0.9461, + 0.9189, + 0.8644, + 0.9122, + 0.91, + 0.8556, + 0.8711, + 0.8989, + 0.3237, + 0.8944, + 0.8922, + 0.8378, + 0.3356, + 0.8833, + 0.8489, + 0.0797, + 0.3244, + 0.3222, + 0.8878, + 0.8906, + 0.9061, + 0.2967, + 0.01 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 6, - "steps": 10, - "score": 0.962, - "total_reward": 10.582, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 26, + "steps": 43, + "score": 0.5813, + "total_reward": 17.4397, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.815, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1113,31 +1412,50 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.8933, + 0.9211, + 0.9439, + 0.9167, + 0.9144, + 0.02, + 0.3308, + 0.8711, + 0.8467, + 0.8922, + 0.8878, + 0.3486, + 0.3033, + 0.2759, + 0.8767, + 0.3244, + 0.3452, + 0.29, + 0.8156, + 0.8633, + 0.2889, + 0.0597, + 0.8544, + 0.3372, + 0.8478, + 0.2956, + 0.2811, + 0.2889, + 0.4707 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 7, - "steps": 16, - "score": 0.8166, - "total_reward": 9.7988, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "policy": "random", + "task_type": "task3", + "seed": 27, + "steps": 40, + "score": 0.5674, + "total_reward": 15.3205, + "completion_rate": 0.7, + "detection_rate": 0.25, + "trust_calibration": 0.816, + "adversarial_detections": 1, + "adversarial_poisonings": 3, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1149,32 +1467,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.3686, - 0.962, - 0.3826, - 0.3896, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9206, + 0.3433, + 0.3541, + 0.9189, + 0.9244, + 0.3622, + 0.9078, + 0.9133, + 0.8689, + 0.8644, + 0.885, + 0.02, + 0.8533, + 0.9061, + 0.8267, + 0.9139, + 0.073, + 0.3356, + 0.3441, + 0.0619, + 0.8994, + 0.2722, + 0.323, + 0.3308, + 0.8433, + 0.01 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 8, - "steps": 16, - "score": 0.8399, - "total_reward": 8.3989, - "completion_rate": 0.7, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 28, + "steps": 32, + "score": 0.6685, + "total_reward": 16.0443, + "completion_rate": 0.8, + "detection_rate": 0.0, + "trust_calibration": 0.793, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1186,28 +1519,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.3504, - 0.962, - 0.962, - 0.962, - 0.962, - 0.3525, - 0.962, - 0.962 - ] - }, + 0.8956, + 0.9211, + 0.9189, + 0.8844, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.3763, + 0.02, + 0.3167, + 0.8944, + 0.89, + 0.9128, + 0.8856, + 0.8461, + 0.8589, + 0.3267, + 0.8372, + 0.9128, + 0.2933, + 0.3461, + 0.01 + ] + }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 9, - "steps": 16, - "score": 0.785, - "total_reward": 10.2052, - "completion_rate": 0.8, + "policy": "random", + "task_type": "task3", + "seed": 29, + "steps": 40, + "score": 0.6868, + "total_reward": 17.8577, + "completion_rate": 0.85, "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, + "trust_calibration": 0.907, + "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -1221,33 +1568,46 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.3756, - 0.962, - 0.3896, - 0.391, - 0.391, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9278, + 0.8933, + 0.3411, + 0.9189, + 0.02, + 0.8822, + 0.9028, + 0.02, + 0.8511, + 0.8989, + 0.3467, + 0.3444, + 0.8578, + 0.8856, + 0.8511, + 0.2989, + 0.9094, + 0.87, + 0.8856, + 0.3641, + 0.8567, + 0.3572, + 0.8895, + 0.8083, + 0.8353 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 10, - "steps": 10, - "score": 0.962, - "total_reward": 10.582, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 30, + "steps": 28, + "score": 0.4767, + "total_reward": 10.4876, + "completion_rate": 0.5, + "detection_rate": 0.0, + "trust_calibration": 0.745, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 3, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1259,30 +1619,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.8956, + 0.9233, + 0.02, + 0.9189, + 0.02, + 0.9394, + 0.02, + 0.9078, + 0.8533, + 0.9011, + 0.02, + 0.8967, + 0.3144, + 0.2878, + 0.8533, + 0.3441, + 0.0819, + 0.2967, + 0.0774, + 0.835, + 0.01 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 11, - "steps": 16, - "score": 0.7843, - "total_reward": 10.196, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, - "adversarial_poisonings": 0, + "policy": "random", + "task_type": "task3", + "seed": 31, + "steps": 33, + "score": 0.7077, + "total_reward": 19.8143, + "completion_rate": 0.95, + "detection_rate": 0.5, + "trust_calibration": 0.68, + "adversarial_detections": 1, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1295,32 +1666,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.982, - 0.982, - 0.982, - 0.3645, - 0.3645, - 0.982, - 0.3645, - 0.3645, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9528, + 0.9256, + 0.9483, + 0.3711, + 0.9189, + 0.3344, + 0.9122, + 0.91, + 0.02, + 0.3556, + 0.9033, + 0.3211, + 0.8989, + 0.8967, + 0.8944, + 0.89, + 0.3456, + 0.8833, + 0.8989, + 0.0797, + 0.8372, + 0.29, + 0.9073, + 0.8633, + 0.8611, + 0.8589, + 0.679 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 12, - "steps": 16, - "score": 0.962, - "total_reward": 8.658, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 32, + "steps": 40, + "score": 0.5915, + "total_reward": 18.3369, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.392, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1333,28 +1719,50 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.8756, + 0.9233, + 0.9211, + 0.3689, + 0.3444, + 0.3622, + 0.33, + 0.9328, + 0.9056, + 0.3311, + 0.8667, + 0.3214, + 0.8922, + 0.8878, + 0.8856, + 0.3033, + 0.9061, + 0.8789, + 0.8944, + 0.2922, + 0.323, + 0.2956, + 0.3133, + 0.0641, + 0.8839, + 0.8744, + 0.3152, + 0.3178, + 0.3133, + 0.3715 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 13, - "steps": 16, - "score": 0.9003, - "total_reward": 9.0035, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, - "adversarial_poisonings": 0, + "policy": "random", + "task_type": "task3", + "seed": 33, + "steps": 41, + "score": 0.59, + "total_reward": 15.3412, + "completion_rate": 0.7, + "detection_rate": 0.6667, + "trust_calibration": 0.72, + "adversarial_detections": 2, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1367,30 +1775,46 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.3455, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9278, + 0.8911, + 0.9439, + 0.8644, + 0.02, + 0.8578, + 0.02, + 0.9033, + 0.8489, + 0.8967, + 0.3444, + 0.287, + 0.8556, + 0.8311, + 0.3497, + 0.8722, + 0.073, + 0.02, + 0.905, + 0.9006, + 0.8544, + 0.3022, + 0.3308, + 0.3186, + 0.6338 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 14, - "steps": 13, - "score": 0.7534, - "total_reward": 10.5473, + "policy": "random", + "task_type": "task3", + "seed": 34, + "steps": 35, + "score": 0.6414, + "total_reward": 16.0344, "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "detection_rate": 0.0, + "trust_calibration": 0.865, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1402,34 +1826,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.3686, - 0.3756, - 0.962, - 0.3896, - 0.391, - 0.3645, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9278, + 0.8933, + 0.8689, + 0.02, + 0.3344, + 0.3392, + 0.353, + 0.9056, + 0.3303, + 0.8667, + 0.8622, + 0.89, + 0.8556, + 0.8511, + 0.8767, + 0.8744, + 0.8722, + 0.32, + 0.8678, + 0.8456, + 0.8611, + 0.8589, + 0.2944, + 0.01 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 15, - "steps": 13, - "score": 0.8312, - "total_reward": 11.6374, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "policy": "random", + "task_type": "task3", + "seed": 35, + "steps": 39, + "score": 0.5508, + "total_reward": 15.9746, + "completion_rate": 0.7, + "detection_rate": 0.5, + "trust_calibration": 0.645, + "adversarial_detections": 1, + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1441,34 +1876,49 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3546, - 0.962, - 0.962, - 0.962, - 0.3504, - 0.962, - 0.962, - 0.962, - 0.962, - 0.3504, - 0.962, - 0.962, - 0.962 + 0.9278, + 0.3133, + 0.3411, + 0.9167, + 0.8822, + 0.3056, + 0.8511, + 0.8989, + 0.3497, + 0.9194, + 0.3192, + 0.34, + 0.02, + 0.8856, + 0.3033, + 0.2989, + 0.8767, + 0.8744, + 0.2922, + 0.02, + 0.8478, + 0.3386, + 0.8789, + 0.8817, + 0.8172, + 0.8895, + 0.3286, + 0.01 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 16, - "steps": 16, - "score": 0.962, - "total_reward": 8.658, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "policy": "random", + "task_type": "task3", + "seed": 36, + "steps": 31, + "score": 0.5216, + "total_reward": 12.5188, + "completion_rate": 0.55, + "detection_rate": 0.5, + "trust_calibration": 0.506, + "adversarial_detections": 1, + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1480,28 +1930,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9278, + 0.02, + 0.8711, + 0.3519, + 0.3367, + 0.3422, + 0.91, + 0.9328, + 0.02, + 0.8711, + 0.8467, + 0.8922, + 0.34, + 0.3078, + 0.8856, + 0.3333, + 0.3311, + 0.02, + 0.8744, + 0.2922, + 0.9095, + 0.8283, + 0.01 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 17, - "steps": 16, - "score": 0.8943, - "total_reward": 8.0485, + "policy": "random", + "task_type": "task3", + "seed": 37, + "steps": 38, + "score": 0.5875, + "total_reward": 16.449, "completion_rate": 0.7, - "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, - "adversarial_poisonings": 0, + "detection_rate": 0.5, + "trust_calibration": 0.866, + "adversarial_detections": 1, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1514,28 +1979,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.3525, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9278, + 0.8933, + 0.9211, + 0.3689, + 0.02, + 0.9122, + 0.8578, + 0.9033, + 0.3211, + 0.8989, + 0.9217, + 0.86, + 0.3508, + 0.02, + 0.3563, + 0.3641, + 0.8789, + 0.8394, + 0.31, + 0.0708, + 0.3156, + 0.3011, + 0.8984, + 0.3222, + 0.335, + 0.8278, + 0.627 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 18, - "steps": 10, - "score": 0.962, - "total_reward": 10.582, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 38, + "steps": 35, + "score": 0.7022, + "total_reward": 18.2564, + "completion_rate": 0.9, + "detection_rate": 0.0, + "trust_calibration": 0.441, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1548,29 +2032,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9278, + 0.9256, + 0.9233, + 0.8889, + 0.9167, + 0.9072, + 0.8778, + 0.02, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.3574, + 0.84, + 0.0908, + 0.8833, + 0.3311, + 0.8417, + 0.8744, + 0.8722, + 0.8678, + 0.3486, + 0.3133, + 0.3289, + 0.4474 ] }, { - "policy": "heuristic", - "task_type": "task1", - "seed": 19, - "steps": 12, - "score": 0.8675, - "total_reward": 11.2779, - "completion_rate": 1.0, + "policy": "random", + "task_type": "task3", + "seed": 39, + "steps": 33, + "score": 0.7547, + "total_reward": 18.1121, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, + "trust_calibration": 0.604, + "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -1584,33 +2083,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3455, - 0.962, - 0.962, - 0.962, - 0.962, - 0.3504, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9256, + 0.9233, + 0.9139, + 0.9167, + 0.8822, + 0.02, + 0.9078, + 0.9056, + 0.9033, + 0.3189, + 0.8444, + 0.3122, + 0.8578, + 0.9106, + 0.8833, + 0.8489, + 0.9161, + 0.835, + 0.8928, + 0.9083, + 0.3141, + 0.3219, + 0.7927 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 0, - "steps": 16, - "score": 0.962, - "total_reward": 8.658, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 40, + "steps": 37, + "score": 0.5943, + "total_reward": 15.4518, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.636, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1622,26 +2132,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9256, + 0.9233, + 0.8889, + 0.9167, + 0.3574, + 0.8778, + 0.8533, + 0.9011, + 0.8989, + 0.8622, + 0.34, + 0.8878, + 0.02, + 0.2711, + 0.8267, + 0.3244, + 0.33, + 0.8356, + 0.8633, + 0.2811, + 0.0619, + 0.8567, + 0.8172, + 0.27, + 0.01 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 1, - "steps": 16, - "score": 0.962, - "total_reward": 8.658, - "completion_rate": 0.8, + "policy": "random", + "task_type": "task3", + "seed": 41, + "steps": 38, + "score": 0.7027, + "total_reward": 18.9728, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.0, + "trust_calibration": 0.857, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -1656,27 +2183,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9528, + 0.8733, + 0.9461, + 0.9167, + 0.8622, + 0.337, + 0.8756, + 0.3233, + 0.02, + 0.8967, + 0.8944, + 0.32, + 0.8878, + 0.8511, + 0.8789, + 0.8422, + 0.87, + 0.8678, + 0.8833, + 0.3341, + 0.8567, + 0.2744, + 0.3352, + 0.85, + 0.3328, + 0.8448 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 2, - "steps": 16, - "score": 0.8266, - "total_reward": 7.439, - "completion_rate": 0.6, + "policy": "random", + "task_type": "task3", + "seed": 42, + "steps": 34, + "score": 0.7826, + "total_reward": 18.7831, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, + "trust_calibration": 0.768, + "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -1690,29 +2235,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3525, - 0.3525, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9278, + 0.9506, + 0.8911, + 0.9167, + 0.9144, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8967, + 0.8622, + 0.8578, + 0.8656, + 0.02, + 0.3311, + 0.8789, + 0.3267, + 0.9139, + 0.8678, + 0.3286, + 0.9061, + 0.8217, + 0.849 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 3, - "steps": 16, - "score": 0.962, - "total_reward": 8.658, + "policy": "random", + "task_type": "task3", + "seed": 43, + "steps": 34, + "score": 0.6248, + "total_reward": 16.2447, "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "detection_rate": 0.0, + "trust_calibration": 0.572, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1724,27 +2284,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9278, + 0.02, + 0.8911, + 0.9189, + 0.3667, + 0.9122, + 0.91, + 0.8556, + 0.3533, + 0.8489, + 0.8967, + 0.8944, + 0.3422, + 0.34, + 0.8856, + 0.8833, + 0.3011, + 0.8417, + 0.8744, + 0.8722, + 0.02, + 0.8833, + 0.2811, + 0.8567, + 0.01 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 4, - "steps": 16, - "score": 0.8943, - "total_reward": 8.0485, - "completion_rate": 0.7, + "policy": "random", + "task_type": "task3", + "seed": 44, + "steps": 41, + "score": 0.6806, + "total_reward": 19.7386, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, + "trust_calibration": 0.81, + "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -1758,27 +2335,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.3525, - 0.962 - ] - }, - { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 5, - "steps": 16, - "score": 0.8266, - "total_reward": 7.439, - "completion_rate": 0.6, + 0.9256, + 0.9483, + 0.3411, + 0.9189, + 0.3667, + 0.8822, + 0.8778, + 0.9033, + 0.9011, + 0.8667, + 0.3214, + 0.8922, + 0.34, + 0.8878, + 0.3586, + 0.8811, + 0.8267, + 0.8744, + 0.31, + 0.9106, + 0.8789, + 0.8567, + 0.8522, + 0.8895, + 0.2656, + 0.02, + 0.3261, + 0.8264 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 45, + "steps": 35, + "score": 0.8133, + "total_reward": 19.519, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, + "trust_calibration": 0.742, + "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -1792,26 +2389,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.3525, - 0.962, - 0.3525, - 0.962, - 0.962 + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.8867, + 0.8622, + 0.8578, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.8422, + 0.89, + 0.8556, + 0.3033, + 0.8789, + 0.2967, + 0.9139, + 0.9128, + 0.9083, + 0.2811, + 0.8567, + 0.8578 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 6, - "steps": 16, - "score": 0.962, - "total_reward": 8.658, - "completion_rate": 0.8, + "policy": "random", + "task_type": "task3", + "seed": 46, + "steps": 36, + "score": 0.7646, + "total_reward": 19.8787, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.0, + "trust_calibration": 0.813, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -1826,28 +2438,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.3478, + 0.9233, + 0.9461, + 0.9439, + 0.9167, + 0.9144, + 0.88, + 0.9328, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.3167, + 0.8922, + 0.317, + 0.8856, + 0.3363, + 0.3089, + 0.8767, + 0.8372, + 0.32, + 0.8856, + 0.9061, + 0.8567, + 0.8559 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 7, - "steps": 16, - "score": 0.962, - "total_reward": 8.658, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 47, + "steps": 31, + "score": 0.7026, + "total_reward": 16.8629, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.89, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1860,29 +2489,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9278, + 0.9506, + 0.9233, + 0.9189, + 0.8844, + 0.02, + 0.91, + 0.8733, + 0.8489, + 0.8967, + 0.8944, + 0.9172, + 0.89, + 0.3256, + 0.8833, + 0.3311, + 0.3319, + 0.0797, + 0.0774, + 0.87, + 0.8678, + 0.8283, + 0.5511 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 8, - "steps": 16, - "score": 0.8266, - "total_reward": 7.439, - "completion_rate": 0.6, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 48, + "steps": 40, + "score": 0.6251, + "total_reward": 17.5018, + "completion_rate": 0.8, + "detection_rate": 0.0, + "trust_calibration": 0.739, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1894,28 +2538,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.3525, - 0.962, - 0.962, - 0.962, - 0.962, - 0.3525, - 0.962 + 0.9278, + 0.8933, + 0.8867, + 0.9144, + 0.3622, + 0.91, + 0.3278, + 0.9056, + 0.3763, + 0.3211, + 0.3489, + 0.3544, + 0.8922, + 0.373, + 0.8533, + 0.8489, + 0.8744, + 0.82, + 0.8156, + 0.8633, + 0.8411, + 0.9017, + 0.3394, + 0.85, + 0.3328, + 0.3133, + 0.01 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 9, - "steps": 16, - "score": 0.962, - "total_reward": 8.658, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 49, + "steps": 34, + "score": 0.7592, + "total_reward": 18.2216, + "completion_rate": 0.95, + "detection_rate": 0.0, + "trust_calibration": 0.658, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1928,29 +2591,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9278, + 0.9256, + 0.9211, + 0.8867, + 0.9144, + 0.9122, + 0.9078, + 0.9033, + 0.9011, + 0.8667, + 0.8944, + 0.9172, + 0.31, + 0.02, + 0.8833, + 0.3489, + 0.8744, + 0.0752, + 0.8678, + 0.8656, + 0.8633, + 0.8589, + 0.5214 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 10, - "steps": 16, - "score": 0.962, - "total_reward": 8.658, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 50, + "steps": 31, + "score": 0.5546, + "total_reward": 13.3116, + "completion_rate": 0.65, + "detection_rate": 0.0, + "trust_calibration": 0.768, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1962,28 +2640,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.8756, + 0.9233, + 0.8867, + 0.3344, + 0.9122, + 0.91, + 0.3356, + 0.02, + 0.9011, + 0.02, + 0.02, + 0.8944, + 0.84, + 0.8556, + 0.8511, + 0.9039, + 0.2967, + 0.3422, + 0.87, + 0.8928, + 0.0686, + 0.2833, + 0.01 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 11, - "steps": 16, - "score": 0.8943, - "total_reward": 8.0485, - "completion_rate": 0.7, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 51, + "steps": 35, + "score": 0.6911, + "total_reward": 16.5857, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.634, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1996,29 +2689,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.3525, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9206, + 0.9233, + 0.9139, + 0.8644, + 0.9122, + 0.8778, + 0.9056, + 0.3311, + 0.8967, + 0.3222, + 0.8378, + 0.8856, + 0.8833, + 0.9061, + 0.8767, + 0.9172, + 0.073, + 0.3178, + 0.02, + 0.8633, + 0.02, + 0.8217, + 0.4783 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 12, - "steps": 16, - "score": 0.962, - "total_reward": 8.658, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "policy": "random", + "task_type": "task3", + "seed": 52, + "steps": 39, + "score": 0.6835, + "total_reward": 19.1391, + "completion_rate": 0.95, + "detection_rate": 0.5, + "trust_calibration": 0.96, + "adversarial_detections": 1, + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2030,62 +2738,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 - ] - }, - { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 13, - "steps": 16, - "score": 0.962, - "total_reward": 8.658, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", - "difficulty_profile": { - "adaptive": false, - "episodes_seen": 0, - "rolling_detection_rate": 0.0, - "adversarial_threshold": 0.7, - "high_stakes_ratio": 0.35, - "verify_budget_penalty": 0, - "adversary_benign_confidence": 0.88, - "adversary_poison_confidence": 0.92 - }, - "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.3608, + 0.9506, + 0.9233, + 0.9211, + 0.3144, + 0.9122, + 0.337, + 0.9056, + 0.3233, + 0.9011, + 0.8989, + 0.8944, + 0.84, + 0.8356, + 0.3333, + 0.8811, + 0.8789, + 0.8644, + 0.8722, + 0.8878, + 0.8811, + 0.8567, + 0.2744, + 0.8772, + 0.27, + 0.8873, + 0.01 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 14, - "steps": 16, - "score": 0.8943, - "total_reward": 8.0485, - "completion_rate": 0.7, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 53, + "steps": 38, + "score": 0.636, + "total_reward": 16.5363, + "completion_rate": 0.8, + "detection_rate": 0.0, + "trust_calibration": 0.643, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -2098,29 +2791,46 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.3525, - 0.962, - 0.962, - 0.962, - 0.962 + 0.8756, + 0.9483, + 0.8689, + 0.9167, + 0.8822, + 0.8578, + 0.8733, + 0.8489, + 0.3244, + 0.8922, + 0.34, + 0.3378, + 0.8856, + 0.02, + 0.8789, + 0.9017, + 0.3174, + 0.02, + 0.3378, + 0.8656, + 0.2833, + 0.0641, + 0.8217, + 0.8522, + 0.4564 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 15, - "steps": 16, - "score": 0.8266, - "total_reward": 7.439, - "completion_rate": 0.6, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 54, + "steps": 36, + "score": 0.6308, + "total_reward": 16.4, + "completion_rate": 0.8, + "detection_rate": 0.0, + "trust_calibration": 0.696, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2132,29 +2842,46 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3525, - 0.962, - 0.962, - 0.962, - 0.3525, - 0.962, - 0.962, - 0.962 + 0.9278, + 0.9506, + 0.9233, + 0.9211, + 0.8667, + 0.3252, + 0.9078, + 0.9056, + 0.9011, + 0.8989, + 0.3237, + 0.3474, + 0.84, + 0.8878, + 0.8833, + 0.3089, + 0.8744, + 0.87, + 0.3308, + 0.8811, + 0.0619, + 0.02, + 0.3274, + 0.8522, + 0.01 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 16, - "steps": 16, - "score": 0.962, - "total_reward": 8.658, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 55, + "steps": 32, + "score": 0.5079, + "total_reward": 13.2064, + "completion_rate": 0.6, + "detection_rate": 0.0, + "trust_calibration": 0.674, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 3, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2166,29 +2893,46 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.02, + 0.9256, + 0.3241, + 0.9189, + 0.8644, + 0.3322, + 0.33, + 0.9156, + 0.02, + 0.3511, + 0.02, + 0.8622, + 0.8878, + 0.3126, + 0.8833, + 0.8811, + 0.8789, + 0.8767, + 0.3244, + 0.8722, + 0.333, + 0.0708, + 0.0686, + 0.8611, + 0.01 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 17, - "steps": 16, - "score": 0.8943, - "total_reward": 8.0485, - "completion_rate": 0.7, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 56, + "steps": 32, + "score": 0.5675, + "total_reward": 14.1867, + "completion_rate": 0.65, + "detection_rate": 0.0, + "trust_calibration": 0.73, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2200,29 +2944,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.3525, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.3608, + 0.9256, + 0.02, + 0.8867, + 0.9144, + 0.9122, + 0.02, + 0.3578, + 0.9056, + 0.9033, + 0.8667, + 0.8744, + 0.3422, + 0.363, + 0.9128, + 0.8833, + 0.8489, + 0.3774, + 0.3222, + 0.355, + 0.8678, + 0.0686, + 0.8261, + 0.01 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 18, - "steps": 16, - "score": 0.962, - "total_reward": 8.658, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 57, + "steps": 37, + "score": 0.5298, + "total_reward": 13.7744, + "completion_rate": 0.6, + "detection_rate": 0.0, + "trust_calibration": 0.716, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2234,28 +2994,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.962 + 0.9278, + 0.9233, + 0.9189, + 0.3667, + 0.3644, + 0.3622, + 0.343, + 0.9056, + 0.02, + 0.8667, + 0.8922, + 0.8878, + 0.8856, + 0.02, + 0.8289, + 0.8922, + 0.8678, + 0.2933, + 0.3141, + 0.0619, + 0.3197, + 0.2744, + 0.8772, + 0.3, + 0.01 ] }, { - "policy": "oracle_lite", - "task_type": "task1", - "seed": 19, - "steps": 16, - "score": 0.8943, - "total_reward": 8.0485, - "completion_rate": 0.7, - "detection_rate": 1.0, - "trust_calibration": 0.0, + "policy": "random", + "task_type": "task3", + "seed": 58, + "steps": 37, + "score": 0.6112, + "total_reward": 17.1148, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.808, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -2268,28 +3045,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.962, - 0.962, - 0.962, - 0.962, - 0.962, - 0.3525, - 0.962, - 0.962 + 0.9278, + 0.9506, + 0.3433, + 0.3167, + 0.02, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.2911, + 0.3259, + 0.3467, + 0.8422, + 0.8878, + 0.3163, + 0.8811, + 0.8789, + 0.9194, + 0.8522, + 0.8856, + 0.0663, + 0.3341, + 0.8567, + 0.3044, + 0.3022, + 0.3, + 0.4822 ] }, { "policy": "random", - "task_type": "task2", - "seed": 0, - "steps": 25, - "score": 0.5298, - "total_reward": 9.5365, - "completion_rate": 0.667, - "detection_rate": 1.0, - "trust_calibration": 0.461, + "task_type": "task3", + "seed": 59, + "steps": 39, + "score": 0.6007, + "total_reward": 16.2199, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.55, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -2302,35 +3098,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9083, - 0.925, + 0.9278, + 0.3533, + 0.9139, 0.9167, - 0.02, - 0.8917, - 0.8833, - 0.8667, - 0.2233, + 0.9122, + 0.33, + 0.8756, 0.02, 0.02, - 0.725, - 0.8083, - 0.6917, - 0.775, - 0.1317, - 0.115, - 0.5949 + 0.8467, + 0.8944, + 0.86, + 0.3378, + 0.3063, + 0.8811, + 0.8789, + 0.8444, + 0.8722, + 0.32, + 0.2956, + 0.0663, + 0.2811, + 0.8839, + 0.8722, + 0.2878, + 0.4134 ] }, { "policy": "random", - "task_type": "task2", - "seed": 1, - "steps": 31, - "score": 0.7252, - "total_reward": 13.054, - "completion_rate": 1.0, + "task_type": "task3", + "seed": 60, + "steps": 39, + "score": 0.7149, + "total_reward": 17.1579, + "completion_rate": 0.85, "detection_rate": 1.0, - "trust_calibration": 0.569, + "trust_calibration": 0.717, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -2345,37 +3150,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.9833, + 0.9278, + 0.9233, + 0.9211, + 0.02, + 0.3044, + 0.92, + 0.9056, + 0.3233, + 0.3211, + 0.8967, + 0.9194, + 0.89, + 0.3056, 0.8833, - 0.8583, - 0.875, - 0.7583, - 0.8417, - 0.165, - 0.7833, - 0.8, - 0.6833, - 0.735, - 0.1217, - 0.7167, - 0.7, - 0.7083, - 0.849 + 0.02, + 0.8767, + 0.8994, + 0.87, + 0.8306, + 0.8789, + 0.8194, + 0.8678, + 0.7902 ] }, { "policy": "random", - "task_type": "task2", - "seed": 2, - "steps": 27, - "score": 0.6551, - "total_reward": 11.7913, - "completion_rate": 0.867, - "detection_rate": 1.0, - "trust_calibration": 0.579, + "task_type": "task3", + "seed": 61, + "steps": 34, + "score": 0.6745, + "total_reward": 17.5378, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.937, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -2388,37 +3199,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3517, - 0.262, + 0.9278, + 0.8911, + 0.9439, 0.9167, - 0.9083, - 0.8583, - 0.875, - 0.7583, - 0.7417, - 0.825, - 0.7083, - 0.8, - 0.6833, - 0.6667, - 0.75, - 0.0967, - 0.0983, - 0.766 + 0.3644, + 0.3322, + 0.9078, + 0.8533, + 0.9011, + 0.9239, + 0.8944, + 0.8922, + 0.8828, + 0.3356, + 0.3333, + 0.8967, + 0.0774, + 0.8722, + 0.8678, + 0.8906, + 0.0663, + 0.02, + 0.2789, + 0.8567, + 0.5562 ] }, { "policy": "random", - "task_type": "task2", - "seed": 3, - "steps": 28, - "score": 0.6154, - "total_reward": 12.9233, - "completion_rate": 0.867, - "detection_rate": 1.0, - "trust_calibration": 0.586, + "task_type": "task3", + "seed": 62, + "steps": 38, + "score": 0.5955, + "total_reward": 16.078, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.583, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -2431,41 +3250,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.99, - 0.9833, - 0.925, - 0.2717, - 0.8, - 0.8917, - 0.9333, - 0.875, - 0.2217, - 0.2233, - 0.7417, + 0.9278, + 0.8733, + 0.8689, + 0.9144, + 0.3622, + 0.02, + 0.8556, + 0.9033, + 0.9011, + 0.02, + 0.3544, + 0.8922, + 0.89, + 0.8878, 0.8333, - 0.1483, - 0.7917, - 0.1383, - 0.7333, - 0.75, - 0.7083, - 0.123, - 0.7686 + 0.3089, + 0.0797, + 0.3022, + 0.3378, + 0.8633, + 0.8789, + 0.3297, + 0.8794, + 0.3022, + 0.8478, + 0.4237 ] }, { "policy": "random", - "task_type": "task2", - "seed": 4, - "steps": 28, - "score": 0.6575, - "total_reward": 12.4928, - "completion_rate": 0.867, - "detection_rate": 1.0, - "trust_calibration": 0.458, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "task_type": "task3", + "seed": 63, + "steps": 35, + "score": 0.6184, + "total_reward": 16.0796, + "completion_rate": 0.75, + "detection_rate": 0.5, + "trust_calibration": 0.677, + "adversarial_detections": 1, + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2477,37 +3302,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.9833, - 0.925, - 0.2817, + 0.9278, + 0.9256, + 0.3511, + 0.9167, + 0.8944, + 0.3, + 0.9078, + 0.9306, + 0.9033, + 0.02, 0.8667, - 0.8833, - 0.23, - 0.2113, - 0.8417, - 0.8, - 0.775, - 0.7917, - 0.775, - 0.7583, - 0.75, - 0.1067, - 0.1313, - 0.7235 + 0.3122, + 0.89, + 0.8878, + 0.9106, + 0.8811, + 0.8789, + 0.8744, + 0.3222, + 0.3308, + 0.2933, + 0.2811, + 0.3119, + 0.8961, + 0.01 ] }, { "policy": "random", - "task_type": "task2", - "seed": 5, - "steps": 26, - "score": 0.6206, - "total_reward": 12.4128, - "completion_rate": 0.867, + "task_type": "task3", + "seed": 64, + "steps": 43, + "score": 0.6578, + "total_reward": 19.7347, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.408, - "adversarial_detections": 0, + "trust_calibration": 0.572, + "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -2521,40 +3353,50 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.9, - 0.9167, - 0.24, - 0.2467, - 0.2383, - 0.7667, - 0.9083, - 0.85, - 0.1967, - 0.725, - 0.8667, - 0.165, - 0.7917, - 0.7833, - 0.7667, + 0.9256, + 0.9233, + 0.9211, + 0.3367, + 0.8822, + 0.33, + 0.9056, + 0.3463, + 0.8489, + 0.3167, 0.02, - 0.65, - 0.7061 + 0.3422, + 0.31, + 0.3086, + 0.8311, + 0.3367, + 0.8744, + 0.8722, + 0.9028, + 0.8656, + 0.3263, + 0.8611, + 0.8589, + 0.8744, + 0.87, + 0.8106, + 0.8061, + 0.2889, + 0.7624 ] }, { "policy": "random", - "task_type": "task2", - "seed": 6, - "steps": 31, - "score": 0.7065, - "total_reward": 12.7163, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.576, + "task_type": "task3", + "seed": 65, + "steps": 34, + "score": 0.6769, + "total_reward": 16.2452, + "completion_rate": 0.8, + "detection_rate": 0.0, + "trust_calibration": 0.879, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2566,37 +3408,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9083, - 0.8833, - 0.9, - 0.8917, - 0.8833, - 0.875, + 0.9528, + 0.8933, + 0.9461, + 0.3689, + 0.8844, + 0.86, + 0.8556, + 0.9283, + 0.3511, + 0.8989, + 0.3244, + 0.8978, + 0.8856, 0.8833, - 0.7417, - 0.188, - 0.8667, - 0.13, - 0.7583, - 0.7667, - 0.7583, - 0.7083, - 0.7, - 0.8083 + 0.3011, + 0.8767, + 0.8544, + 0.8878, + 0.8656, + 0.3011, + 0.2789, + 0.8817, + 0.01 ] }, { "policy": "random", - "task_type": "task2", - "seed": 7, - "steps": 25, - "score": 0.7366, - "total_reward": 12.5218, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.776, + "task_type": "task3", + "seed": 66, + "steps": 34, + "score": 0.5832, + "total_reward": 15.7473, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.662, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 3, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -2609,37 +3457,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8333, - 0.8917, - 0.875, - 0.8917, - 0.775, - 0.8583, - 0.85, - 0.8333, - 0.825, - 0.1817, - 0.8, - 0.6833, - 0.825, + 0.9278, + 0.8933, + 0.9211, + 0.9167, + 0.3644, + 0.91, + 0.8756, + 0.9033, + 0.3511, + 0.8989, + 0.02, + 0.8944, + 0.86, + 0.02, + 0.3063, + 0.8789, + 0.8767, + 0.0774, 0.02, - 0.7583, - 0.8784 + 0.8878, + 0.8906, + 0.0663, + 0.0641, + 0.8589, + 0.3067, + 0.4526 ] }, { "policy": "random", - "task_type": "task2", - "seed": 8, - "steps": 25, - "score": 0.7329, - "total_reward": 13.9253, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.425, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "task_type": "task3", + "seed": 67, + "steps": 34, + "score": 0.5986, + "total_reward": 14.3671, + "completion_rate": 0.7, + "detection_rate": 0.0, + "trust_calibration": 0.891, + "adversarial_detections": 0, + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2651,37 +3509,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, - 0.335, - 0.9167, + 0.9278, + 0.02, + 0.9233, + 0.3489, + 0.9144, + 0.3622, + 0.36, + 0.9328, + 0.3003, + 0.9011, 0.8667, + 0.8922, + 0.8378, + 0.02, 0.8833, - 0.875, - 0.2767, - 0.8583, - 0.9, - 0.8, - 0.8167, - 0.7, - 0.1233, - 0.775, - 0.7667, - 0.8083, - 0.75, - 0.7987 + 0.8489, + 0.9194, + 0.89, + 0.8678, + 0.8811, + 0.0619, + 0.3397, + 0.01 ] }, { "policy": "random", - "task_type": "task2", - "seed": 9, - "steps": 27, - "score": 0.7062, - "total_reward": 12.712, - "completion_rate": 0.933, + "task_type": "task3", + "seed": 68, + "steps": 33, + "score": 0.7306, + "total_reward": 18.2654, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.597, - "adversarial_detections": 0, + "trust_calibration": 0.684, + "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -2695,37 +3558,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.99, - 0.9333, - 0.925, - 0.8083, - 0.278, - 0.8583, - 0.925, + 0.8756, + 0.9233, + 0.3481, + 0.9167, + 0.3644, + 0.9122, + 0.91, + 0.9078, + 0.9033, + 0.3511, + 0.8989, + 0.3297, + 0.8922, + 0.8878, 0.02, - 0.2233, - 0.8417, - 0.825, - 0.8167, - 0.735, - 0.6833, - 0.7333, - 0.65, - 0.8157 + 0.02, + 0.8811, + 0.9039, + 0.8767, + 0.8722, + 0.9095, + 0.8656, + 0.8611, + 0.8126 ] }, { "policy": "random", - "task_type": "task2", - "seed": 10, - "steps": 26, - "score": 0.5723, - "total_reward": 12.0174, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.855, + "task_type": "task3", + "seed": 69, + "steps": 40, + "score": 0.7144, + "total_reward": 18.574, + "completion_rate": 0.95, + "detection_rate": 0.0, + "trust_calibration": 0.723, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -2738,40 +3608,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.925, - 0.9667, - 0.8667, - 0.2483, - 0.8667, - 0.02, - 0.205, - 0.8917, - 0.825, - 0.8167, - 0.775, - 0.1567, + 0.3478, + 0.9506, + 0.9233, + 0.9189, + 0.9167, + 0.3422, + 0.8756, + 0.9283, + 0.9011, + 0.8989, 0.02, - 0.153, - 0.7667, - 0.8083, - 0.105, - 0.1067, - 0.8194 + 0.3122, + 0.8556, + 0.8833, + 0.8467, + 0.8222, + 0.8678, + 0.8656, + 0.3133, + 0.8239, + 0.0597, + 0.8544, + 0.87, + 0.8106, + 0.5244 ] }, { "policy": "random", - "task_type": "task2", - "seed": 11, - "steps": 23, - "score": 0.7197, - "total_reward": 12.955, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.57, + "task_type": "task3", + "seed": 70, + "steps": 34, + "score": 0.6867, + "total_reward": 17.1674, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.786, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -2784,38 +3659,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8333, - 0.925, - 0.9167, - 0.2703, - 0.8667, - 0.2483, - 0.8667, - 0.9083, + 0.9278, + 0.9506, 0.02, - 0.8083, - 0.825, - 0.8167, - 0.7683, - 0.7917, - 0.7833, - 0.7417, - 0.8063 + 0.8889, + 0.9144, + 0.33, + 0.9078, + 0.9056, + 0.9033, + 0.8489, + 0.3497, + 0.8944, + 0.8922, + 0.8828, + 0.8856, + 0.8833, + 0.8789, + 0.3774, + 0.87, + 0.3706, + 0.0663, + 0.8239, + 0.8567, + 0.5184 ] }, { "policy": "random", - "task_type": "task2", - "seed": 12, - "steps": 30, - "score": 0.6047, - "total_reward": 12.0935, - "completion_rate": 0.867, - "detection_rate": 1.0, - "trust_calibration": 0.344, + "task_type": "task3", + "seed": 71, + "steps": 29, + "score": 0.5734, + "total_reward": 14.3348, + "completion_rate": 0.65, + "detection_rate": 0.0, + "trust_calibration": 0.702, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 3, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2827,40 +3709,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.99, - 0.9, - 0.2717, - 0.9083, - 0.8267, - 0.8833, - 0.8333, + 0.9278, + 0.9256, + 0.8911, + 0.3459, + 0.9167, + 0.3644, + 0.88, + 0.9328, + 0.8711, + 0.8989, + 0.8967, + 0.3574, + 0.3422, + 0.89, + 0.8856, 0.02, - 0.2197, - 0.1883, - 0.18, - 0.7833, - 0.7917, - 0.7417, - 0.7583, - 0.65, - 0.7, - 0.0633, - 0.6839 + 0.8811, + 0.0819, + 0.3267, + 0.3244, + 0.8722, + 0.353, + 0.0708, + 0.01 ] }, { "policy": "random", - "task_type": "task2", - "seed": 13, + "task_type": "task3", + "seed": 72, "steps": 30, - "score": 0.6649, - "total_reward": 11.9681, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.271, + "score": 0.7073, + "total_reward": 16.2683, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.584, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2872,36 +3759,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.8917, - 0.8, - 0.8833, - 0.2067, - 0.8583, - 0.8083, + 0.8956, + 0.9483, + 0.3489, + 0.8844, + 0.91, + 0.9078, + 0.9306, + 0.9283, + 0.9011, + 0.3259, + 0.8644, + 0.84, + 0.8878, + 0.9106, + 0.3033, + 0.8811, + 0.9039, + 0.8394, + 0.87, 0.02, - 0.7833, - 0.6917, - 0.75, - 0.6583, - 0.75, - 0.7333, - 0.09, - 0.7, - 0.7014 + 0.8906, + 0.01 ] }, { "policy": "random", - "task_type": "task2", - "seed": 14, - "steps": 26, - "score": 0.7146, - "total_reward": 13.5771, - "completion_rate": 1.0, + "task_type": "task3", + "seed": 73, + "steps": 34, + "score": 0.7782, + "total_reward": 18.6761, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.416, - "adversarial_detections": 0, + "trust_calibration": 0.803, + "adversarial_detections": 2, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -2915,39 +3807,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.825, - 0.8833, - 0.8667, - 0.2383, - 0.875, - 0.8667, - 0.8583, - 0.85, + 0.9278, + 0.9233, + 0.9189, + 0.9167, + 0.3644, + 0.91, + 0.9078, + 0.9056, + 0.3533, + 0.02, + 0.8467, + 0.8944, + 0.28, + 0.8878, 0.8333, - 0.7833, - 0.8, - 0.1797, - 0.7833, - 0.775, - 0.1217, - 0.65, - 0.7957 + 0.8811, + 0.8267, + 0.8372, + 0.8328, + 0.8656, + 0.8883, + 0.9006, + 0.8577 ] }, { "policy": "random", - "task_type": "task2", - "seed": 15, - "steps": 27, - "score": 0.5573, - "total_reward": 12.2603, - "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.606, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "task_type": "task3", + "seed": 74, + "steps": 33, + "score": 0.7164, + "total_reward": 17.1935, + "completion_rate": 0.9, + "detection_rate": 0.3333, + "trust_calibration": 0.957, + "adversarial_detections": 1, + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2959,87 +3856,97 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3037, - 0.9333, - 0.925, - 0.8083, - 0.265, - 0.2567, - 0.8433, - 0.925, - 0.7583, - 0.215, - 0.1967, - 0.1883, - 0.8167, - 0.8, - 0.1567, - 0.675, - 0.7583, - 0.75, - 0.7417, - 0.0883, - 0.732 - ] - }, - { - "policy": "random", - "task_type": "task2", - "seed": 16, - "steps": 23, - "score": 0.5543, - "total_reward": 11.0864, - "completion_rate": 0.733, - "detection_rate": 1.0, - "trust_calibration": 0.437, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", - "difficulty_profile": { - "adaptive": false, - "episodes_seen": 0, - "rolling_detection_rate": 0.0, - "adversarial_threshold": 0.7, - "high_stakes_ratio": 0.35, + 0.9278, + 0.9506, + 0.9233, + 0.9211, + 0.8844, + 0.86, + 0.9056, + 0.3233, + 0.3189, + 0.9217, + 0.8944, + 0.3352, + 0.89, + 0.8856, + 0.8311, + 0.0819, + 0.9161, + 0.8722, + 0.8678, + 0.8906, + 0.8633, + 0.8589, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 75, + "steps": 40, + "score": 0.6588, + "total_reward": 18.4472, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.642, + "adversarial_detections": 1, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.9333, - 0.925, - 0.9167, - 0.9083, - 0.2217, - 0.02, - 0.925, - 0.02, - 0.2133, - 0.8167, - 0.825, - 0.1817, - 0.02, - 0.8, - 0.7583, - 0.775, - 0.1317, - 0.6298 + 0.9278, + 0.9256, + 0.8911, + 0.8667, + 0.3344, + 0.86, + 0.3278, + 0.3256, + 0.9033, + 0.3511, + 0.3259, + 0.8444, + 0.89, + 0.8856, + 0.3103, + 0.8811, + 0.8867, + 0.8994, + 0.8722, + 0.3378, + 0.3333, + 0.2811, + 0.9017, + 0.8544, + 0.8917, + 0.2756, + 0.7515 ] }, { "policy": "random", - "task_type": "task2", - "seed": 17, - "steps": 27, - "score": 0.5694, - "total_reward": 11.9565, + "task_type": "task3", + "seed": 76, + "steps": 28, + "score": 0.7202, + "total_reward": 15.8447, "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.688, + "detection_rate": 0.0, + "trust_calibration": 0.793, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -3051,39 +3958,40 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.02, - 0.28, - 0.2613, - 0.7917, - 0.2483, - 0.875, - 0.2213, - 0.85, - 0.8417, - 0.7933, - 0.233, - 0.8667, - 0.7, - 0.7833, - 0.7667, - 0.7583, - 0.168, - 0.0703, - 0.7609 + 0.9278, + 0.3526, + 0.9233, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.935, + 0.9078, + 0.8983, + 0.8689, + 0.8967, + 0.8944, + 0.8922, + 0.8878, + 0.9033, + 0.3189, + 0.8394, + 0.3222, + 0.333, + 0.01 ] }, { "policy": "random", - "task_type": "task2", - "seed": 18, - "steps": 23, - "score": 0.6662, - "total_reward": 11.3256, + "task_type": "task3", + "seed": 77, + "steps": 34, + "score": 0.6731, + "total_reward": 16.8286, "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.602, - "adversarial_detections": 0, + "trust_calibration": 0.682, + "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -3097,36 +4005,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.925, + 0.9256, + 0.9233, + 0.9211, 0.02, - 0.9, - 0.2567, - 0.2483, - 0.875, - 0.7583, - 0.85, - 0.8083, - 0.825, - 0.8083, - 0.85, + 0.9167, + 0.9394, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.8989, + 0.8894, + 0.3422, + 0.89, + 0.2756, + 0.8311, + 0.3467, + 0.2944, + 0.9117, + 0.8678, + 0.3363, 0.02, - 0.75, - 0.7306 + 0.8567, + 0.775 ] }, { "policy": "random", - "task_type": "task2", - "seed": 19, - "steps": 27, - "score": 0.7003, - "total_reward": 12.6055, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.625, - "adversarial_detections": 0, - "adversarial_poisonings": 0, + "task_type": "task3", + "seed": 78, + "steps": 44, + "score": 0.627, + "total_reward": 18.1844, + "completion_rate": 0.8, + "detection_rate": 0.5, + "trust_calibration": 0.849, + "adversarial_detections": 1, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -3139,37 +4055,48 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.2633, - 0.9167, - 0.9083, + 0.9528, + 0.8933, + 0.3159, + 0.8822, + 0.8578, + 0.9033, + 0.9011, 0.8917, + 0.8944, + 0.8578, + 0.3356, 0.8833, - 0.23, - 0.02, - 0.825, - 0.8917, - 0.825, - 0.7083, - 0.8, - 0.6833, - 0.7417, - 0.7583, - 0.7083, - 0.8255 + 0.2989, + 0.3267, + 0.3374, + 0.2922, + 0.32, + 0.8156, + 0.8711, + 0.8589, + 0.3022, + 0.3, + 0.0508, + 0.8883, + 0.8661, + 0.3119, + 0.2567, + 0.6445 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 0, - "steps": 31, - "score": 0.6145, - "total_reward": 12.2902, + "policy": "random", + "task_type": "task3", + "seed": 79, + "steps": 37, + "score": 0.6279, + "total_reward": 16.9527, "completion_rate": 0.8, - "detection_rate": 1.0, - "trust_calibration": 0.72, + "detection_rate": 0.0, + "trust_calibration": 0.561, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -3182,39 +4109,46 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.99, - 0.9833, - 0.975, - 0.2817, - 0.2733, - 0.265, - 0.2467, - 0.875, - 0.8583, - 0.8417, - 0.19, - 0.8083, - 0.7917, - 0.775, - 0.7583, - 0.7417, - 0.09, - 0.0733, - 0.7719 + 0.9256, + 0.9233, + 0.8867, + 0.9144, + 0.9122, + 0.02, + 0.8756, + 0.9011, + 0.8989, + 0.3467, + 0.8622, + 0.89, + 0.8556, + 0.8511, + 0.9039, + 0.3267, + 0.0774, + 0.02, + 0.343, + 0.8656, + 0.3011, + 0.8589, + 0.3067, + 0.8172, + 0.333, + 0.4381 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 1, - "steps": 17, - "score": 0.768, - "total_reward": 13.8236, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.282, + "policy": "random", + "task_type": "task3", + "seed": 80, + "steps": 40, + "score": 0.6766, + "total_reward": 18.9455, + "completion_rate": 0.9, + "detection_rate": 0.0, + "trust_calibration": 0.946, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -3227,37 +4161,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.99, - 0.9833, - 0.975, - 0.2817, - 0.2733, - 0.265, - 0.8917, - 0.8833, - 0.875, - 0.8667, - 0.8583, - 0.85, - 0.8417, - 0.8333, - 0.825, - 0.8167, - 0.7053 + 0.9278, + 0.9256, + 0.9483, + 0.3489, + 0.8644, + 0.3622, + 0.91, + 0.9328, + 0.9056, + 0.8511, + 0.3189, + 0.3467, + 0.3144, + 0.32, + 0.8933, + 0.8811, + 0.8267, + 0.8994, + 0.8722, + 0.8678, + 0.0686, + 0.9061, + 0.0619, + 0.8194, + 0.323, + 0.8456, + 0.5627 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 2, - "steps": 17, - "score": 0.7237, - "total_reward": 13.0266, - "completion_rate": 0.867, - "detection_rate": 1.0, - "trust_calibration": 0.284, - "adversarial_detections": 0, - "adversarial_poisonings": 0, + "policy": "random", + "task_type": "task3", + "seed": 81, + "steps": 33, + "score": 0.7394, + "total_reward": 17.745, + "completion_rate": 0.9, + "detection_rate": 0.5, + "trust_calibration": 0.704, + "adversarial_detections": 1, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -3270,76 +4214,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3517, - 0.2883, - 0.925, + 0.8956, + 0.9233, + 0.9211, 0.9167, - 0.9083, - 0.9, - 0.8917, - 0.8833, - 0.875, - 0.8667, - 0.8583, - 0.85, - 0.8417, - 0.8333, - 0.825, - 0.1787, - 0.6626 - ] - }, - { - "policy": "heuristic", - "task_type": "task2", - "seed": 3, - "steps": 30, - "score": 0.7823, - "total_reward": 12.5171, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.844, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", - "difficulty_profile": { - "adaptive": false, - "episodes_seen": 0, - "rolling_detection_rate": 0.0, - "adversarial_threshold": 0.7, - "high_stakes_ratio": 0.35, - "verify_budget_penalty": 0, - "adversary_benign_confidence": 0.88, - "adversary_poison_confidence": 0.92 - }, - "rewards": [ - 0.9333, - 0.9167, - 0.9, + 0.8822, + 0.91, + 0.9078, + 0.3256, + 0.3533, + 0.9261, + 0.8467, + 0.02, + 0.8922, + 0.3078, + 0.8856, 0.8833, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.1317, - 0.75, - 0.7333, - 0.7167, - 0.9021 + 0.0841, + 0.8967, + 0.8744, + 0.87, + 0.9073, + 0.8261, + 0.6676 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 4, - "steps": 17, - "score": 0.7999, - "total_reward": 14.3981, - "completion_rate": 1.0, + "policy": "random", + "task_type": "task3", + "seed": 82, + "steps": 39, + "score": 0.6307, + "total_reward": 17.661, + "completion_rate": 0.75, "detection_rate": 1.0, - "trust_calibration": 0.426, + "trust_calibration": 0.581, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -3354,38 +4263,48 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.9333, - 0.925, - 0.9167, - 0.9083, - 0.9, - 0.2537, - 0.8833, - 0.875, - 0.8667, - 0.8583, - 0.85, - 0.8417, - 0.8333, - 0.825, - 0.1787, - 0.7991 + 0.8956, + 0.9233, + 0.8889, + 0.3667, + 0.9122, + 0.2978, + 0.3586, + 0.9033, + 0.3289, + 0.9294, + 0.89, + 0.8556, + 0.3663, + 0.3311, + 0.3289, + 0.8767, + 0.8994, + 0.3222, + 0.32, + 0.3528, + 0.3683, + 0.8411, + 0.8567, + 0.02, + 0.815, + 0.8478, + 0.7211 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 5, - "steps": 30, - "score": 0.6545, - "total_reward": 10.4723, - "completion_rate": 0.733, - "detection_rate": 1.0, - "trust_calibration": 0.816, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "policy": "random", + "task_type": "task3", + "seed": 83, + "steps": 33, + "score": 0.6648, + "total_reward": 15.9564, + "completion_rate": 0.8, + "detection_rate": 0.25, + "trust_calibration": 0.917, + "adversarial_detections": 1, + "adversarial_poisonings": 3, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -3397,34 +4316,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, - 0.9167, - 0.9, - 0.2483, - 0.8667, - 0.215, - 0.8333, - 0.8167, - 0.165, - 0.7833, - 0.1317, - 0.75, - 0.7333, - 0.7167, - 0.7623 + 0.8956, + 0.3663, + 0.9211, + 0.9439, + 0.9144, + 0.3322, + 0.9078, + 0.9306, + 0.8961, + 0.8789, + 0.8967, + 0.8944, + 0.86, + 0.8356, + 0.8811, + 0.9184, + 0.3474, + 0.3552, + 0.073, + 0.0708, + 0.8633, + 0.9039, + 0.01 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 6, - "steps": 15, - "score": 0.8749, - "total_reward": 13.9981, - "completion_rate": 1.0, + "policy": "random", + "task_type": "task3", + "seed": 84, + "steps": 39, + "score": 0.7325, + "total_reward": 19.0457, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.28, - "adversarial_detections": 0, + "trust_calibration": 0.851, + "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -3438,36 +4365,46 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.9333, - 0.925, - 0.9167, - 0.9083, - 0.9, - 0.8917, + 0.8756, + 0.9233, + 0.02, + 0.3067, + 0.9122, + 0.8578, + 0.3256, + 0.8511, + 0.8989, + 0.8444, + 0.8922, + 0.8578, 0.8833, - 0.875, - 0.8667, - 0.8583, + 0.8811, + 0.8767, + 0.8744, + 0.3252, + 0.8678, + 0.8656, + 0.3363, + 0.8589, + 0.8961, 0.85, - 0.8417, - 0.8333, - 0.7481 + 0.2978, + 0.8586 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 7, - "steps": 28, - "score": 0.7465, - "total_reward": 13.4373, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.833, + "policy": "random", + "task_type": "task3", + "seed": 85, + "steps": 32, + "score": 0.661, + "total_reward": 15.2038, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.807, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -3479,37 +4416,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.9333, - 0.313, - 0.9167, - 0.3163, - 0.318, - 0.8833, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.8983 + 0.8956, + 0.9483, + 0.9211, + 0.9189, + 0.9144, + 0.9122, + 0.33, + 0.3356, + 0.3533, + 0.3211, + 0.8967, + 0.86, + 0.8533, + 0.8811, + 0.3289, + 0.8767, + 0.8744, + 0.8722, + 0.8856, + 0.0663, + 0.8861, + 0.01 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 8, - "steps": 17, - "score": 0.7992, - "total_reward": 14.3856, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.39, + "policy": "random", + "task_type": "task3", + "seed": 86, + "steps": 44, + "score": 0.6056, + "total_reward": 19.3805, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.837, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -3522,38 +4464,52 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.2953, - 0.925, - 0.9167, + 0.3478, + 0.8933, + 0.3489, + 0.8844, + 0.9372, + 0.9078, + 0.3556, + 0.8689, + 0.8967, + 0.3222, + 0.89, + 0.3078, + 0.8856, 0.9083, - 0.9, - 0.2537, - 0.8833, - 0.875, - 0.8667, - 0.8583, - 0.85, - 0.8417, - 0.8333, - 0.825, - 0.8167, - 0.7866 + 0.3311, + 0.2967, + 0.3274, + 0.88, + 0.8678, + 0.2926, + 0.3133, + 0.8861, + 0.0619, + 0.8544, + 0.3022, + 0.8678, + 0.8456, + 0.8061, + 0.3119, + 0.02, + 0.5089 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 9, - "steps": 28, - "score": 0.7248, - "total_reward": 13.7712, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.834, + "policy": "random", + "task_type": "task3", + "seed": 87, + "steps": 37, + "score": 0.6574, + "total_reward": 16.4362, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.807, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -3565,38 +4521,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.9333, - 0.925, - 0.3147, - 0.9083, - 0.318, - 0.3117, - 0.3033, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.8986 - ] - }, + 0.8933, + 0.9211, + 0.9439, + 0.8644, + 0.86, + 0.9078, + 0.8733, + 0.9011, + 0.8667, + 0.8922, + 0.8556, + 0.8833, + 0.3311, + 0.8789, + 0.8767, + 0.3422, + 0.8878, + 0.2856, + 0.8261, + 0.0619, + 0.8994, + 0.02, + 0.303, + 0.01 + ] + }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 10, - "steps": 16, - "score": 0.797, - "total_reward": 13.5485, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.432, + "policy": "random", + "task_type": "task3", + "seed": 88, + "steps": 36, + "score": 0.5911, + "total_reward": 15.369, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.855, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 3, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -3609,35 +4571,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.9333, - 0.925, - 0.9167, - 0.9083, - 0.9, - 0.8917, - 0.8833, - 0.875, + 0.02, + 0.8733, + 0.9461, 0.8667, - 0.8583, - 0.212, - 0.2037, - 0.8333, - 0.825, - 0.7578 + 0.9144, + 0.905, + 0.9078, + 0.9056, + 0.3233, + 0.8989, + 0.8967, + 0.8922, + 0.3178, + 0.02, + 0.02, + 0.0841, + 0.8967, + 0.0774, + 0.0752, + 0.8678, + 0.8633, + 0.8789, + 0.8194, + 0.3022, + 0.4961 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 11, - "steps": 30, - "score": 0.6763, - "total_reward": 12.8505, - "completion_rate": 0.867, + "policy": "random", + "task_type": "task3", + "seed": 89, + "steps": 34, + "score": 0.7869, + "total_reward": 20.4589, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.825, - "adversarial_detections": 0, + "trust_calibration": 0.809, + "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -3651,39 +4622,46 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.99, - 0.9833, - 0.975, - 0.2817, - 0.2733, - 0.265, - 0.8833, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.0817, - 0.8522 + 0.8956, + 0.9233, + 0.3481, + 0.9439, + 0.3367, + 0.9144, + 0.9122, + 0.337, + 0.9056, + 0.9033, + 0.9011, + 0.9239, + 0.8644, + 0.8922, + 0.89, + 0.8878, + 0.925, + 0.3441, + 0.8967, + 0.3474, + 0.87, + 0.8928, + 0.8283, + 0.8789, + 0.8766 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 12, - "steps": 30, - "score": 0.8246, - "total_reward": 13.1941, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.84, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "policy": "random", + "task_type": "task3", + "seed": 90, + "steps": 31, + "score": 0.6723, + "total_reward": 14.7899, + "completion_rate": 0.8, + "detection_rate": 0.3333, + "trust_calibration": 0.902, + "adversarial_detections": 1, + "adversarial_poisonings": 2, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -3695,33 +4673,39 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, - 0.9167, - 0.9, + 0.9206, + 0.8889, + 0.3367, + 0.8822, + 0.8578, + 0.9056, + 0.9033, + 0.9089, + 0.02, + 0.8922, + 0.89, + 0.8878, + 0.8856, 0.8833, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.9441 + 0.9061, + 0.8767, + 0.0774, + 0.02, + 0.9095, + 0.8633, + 0.01 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 13, - "steps": 30, - "score": 0.7935, - "total_reward": 13.4903, - "completion_rate": 1.0, + "policy": "random", + "task_type": "task3", + "seed": 91, + "steps": 44, + "score": 0.6543, + "total_reward": 18.9747, + "completion_rate": 0.85, "detection_rate": 1.0, - "trust_calibration": 0.839, + "trust_calibration": 0.289, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -3736,37 +4720,49 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.2883, - 0.9167, - 0.9, - 0.8833, + 0.9278, + 0.9256, + 0.3111, + 0.9189, + 0.3667, + 0.88, + 0.3278, + 0.9056, + 0.8711, 0.8667, + 0.3222, + 0.34, + 0.9128, + 0.3133, + 0.3311, + 0.3367, + 0.84, + 0.8156, + 0.8633, + 0.8239, + 0.2767, + 0.8544, 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.9436 + 0.8728, + 0.8883, + 0.8039, + 0.2744, + 0.672 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 14, - "steps": 18, - "score": 0.7309, - "total_reward": 13.8869, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.264, + "policy": "random", + "task_type": "task3", + "seed": 92, + "steps": 39, + "score": 0.6758, + "total_reward": 16.8941, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.804, "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", + "adversarial_poisonings": 1, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -3778,36 +4774,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.9333, - 0.313, - 0.3147, - 0.2733, - 0.9, - 0.8917, - 0.8833, - 0.875, + 0.9528, + 0.9256, + 0.9233, + 0.9211, + 0.3689, + 0.3367, + 0.9144, + 0.3322, + 0.8578, + 0.9033, 0.8667, - 0.8583, + 0.3122, + 0.2856, + 0.8489, + 0.2967, + 0.8994, + 0.8378, + 0.8633, + 0.8239, + 0.8544, + 0.8522, 0.85, - 0.8417, - 0.8333, - 0.187, - 0.8167, - 0.8083, - 0.6989 + 0.8106, + 0.01 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 15, - "steps": 18, - "score": 0.7649, - "total_reward": 14.5326, - "completion_rate": 1.0, + "policy": "random", + "task_type": "task3", + "seed": 93, + "steps": 38, + "score": 0.7063, + "total_reward": 17.6574, + "completion_rate": 0.85, "detection_rate": 1.0, - "trust_calibration": 0.33, + "trust_calibration": 0.542, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -3822,38 +4824,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3097, - 0.9333, - 0.925, + 0.8756, + 0.8711, + 0.3389, 0.9167, - 0.2703, - 0.9, - 0.8917, + 0.9144, + 0.9122, + 0.9178, + 0.9033, + 0.8689, + 0.8644, + 0.3422, + 0.8878, 0.8833, - 0.875, - 0.2287, - 0.8583, - 0.85, - 0.8417, - 0.8333, - 0.825, - 0.8167, - 0.8083, - 0.7656 + 0.8289, + 0.8767, + 0.8372, + 0.02, + 0.8306, + 0.8789, + 0.8567, + 0.3274, + 0.3352, + 0.27, + 0.7487 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 16, - "steps": 30, - "score": 0.8246, - "total_reward": 13.1941, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.84, - "adversarial_detections": 0, - "adversarial_poisonings": 0, + "policy": "random", + "task_type": "task3", + "seed": 94, + "steps": 35, + "score": 0.6388, + "total_reward": 15.969, + "completion_rate": 0.75, + "detection_rate": 0.5, + "trust_calibration": 0.941, + "adversarial_detections": 1, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -3866,35 +4874,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, - 0.9167, - 0.9, - 0.8833, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.9441 - ] - }, + 0.9278, + 0.9233, + 0.9139, + 0.02, + 0.9144, + 0.02, + 0.935, + 0.9078, + 0.3256, + 0.8711, + 0.3489, + 0.3237, + 0.8422, + 0.8878, + 0.8333, + 0.8789, + 0.3044, + 0.3222, + 0.073, + 0.2878, + 0.8633, + 0.9006, + 0.8567, + 0.6701 + ] + }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 17, - "steps": 30, - "score": 0.7823, - "total_reward": 12.517, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.844, + "policy": "random", + "task_type": "task3", + "seed": 95, + "steps": 35, + "score": 0.6469, + "total_reward": 17.465, + "completion_rate": 0.8, + "detection_rate": 0.0, + "trust_calibration": 0.834, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -3907,34 +4924,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, - 0.9167, - 0.265, - 0.8833, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.902 + 0.9278, + 0.8933, + 0.8689, + 0.3667, + 0.3644, + 0.307, + 0.3578, + 0.9056, + 0.9283, + 0.9011, + 0.8967, + 0.8944, + 0.8922, + 0.89, + 0.8856, + 0.9161, + 0.2989, + 0.3267, + 0.3474, + 0.0752, + 0.073, + 0.8306, + 0.8261, + 0.3089, + 0.8194, + 0.5106 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 18, - "steps": 15, - "score": 0.8749, - "total_reward": 13.998, - "completion_rate": 1.0, + "policy": "random", + "task_type": "task3", + "seed": 96, + "steps": 36, + "score": 0.7393, + "total_reward": 17.7436, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.28, - "adversarial_detections": 0, + "trust_calibration": 0.619, + "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -3948,33 +4976,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9417, - 0.9333, - 0.925, - 0.9167, - 0.9083, - 0.9, - 0.8917, - 0.8833, - 0.875, - 0.8667, - 0.8583, - 0.85, - 0.8417, - 0.8333, - 0.748 + 0.9278, + 0.8733, + 0.3167, + 0.92, + 0.9056, + 0.9033, + 0.9011, + 0.3189, + 0.8622, + 0.02, + 0.8878, + 0.8933, + 0.8811, + 0.9039, + 0.8767, + 0.8744, + 0.8722, + 0.87, + 0.8306, + 0.02, + 0.3289, + 0.8961, + 0.7897 ] }, { - "policy": "heuristic", - "task_type": "task2", - "seed": 19, - "steps": 17, - "score": 0.7966, - "total_reward": 14.3395, - "completion_rate": 1.0, + "policy": "random", + "task_type": "task3", + "seed": 97, + "steps": 36, + "score": 0.7834, + "total_reward": 18.8027, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.279, + "trust_calibration": 0.813, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -3989,35 +5025,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.2967, - 0.9333, - 0.925, - 0.9167, - 0.9083, - 0.262, - 0.8917, - 0.8833, - 0.875, - 0.8667, - 0.8583, - 0.85, - 0.8417, - 0.8333, - 0.825, - 0.8167, - 0.7475 + 0.9256, + 0.8711, + 0.9439, + 0.8844, + 0.86, + 0.9078, + 0.9306, + 0.8711, + 0.8989, + 0.8967, + 0.3214, + 0.84, + 0.8856, + 0.8811, + 0.8767, + 0.3244, + 0.835, + 0.2878, + 0.3156, + 0.8611, + 0.8589, + 0.8544, + 0.8557 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 0, - "steps": 30, - "score": 0.7823, - "total_reward": 12.5171, - "completion_rate": 0.933, + "policy": "random", + "task_type": "task3", + "seed": 98, + "steps": 37, + "score": 0.7572, + "total_reward": 18.9302, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.844, + "trust_calibration": 0.605, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -4032,35 +5074,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, - 0.9167, - 0.9, - 0.8833, - 0.8667, - 0.85, + 0.9278, + 0.9256, + 0.02, + 0.8889, + 0.8967, + 0.8622, + 0.9078, + 0.3256, + 0.8711, + 0.8989, + 0.8944, + 0.3422, + 0.89, + 0.3308, 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.1317, - 0.75, - 0.7333, - 0.7167, - 0.9021 + 0.3341, + 0.8767, + 0.9172, + 0.8678, + 0.8656, + 0.8811, + 0.8839, + 0.8194, + 0.8014 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 1, - "steps": 30, - "score": 0.8246, - "total_reward": 13.1941, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.84, + "policy": "random", + "task_type": "task3", + "seed": 99, + "steps": 37, + "score": 0.6531, + "total_reward": 18.9394, + "completion_rate": 0.9, + "detection_rate": 0.0, + "trust_calibration": 0.623, "adversarial_detections": 0, - "adversarial_poisonings": 0, + "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -4073,33 +5124,46 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, + 0.9278, + 0.9256, + 0.9483, + 0.9211, + 0.9189, 0.9167, - 0.9, - 0.8833, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.9441 + 0.9144, + 0.333, + 0.9078, + 0.3011, + 0.9239, + 0.3144, + 0.89, + 0.8556, + 0.3033, + 0.8811, + 0.8789, + 0.9017, + 0.0774, + 0.835, + 0.0708, + 0.8656, + 0.02, + 0.9039, + 0.3297, + 0.8172, + 0.3, + 0.4885 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 2, - "steps": 30, - "score": 0.7399, - "total_reward": 11.8385, - "completion_rate": 0.867, + "policy": "heuristic", + "task_type": "task3", + "seed": 0, + "steps": 43, + "score": 0.7114, + "total_reward": 18.4969, + "completion_rate": 0.85, "detection_rate": 1.0, - "trust_calibration": 0.843, + "trust_calibration": 0.729, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -4114,76 +5178,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.2983, - 0.2817, - 0.9, + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.3344, + 0.91, + 0.9056, + 0.9011, + 0.3267, + 0.8922, + 0.8878, 0.8833, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.8585 + 0.8789, + 0.8744, + 0.3, + 0.2956, + 0.8611, + 0.2867, + 0.8522, + 0.8478, + 0.8433, + 0.8389, + 0.7841 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 3, - "steps": 30, - "score": 0.7823, - "total_reward": 12.5171, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.844, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", - "difficulty_profile": { - "adaptive": false, - "episodes_seen": 0, - "rolling_detection_rate": 0.0, - "adversarial_threshold": 0.7, - "high_stakes_ratio": 0.35, - "verify_budget_penalty": 0, - "adversary_benign_confidence": 0.88, - "adversary_poison_confidence": 0.92 - }, - "rewards": [ - 0.9333, - 0.9167, - 0.9, - 0.8833, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.1317, - 0.75, - 0.7333, - 0.7167, - 0.9021 - ] - }, - { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 4, - "steps": 30, - "score": 0.7823, - "total_reward": 12.5171, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.844, - "adversarial_detections": 0, - "adversarial_poisonings": 0, + "policy": "heuristic", + "task_type": "task3", + "seed": 1, + "steps": 29, + "score": 0.7083, + "total_reward": 17.707, + "completion_rate": 0.85, + "detection_rate": 0.5, + "trust_calibration": 0.721, + "adversarial_detections": 4, + "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -4196,35 +5229,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, - 0.9167, - 0.9, - 0.8833, - 0.8667, - 0.85, - 0.1983, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.9021 + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.6632 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 5, - "steps": 30, - "score": 0.6545, - "total_reward": 10.4723, - "completion_rate": 0.733, - "detection_rate": 1.0, - "trust_calibration": 0.816, - "adversarial_detections": 0, - "adversarial_poisonings": 0, + "policy": "heuristic", + "task_type": "task3", + "seed": 2, + "steps": 29, + "score": 0.6919, + "total_reward": 17.2983, + "completion_rate": 0.85, + "detection_rate": 0.4444, + "trust_calibration": 0.561, + "adversarial_detections": 4, + "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -4237,33 +5279,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, + 0.3778, + 0.3456, + 0.9233, + 0.9211, + 0.9189, 0.9167, - 0.9, - 0.2483, - 0.8667, - 0.215, - 0.8333, - 0.8167, - 0.165, - 0.7833, - 0.1317, - 0.75, - 0.7333, - 0.7167, - 0.7623 + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.6065 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 6, - "steps": 30, - "score": 0.8246, - "total_reward": 13.1941, + "policy": "heuristic", + "task_type": "task3", + "seed": 3, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8008, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.84, + "trust_calibration": 0.843, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -4278,35 +5329,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, + 0.9256, + 0.9211, 0.9167, - 0.9, - 0.8833, - 0.8667, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.9441 + 0.8456, + 0.8411, + 0.8675 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 7, - "steps": 30, - "score": 0.8246, - "total_reward": 13.1942, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.84, - "adversarial_detections": 0, - "adversarial_poisonings": 0, + "policy": "heuristic", + "task_type": "task3", + "seed": 4, + "steps": 29, + "score": 0.7165, + "total_reward": 17.9128, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.721, + "adversarial_detections": 4, + "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -4319,33 +5376,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, 0.9167, - 0.9, - 0.8833, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.9442 + 0.3414, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.664 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 8, - "steps": 30, - "score": 0.7399, - "total_reward": 11.8383, - "completion_rate": 0.867, + "policy": "heuristic", + "task_type": "task3", + "seed": 5, + "steps": 46, + "score": 0.7558, + "total_reward": 18.1385, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.843, + "trust_calibration": 0.832, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -4360,35 +5426,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, - 0.2817, - 0.9, - 0.8833, - 0.8667, + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.32, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, 0.85, - 0.1983, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.8583 + 0.8456, + 0.2711, + 0.8367, + 0.8322, + 0.8229 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 9, - "steps": 30, - "score": 0.8246, - "total_reward": 13.1941, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.84, - "adversarial_detections": 0, - "adversarial_poisonings": 0, + "policy": "heuristic", + "task_type": "task3", + "seed": 6, + "steps": 27, + "score": 0.6991, + "total_reward": 16.778, + "completion_rate": 0.85, + "detection_rate": 0.4, + "trust_calibration": 0.725, + "adversarial_detections": 4, + "adversarial_poisonings": 6, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -4401,74 +5475,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, 0.9167, - 0.9, - 0.8833, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.9441 - ] - }, - { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 10, - "steps": 30, - "score": 0.7398, - "total_reward": 11.8373, - "completion_rate": 0.867, - "detection_rate": 1.0, - "trust_calibration": 0.84, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", - "difficulty_profile": { - "adaptive": false, - "episodes_seen": 0, - "rolling_detection_rate": 0.0, - "adversarial_threshold": 0.7, - "high_stakes_ratio": 0.35, - "verify_budget_penalty": 0, - "adversary_benign_confidence": 0.88, - "adversary_poison_confidence": 0.92 - }, - "rewards": [ - 0.9333, - 0.9167, - 0.9, - 0.8833, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.115, - 0.0983, - 0.7167, - 0.8573 + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.1019, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.6387 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 11, - "steps": 30, - "score": 0.7823, - "total_reward": 12.517, - "completion_rate": 0.933, + "policy": "heuristic", + "task_type": "task3", + "seed": 7, + "steps": 42, + "score": 0.7756, + "total_reward": 19.3902, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.844, + "trust_calibration": 0.835, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -4483,33 +5524,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, - 0.9167, - 0.265, - 0.8833, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.902 + 0.9278, + 0.9256, + 0.3763, + 0.9211, + 0.3919, + 0.3997, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8544, + 0.28, + 0.8456, + 0.8411, + 0.8478 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 12, - "steps": 30, - "score": 0.8246, - "total_reward": 13.1941, + "policy": "heuristic", + "task_type": "task3", + "seed": 8, + "steps": 44, + "score": 0.809, + "total_reward": 19.4157, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.84, + "trust_calibration": 0.853, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -4524,33 +5574,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, + 0.9278, + 0.3526, + 0.9211, 0.9167, - 0.9, - 0.8833, - 0.8667, + 0.9122, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.9441 + 0.2756, + 0.8411, + 0.8367, + 0.8654 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 13, - "steps": 30, - "score": 0.8246, - "total_reward": 13.1941, - "completion_rate": 1.0, + "policy": "heuristic", + "task_type": "task3", + "seed": 9, + "steps": 40, + "score": 0.782, + "total_reward": 19.5499, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.84, + "trust_calibration": 0.837, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -4565,35 +5623,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, - 0.9167, - 0.9, - 0.8833, - 0.8667, + 0.9278, + 0.9256, + 0.9233, + 0.3841, + 0.9189, + 0.3997, + 0.3994, + 0.3972, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.2889, + 0.8544, 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.9441 + 0.8456, + 0.8528 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 14, - "steps": 30, - "score": 0.7398, - "total_reward": 11.8376, - "completion_rate": 0.867, - "detection_rate": 1.0, - "trust_calibration": 0.841, - "adversarial_detections": 0, - "adversarial_poisonings": 0, + "policy": "heuristic", + "task_type": "task3", + "seed": 10, + "steps": 31, + "score": 0.712, + "total_reward": 17.8008, + "completion_rate": 0.85, + "detection_rate": 0.625, + "trust_calibration": 0.448, + "adversarial_detections": 5, + "adversarial_poisonings": 3, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -4606,33 +5673,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, 0.9167, - 0.9, - 0.2483, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.8576 + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.3303, + 0.3281, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.9295, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.3356, + 0.6281 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 15, - "steps": 30, - "score": 0.6973, - "total_reward": 11.1569, - "completion_rate": 0.8, + "policy": "heuristic", + "task_type": "task3", + "seed": 11, + "steps": 40, + "score": 0.7732, + "total_reward": 18.5566, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.834, + "trust_calibration": 0.835, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -4647,33 +5723,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.2983, - 0.9167, - 0.9, - 0.8833, - 0.2317, + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.2978, + 0.2933, + 0.8589, + 0.8544, 0.85, - 0.8333, - 0.8167, - 0.8, - 0.1483, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.8119 + 0.8456, + 0.8349 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 16, - "steps": 30, - "score": 0.8246, - "total_reward": 13.1941, + "policy": "heuristic", + "task_type": "task3", + "seed": 12, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8009, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.84, + "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -4688,74 +5772,39 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, + 0.9256, + 0.9211, 0.9167, - 0.9, - 0.8833, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.9441 - ] - }, - { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 17, - "steps": 30, - "score": 0.7823, - "total_reward": 12.517, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.844, - "adversarial_detections": 0, - "adversarial_poisonings": 0, - "status": "completed", - "difficulty_profile": { - "adaptive": false, - "episodes_seen": 0, - "rolling_detection_rate": 0.0, - "adversarial_threshold": 0.7, - "high_stakes_ratio": 0.35, - "verify_budget_penalty": 0, - "adversary_benign_confidence": 0.88, - "adversary_poison_confidence": 0.92 - }, - "rewards": [ - 0.9333, - 0.9167, - 0.265, - 0.8833, - 0.8667, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.2889, + 0.8544, 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.902 + 0.8456, + 0.8411, + 0.8676 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 18, - "steps": 30, - "score": 0.8246, - "total_reward": 13.1941, - "completion_rate": 1.0, + "policy": "heuristic", + "task_type": "task3", + "seed": 13, + "steps": 39, + "score": 0.833, + "total_reward": 18.3252, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.84, + "trust_calibration": 0.811, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -4770,35 +5819,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, - 0.9167, - 0.9, + 0.9278, + 0.3456, + 0.3433, + 0.9189, + 0.9144, + 0.91, + 0.9056, + 0.9011, + 0.8967, + 0.8922, + 0.8878, 0.8833, - 0.8667, - 0.85, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.9441 + 0.8789, + 0.8744, + 0.87, + 0.8656, + 0.8611, + 0.8567, + 0.8522, + 0.8478, + 0.8485 ] }, { - "policy": "oracle_lite", - "task_type": "task2", - "seed": 19, - "steps": 30, - "score": 0.7823, - "total_reward": 12.517, - "completion_rate": 0.933, - "detection_rate": 1.0, - "trust_calibration": 0.844, - "adversarial_detections": 0, - "adversarial_poisonings": 0, + "policy": "heuristic", + "task_type": "task3", + "seed": 14, + "steps": 29, + "score": 0.6889, + "total_reward": 17.9127, + "completion_rate": 0.85, + "detection_rate": 0.5, + "trust_calibration": 0.609, + "adversarial_detections": 4, + "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -4811,35 +5866,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9333, + 0.9278, + 0.9256, + 0.3763, + 0.3841, + 0.3689, 0.9167, - 0.9, - 0.8833, - 0.8667, - 0.215, - 0.8333, - 0.8167, - 0.8, - 0.7833, - 0.7667, - 0.75, - 0.7333, - 0.7167, - 0.902 + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.3237, + 0.8944, + 0.8922, + 0.093, + 0.0908, + 0.0886, + 0.0863, + 0.9206, + 0.9161, + 0.9117, + 0.6353 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 0, - "steps": 36, - "score": 0.6105, - "total_reward": 15.2622, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.884, - "adversarial_detections": 0, - "adversarial_poisonings": 1, + "seed": 15, + "steps": 30, + "score": 0.6847, + "total_reward": 18.4869, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.635, + "adversarial_detections": 4, + "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -4852,43 +5917,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8956, + 0.3608, + 0.9256, 0.9233, 0.9211, - 0.02, + 0.3459, + 0.9167, 0.9144, 0.9122, - 0.9078, - 0.3556, - 0.02, - 0.02, - 0.8467, + 0.91, + 0.3348, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.8944, 0.8922, - 0.8378, - 0.8833, - 0.9061, - 0.3067, - 0.02, - 0.8722, - 0.073, - 0.8306, - 0.9061, - 0.3397, - 0.3044, - 0.5035 + 0.093, + 0.0908, + 0.0886, + 0.0863, + 0.0841, + 0.9184, + 0.9139, + 0.9095, + 0.6404 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 1, - "steps": 40, - "score": 0.7205, - "total_reward": 18.0135, - "completion_rate": 0.9, + "seed": 16, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8009, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.662, - "adversarial_detections": 1, + "trust_calibration": 0.844, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -4902,42 +5969,39 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9506, - 0.8889, - 0.8822, - 0.91, - 0.8556, - 0.9011, - 0.3167, - 0.8622, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, 0.89, - 0.8356, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, 0.8633, - 0.3011, - 0.8444, - 0.8378, - 0.8656, - 0.8261, - 0.8589, - 0.8961, - 0.02, - 0.3, - 0.8106, - 0.2933, - 0.7916 + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8676 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 2, - "steps": 37, - "score": 0.7627, - "total_reward": 17.5411, - "completion_rate": 0.9, + "seed": 17, + "steps": 46, + "score": 0.8048, + "total_reward": 19.316, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.752, + "trust_calibration": 0.842, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -4952,42 +6016,92 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3778, - 0.3203, + 0.9256, 0.9211, - 0.9189, - 0.8822, - 0.91, - 0.8556, - 0.8511, - 0.8967, - 0.8422, + 0.3467, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, 0.89, - 0.8356, - 0.8311, + 0.8856, + 0.8811, 0.8767, - 0.2944, - 0.8972, - 0.87, - 0.8656, - 0.8789, - 0.02, 0.8722, - 0.8207 + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.28, + 0.8456, + 0.8411, + 0.8367, + 0.8322, + 0.8605 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 3, - "steps": 36, - "score": 0.6303, - "total_reward": 16.3887, - "completion_rate": 0.75, + "seed": 18, + "steps": 26, + "score": 0.6967, + "total_reward": 16.7213, + "completion_rate": 0.85, + "detection_rate": 0.3333, + "trust_calibration": 0.701, + "adversarial_detections": 3, + "adversarial_poisonings": 6, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.6149 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 19, + "steps": 20, + "score": 0.6193, + "total_reward": 13.0053, + "completion_rate": 0.65, "detection_rate": 0.0, - "trust_calibration": 0.798, + "trust_calibration": 0.576, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 5, "status": "failed", "difficulty_profile": { "adaptive": false, @@ -5000,43 +6114,38 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, + 0.3478, + 0.9256, 0.9233, - 0.3411, - 0.8667, + 0.9211, + 0.9189, + 0.3437, 0.9144, - 0.9372, + 0.9122, 0.91, - 0.3278, - 0.3556, - 0.8511, + 0.9078, + 0.9056, + 0.9033, + 0.9011, 0.8989, - 0.2922, - 0.8878, - 0.3056, - 0.8489, - 0.8767, - 0.8922, - 0.333, - 0.2878, - 0.8283, - 0.8589, - 0.3297, - 0.3552, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, 0.01 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 4, - "steps": 41, - "score": 0.6894, - "total_reward": 18.6138, - "completion_rate": 0.85, + "seed": 20, + "steps": 46, + "score": 0.7498, + "total_reward": 19.4938, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.405, + "trust_calibration": 0.835, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5052,46 +6161,45 @@ }, "rewards": [ 0.9278, - 0.9506, - 0.9233, - 0.3711, - 0.8844, + 0.9256, + 0.3763, + 0.3841, + 0.9167, 0.9122, - 0.33, - 0.3186, - 0.9011, - 0.8667, - 0.86, - 0.8878, - 0.8833, - 0.8789, + 0.3378, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.3156, + 0.8811, 0.8767, - 0.8994, - 0.3352, - 0.8678, - 0.3033, - 0.8239, - 0.8744, + 0.3022, 0.8678, - 0.2656, - 0.2933, - 0.2911, - 0.7076 + 0.8633, + 0.8589, + 0.2844, + 0.85, + 0.8456, + 0.8411, + 0.8367, + 0.8322, + 0.8412 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 5, - "steps": 31, - "score": 0.6062, - "total_reward": 15.1538, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.816, + "seed": 21, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8009, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.844, "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -5103,45 +6211,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.8933, + 0.9256, 0.9211, - 0.3367, - 0.3344, - 0.3322, - 0.8578, - 0.9306, + 0.9167, + 0.9122, + 0.9078, 0.9033, - 0.3211, - 0.8467, - 0.9194, - 0.32, - 0.8878, + 0.8989, + 0.8944, + 0.89, 0.8856, 0.8811, - 0.02, - 0.8394, - 0.0752, - 0.87, + 0.8767, + 0.8722, 0.8678, - 0.02, - 0.8883, - 0.01 + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.2756, + 0.8411, + 0.8676 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 6, - "steps": 39, - "score": 0.6337, - "total_reward": 15.8429, - "completion_rate": 0.8, - "detection_rate": 0.0, - "trust_calibration": 0.872, - "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "seed": 22, + "steps": 36, + "score": 0.7334, + "total_reward": 21.2675, + "completion_rate": 1.0, + "detection_rate": 0.8, + "trust_calibration": 0.747, + "adversarial_detections": 4, + "adversarial_poisonings": 1, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -5153,45 +6258,49 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8956, - 0.8889, + 0.3778, + 0.9256, + 0.9233, + 0.3481, + 0.9189, 0.9167, 0.9144, 0.9122, - 0.91, - 0.9006, - 0.8511, - 0.3197, - 0.9194, - 0.28, - 0.8556, - 0.8811, - 0.8789, - 0.8422, + 0.337, + 0.9078, + 0.3326, + 0.9033, + 0.9011, + 0.3259, + 0.8967, + 0.3214, + 0.8922, + 0.89, + 0.3148, 0.8856, - 0.3311, - 0.8589, - 0.0597, - 0.3222, - 0.27, - 0.8728, - 0.02, - 0.01 + 0.0863, + 0.9206, + 0.9161, + 0.9117, + 0.9073, + 0.8789, + 0.8544, + 0.7968 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 7, - "steps": 32, - "score": 0.7179, - "total_reward": 15.793, + "seed": 23, + "steps": 28, + "score": 0.6922, + "total_reward": 17.3057, "completion_rate": 0.85, - "detection_rate": 0.0, - "trust_calibration": 0.869, - "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "detection_rate": 0.4444, + "trust_calibration": 0.645, + "adversarial_detections": 4, + "adversarial_poisonings": 5, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -5203,42 +6312,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8756, - 0.8911, - 0.8867, + 0.9278, + 0.3686, + 0.3433, + 0.9211, + 0.9189, + 0.9167, 0.9144, - 0.86, + 0.9122, + 0.91, + 0.9078, 0.9056, 0.9033, + 0.9011, 0.8989, 0.8967, - 0.3444, - 0.89, - 0.8356, - 0.9083, - 0.02, - 0.8789, - 0.8744, - 0.87, - 0.8928, - 0.8633, - 0.3111, - 0.01 + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.6298 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 8, - "steps": 31, - "score": 0.7087, - "total_reward": 16.3004, - "completion_rate": 0.85, - "detection_rate": 0.0, - "trust_calibration": 0.701, + "seed": 24, + "steps": 46, + "score": 0.7725, + "total_reward": 20.0838, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.836, "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -5250,40 +6362,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.3733, - 0.9211, - 0.8844, + 0.9278, + 0.3686, + 0.9483, + 0.3711, + 0.9167, 0.9122, - 0.91, - 0.3578, - 0.9056, - 0.9283, - 0.8667, + 0.9078, + 0.9033, + 0.8989, 0.8944, - 0.84, - 0.3456, - 0.8833, + 0.89, + 0.8856, 0.8811, - 0.8589, 0.8767, - 0.0774, - 0.835, - 0.8856, + 0.3022, + 0.8678, 0.8633, - 0.01 + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.2711, + 0.8367, + 0.8322, + 0.8591 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 9, - "steps": 37, - "score": 0.7151, - "total_reward": 17.877, - "completion_rate": 0.9, + "seed": 25, + "steps": 34, + "score": 0.6755, + "total_reward": 18.9148, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.558, + "trust_calibration": 0.71, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5299,41 +6414,98 @@ }, "rewards": [ 0.9528, - 0.9256, - 0.9233, - 0.8689, - 0.3597, - 0.8822, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9144, + 0.3322, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.3189, + 0.3167, + 0.8944, + 0.3122, + 0.31, + 0.3078, + 0.8856, + 0.9011, + 0.8967, + 0.8922, + 0.3078, + 0.3033, + 0.8789, + 0.782 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 26, + "steps": 34, + "score": 0.6561, + "total_reward": 19.0282, + "completion_rate": 0.8, + "detection_rate": 0.6667, + "trust_calibration": 0.467, + "adversarial_detections": 2, + "adversarial_poisonings": 1, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.8989, + 0.3667, + 0.3644, + 0.9372, 0.935, - 0.02, + 0.3578, 0.3556, - 0.9011, + 0.3533, + 0.3289, 0.8967, 0.8944, - 0.84, - 0.8356, - 0.8489, - 0.8244, - 0.835, - 0.3178, + 0.8922, + 0.317, + 0.8878, + 0.3126, + 0.8833, + 0.0841, + 0.9184, + 0.9139, + 0.8878, 0.8656, - 0.8261, - 0.8217, - 0.3044, - 0.85, - 0.7724 + 0.3163, + 0.3419, + 0.6213 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 10, - "steps": 38, - "score": 0.6037, - "total_reward": 17.5072, - "completion_rate": 0.75, + "seed": 27, + "steps": 46, + "score": 0.7256, + "total_reward": 20.3155, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.772, + "trust_calibration": 0.828, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5349,48 +6521,47 @@ }, "rewards": [ 0.9278, - 0.9233, - 0.9461, - 0.8844, - 0.3622, + 0.9256, + 0.3763, + 0.9211, + 0.9189, + 0.3997, + 0.3994, + 0.3972, 0.9078, - 0.02, - 0.3233, - 0.9261, - 0.8967, + 0.9033, + 0.3289, 0.8944, - 0.86, - 0.3378, - 0.02, - 0.3263, + 0.89, + 0.3156, 0.8811, - 0.3289, - 0.2967, - 0.8994, + 0.8767, 0.8722, 0.8678, - 0.3386, - 0.3463, - 0.02, - 0.3089, - 0.8544, - 0.355, - 0.7709 + 0.8633, + 0.8589, + 0.2844, + 0.85, + 0.2756, + 0.8411, + 0.8367, + 0.8322, + 0.8395 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 11, - "steps": 32, - "score": 0.5881, - "total_reward": 14.703, - "completion_rate": 0.7, - "detection_rate": 0.3333, - "trust_calibration": 0.743, - "adversarial_detections": 1, - "adversarial_poisonings": 2, - "status": "failed", + "seed": 28, + "steps": 28, + "score": 0.7355, + "total_reward": 17.6509, + "completion_rate": 0.9, + "detection_rate": 0.5, + "trust_calibration": 0.725, + "adversarial_detections": 5, + "adversarial_poisonings": 5, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -5402,45 +6573,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8756, + 0.9278, + 0.9256, 0.9233, 0.9211, - 0.3459, - 0.8844, - 0.3622, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, 0.9078, - 0.9306, - 0.02, - 0.8689, - 0.8967, - 0.8944, - 0.8722, - 0.8878, - 0.8856, + 0.9056, + 0.9033, + 0.9011, + 0.1019, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.9273, 0.9228, - 0.0819, - 0.2967, - 0.3244, - 0.8722, - 0.02, - 0.3356, - 0.3011, - 0.01 + 0.9184, + 0.9139, + 0.6841 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 12, - "steps": 36, - "score": 0.6249, - "total_reward": 14.9974, - "completion_rate": 0.75, - "detection_rate": 0.3333, - "trust_calibration": 0.716, - "adversarial_detections": 1, - "adversarial_poisonings": 2, - "status": "failed", + "seed": 29, + "steps": 28, + "score": 0.7258, + "total_reward": 18.144, + "completion_rate": 0.9, + "detection_rate": 0.5, + "trust_calibration": 0.709, + "adversarial_detections": 4, + "adversarial_poisonings": 4, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -5452,42 +6622,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.8933, - 0.3411, + 0.9278, + 0.9256, + 0.9233, + 0.9211, 0.9189, - 0.8644, - 0.9122, - 0.8756, - 0.02, - 0.3441, - 0.3189, - 0.3167, - 0.8622, - 0.8878, - 0.8511, - 0.8789, - 0.8244, - 0.8878, - 0.2856, - 0.0663, - 0.8589, - 0.8961, - 0.8772, - 0.01 + 0.9167, + 0.9144, + 0.3392, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.3281, + 0.8989, + 0.8967, + 0.8944, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.6801 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 13, - "steps": 38, - "score": 0.7872, - "total_reward": 18.1053, - "completion_rate": 0.95, + "seed": 30, + "steps": 39, + "score": 0.6238, + "total_reward": 18.7149, + "completion_rate": 0.75, "detection_rate": 1.0, - "trust_calibration": 0.64, - "adversarial_detections": 1, + "trust_calibration": 0.712, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -5502,42 +6673,49 @@ }, "rewards": [ 0.9278, - 0.8911, - 0.8667, - 0.9122, - 0.3278, - 0.9056, - 0.8689, - 0.02, - 0.8622, - 0.8378, - 0.8533, - 0.8289, - 0.8767, + 0.9256, + 0.9233, + 0.3411, + 0.9189, + 0.9167, + 0.9144, + 0.3322, + 0.33, + 0.9078, + 0.3256, + 0.3233, + 0.9011, + 0.8989, + 0.8967, + 0.3144, + 0.8922, + 0.31, + 0.3078, + 0.3233, + 0.8989, + 0.8944, + 0.89, + 0.3056, + 0.3011, + 0.2967, 0.8722, - 0.895, - 0.8856, - 0.8633, - 0.8611, - 0.9017, - 0.8939, - 0.3, - 0.8081 + 0.2878, + 0.7538 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 14, - "steps": 35, - "score": 0.6988, - "total_reward": 18.1679, - "completion_rate": 0.95, - "detection_rate": 0.0, - "trust_calibration": 0.661, - "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "seed": 31, + "steps": 37, + "score": 0.6915, + "total_reward": 20.0529, + "completion_rate": 0.85, + "detection_rate": 1.0, + "trust_calibration": 0.911, + "adversarial_detections": 1, + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -5549,45 +6727,48 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.8733, - 0.8889, - 0.8844, + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9144, 0.3322, 0.91, - 0.9078, + 0.3278, 0.9056, 0.9033, + 0.9011, 0.8989, - 0.8622, + 0.3697, + 0.3774, + 0.3772, 0.89, - 0.3408, - 0.8856, - 0.8833, - 0.3011, - 0.8267, - 0.8372, + 0.3728, + 0.3706, + 0.9228, + 0.8944, 0.87, - 0.0708, - 0.8833, - 0.2811, - 0.8839, - 0.8544, - 0.01 + 0.8656, + 0.2911, + 0.8567, + 0.8522, + 0.843 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 15, - "steps": 38, - "score": 0.68, - "total_reward": 19.0388, - "completion_rate": 0.85, - "detection_rate": 1.0, - "trust_calibration": 0.774, - "adversarial_detections": 0, - "adversarial_poisonings": 0, + "seed": 32, + "steps": 30, + "score": 0.689, + "total_reward": 17.9142, + "completion_rate": 0.8, + "detection_rate": 0.6, + "trust_calibration": 0.712, + "adversarial_detections": 3, + "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -5600,48 +6781,46 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3548, - 0.9256, - 0.9233, - 0.8689, + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, 0.3667, - 0.3644, - 0.8922, - 0.935, - 0.8556, - 0.3533, - 0.3211, - 0.3189, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.3326, + 0.3303, + 0.9011, + 0.8989, + 0.8967, 0.8944, - 0.89, - 0.3378, - 0.8333, - 0.8789, - 0.8767, - 0.8744, - 0.2922, - 0.333, - 0.8656, - 0.8261, - 0.8567, - 0.3, - 0.8478, - 0.8066 + 0.0952, + 0.093, + 0.9273, + 0.9228, + 0.9184, + 0.8922, + 0.87, + 0.3208, + 0.6714 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 16, - "steps": 33, - "score": 0.5966, - "total_reward": 15.511, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.691, - "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "seed": 33, + "steps": 31, + "score": 0.7464, + "total_reward": 18.6604, + "completion_rate": 0.9, + "detection_rate": 0.6667, + "trust_calibration": 0.845, + "adversarial_detections": 4, + "adversarial_poisonings": 2, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -5658,40 +6837,87 @@ 0.9233, 0.9211, 0.9189, - 0.3044, - 0.02, - 0.935, - 0.02, - 0.3256, - 0.8711, - 0.8967, - 0.3444, - 0.02, + 0.3437, + 0.3414, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.3303, + 0.9011, + 0.8989, + 0.3237, + 0.8944, + 0.0952, + 0.093, + 0.9273, + 0.9228, + 0.9184, + 0.9139, + 0.8856, + 0.7574 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 34, + "steps": 40, + "score": 0.8105, + "total_reward": 18.6408, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.838, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.3433, + 0.3411, + 0.9167, + 0.9122, + 0.3378, + 0.9033, + 0.8989, + 0.8944, 0.89, - 0.8556, - 0.8833, - 0.3311, - 0.3289, - 0.8744, - 0.8878, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, 0.8633, - 0.3141, 0.8589, - 0.01 + 0.8544, + 0.85, + 0.8456, + 0.853 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 17, - "steps": 35, - "score": 0.5891, - "total_reward": 16.4939, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.795, - "adversarial_detections": 0, - "adversarial_poisonings": 1, + "seed": 35, + "steps": 28, + "score": 0.7257, + "total_reward": 18.1436, + "completion_rate": 0.9, + "detection_rate": 0.5, + "trust_calibration": 0.708, + "adversarial_detections": 4, + "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -5705,44 +6931,41 @@ }, "rewards": [ 0.9278, - 0.02, - 0.3433, - 0.3319, - 0.8644, - 0.3622, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.3437, + 0.9144, + 0.9122, 0.91, - 0.3286, + 0.3348, + 0.9056, 0.9033, 0.9011, - 0.8789, - 0.3697, - 0.9194, - 0.84, - 0.8856, - 0.8811, - 0.8789, - 0.3597, - 0.2692, - 0.87, - 0.2878, - 0.8656, - 0.0663, - 0.8239, - 0.8817, - 0.02, - 0.4835 + 0.8989, + 0.8967, + 0.8944, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.6797 ] }, { - "policy": "random", + "policy": "heuristic", "task_type": "task3", - "seed": 18, - "steps": 35, - "score": 0.6548, - "total_reward": 16.3705, - "completion_rate": 0.75, + "seed": 36, + "steps": 42, + "score": 0.7886, + "total_reward": 18.9271, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.573, + "trust_calibration": 0.854, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5758,44 +6981,6038 @@ }, "rewards": [ 0.9278, - 0.9233, - 0.02, + 0.3686, + 0.3433, + 0.3481, 0.9167, - 0.3644, - 0.3622, - 0.91, - 0.8556, + 0.9122, + 0.9078, 0.9033, - 0.8689, - 0.8967, - 0.8922, - 0.915, - 0.02, - 0.8533, - 0.8789, - 0.2967, - 0.3422, - 0.3078, - 0.8656, - 0.8611, - 0.2789, - 0.3297, - 0.7281 + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.2978, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8527 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 37, + "steps": 28, + "score": 0.6822, + "total_reward": 17.056, + "completion_rate": 0.85, + "detection_rate": 0.4, + "trust_calibration": 0.659, + "adversarial_detections": 4, + "adversarial_poisonings": 6, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3478, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.62 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 38, + "steps": 43, + "score": 0.8297, + "total_reward": 19.0823, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.774, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3478, + 0.9233, + 0.9189, + 0.9144, + 0.91, + 0.9056, + 0.9011, + 0.8967, + 0.8922, + 0.8878, + 0.8833, + 0.8789, + 0.8744, + 0.3, + 0.8656, + 0.8611, + 0.8567, + 0.8522, + 0.8478, + 0.8433, + 0.8389, + 0.8479 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 39, + "steps": 28, + "score": 0.7161, + "total_reward": 17.9036, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.71, + "adversarial_detections": 4, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.3303, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.6637 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 40, + "steps": 37, + "score": 0.6368, + "total_reward": 19.7412, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.678, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9144, + 0.9122, + 0.33, + 0.9078, + 0.9056, + 0.3233, + 0.9011, + 0.3189, + 0.3167, + 0.8944, + 0.3122, + 0.89, + 0.3078, + 0.8856, + 0.3033, + 0.8811, + 0.8967, + 0.3122, + 0.3078, + 0.8833, + 0.2989, + 0.2944, + 0.87, + 0.7673 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 41, + "steps": 38, + "score": 0.6416, + "total_reward": 20.5321, + "completion_rate": 0.85, + "detection_rate": 1.0, + "trust_calibration": 0.732, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.3411, + 0.9189, + 0.3367, + 0.9144, + 0.9122, + 0.33, + 0.9078, + 0.3256, + 0.3233, + 0.3211, + 0.8989, + 0.8967, + 0.8944, + 0.3122, + 0.89, + 0.3078, + 0.3056, + 0.8833, + 0.3189, + 0.8944, + 0.31, + 0.3056, + 0.8811, + 0.8589, + 0.3097, + 0.8722, + 0.323, + 0.7961 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 42, + "steps": 31, + "score": 0.6759, + "total_reward": 17.5723, + "completion_rate": 0.85, + "detection_rate": 0.4444, + "trust_calibration": 0.442, + "adversarial_detections": 4, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.3414, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.3056, + 0.5724 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 43, + "steps": 32, + "score": 0.6896, + "total_reward": 17.9304, + "completion_rate": 0.85, + "detection_rate": 0.5, + "trust_calibration": 0.681, + "adversarial_detections": 4, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.3456, + 0.9233, + 0.9211, + 0.3719, + 0.3797, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.3356, + 0.6467 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 44, + "steps": 38, + "score": 0.6652, + "total_reward": 19.2895, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.721, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.8967, + 0.9394, + 0.9372, + 0.36, + 0.3578, + 0.3556, + 0.9283, + 0.3511, + 0.3489, + 0.3167, + 0.8944, + 0.8922, + 0.343, + 0.3508, + 0.9011, + 0.8767, + 0.8722, + 0.2978, + 0.8633, + 0.8589, + 0.2844, + 0.85, + 0.7757 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 45, + "steps": 40, + "score": 0.8827, + "total_reward": 18.5376, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.839, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.8709 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 46, + "steps": 40, + "score": 0.6245, + "total_reward": 18.7359, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.429, + "adversarial_detections": 3, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3478, + 0.9506, + 0.9483, + 0.9461, + 0.3689, + 0.3667, + 0.3644, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.3763, + 0.3841, + 0.3839, + 0.3817, + 0.8944, + 0.3192, + 0.3178, + 0.8856, + 0.9228, + 0.9184, + 0.3422, + 0.3728, + 0.3683, + 0.9006, + 0.8994, + 0.85, + 0.3156, + 0.6634 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 47, + "steps": 29, + "score": 0.7165, + "total_reward": 17.9132, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.722, + "adversarial_detections": 4, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.3303, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.6644 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 48, + "steps": 28, + "score": 0.7257, + "total_reward": 18.1437, + "completion_rate": 0.9, + "detection_rate": 0.5, + "trust_calibration": 0.708, + "adversarial_detections": 4, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.3459, + 0.9167, + 0.9144, + 0.9122, + 0.337, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.8944, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.6798 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 49, + "steps": 28, + "score": 0.7355, + "total_reward": 17.6512, + "completion_rate": 0.9, + "detection_rate": 0.5, + "trust_calibration": 0.726, + "adversarial_detections": 5, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.1019, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.9273, + 0.9228, + 0.9184, + 0.9139, + 0.6844 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 50, + "steps": 42, + "score": 0.717, + "total_reward": 18.643, + "completion_rate": 0.85, + "detection_rate": 1.0, + "trust_calibration": 0.82, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.3919, + 0.3997, + 0.3994, + 0.3972, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.32, + 0.8856, + 0.8811, + 0.3067, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.2844, + 0.28, + 0.8456, + 0.8411, + 0.8092 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 51, + "steps": 34, + "score": 0.7274, + "total_reward": 19.6391, + "completion_rate": 0.95, + "detection_rate": 0.625, + "trust_calibration": 0.682, + "adversarial_detections": 5, + "adversarial_poisonings": 3, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.3414, + 0.9122, + 0.337, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.3237, + 0.8944, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.9073, + 0.3311, + 0.715 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 52, + "steps": 46, + "score": 0.5965, + "total_reward": 19.0884, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.686, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3548, + 0.9256, + 0.3433, + 0.9211, + 0.3619, + 0.9144, + 0.34, + 0.3578, + 0.3333, + 0.9011, + 0.8989, + 0.8967, + 0.3774, + 0.8922, + 0.375, + 0.8878, + 0.3706, + 0.3683, + 0.3661, + 0.3639, + 0.2967, + 0.3594, + 0.3572, + 0.355, + 0.8656, + 0.3089, + 0.8722, + 0.8656, + 0.8589, + 0.8522, + 0.7165 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 53, + "steps": 44, + "score": 0.8286, + "total_reward": 19.0588, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.3378, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.3067, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8367, + 0.8632 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 54, + "steps": 35, + "score": 0.6607, + "total_reward": 18.4987, + "completion_rate": 0.8, + "detection_rate": 0.75, + "trust_calibration": 0.358, + "adversarial_detections": 6, + "adversarial_poisonings": 2, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.9233, + 0.9211, + 0.9189, + 0.3797, + 0.3874, + 0.3952, + 0.36, + 0.9078, + 0.3326, + 0.3233, + 0.9011, + 0.8989, + 0.3237, + 0.8944, + 0.8922, + 0.89, + 0.8878, + 0.0886, + 0.0863, + 0.9206, + 0.9161, + 0.9117, + 0.9073, + 0.3661, + 0.8984, + 0.6167 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 55, + "steps": 29, + "score": 0.728, + "total_reward": 18.1995, + "completion_rate": 0.9, + "detection_rate": 0.5, + "trust_calibration": 0.713, + "adversarial_detections": 4, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.3756, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.3348, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.6787 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 56, + "steps": 37, + "score": 0.6768, + "total_reward": 20.9793, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.796, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3548, + 0.9506, + 0.9483, + 0.9461, + 0.3689, + 0.3667, + 0.3644, + 0.9122, + 0.91, + 0.9078, + 0.3256, + 0.3233, + 0.9011, + 0.3189, + 0.8967, + 0.3144, + 0.8922, + 0.89, + 0.8878, + 0.8856, + 0.3033, + 0.8989, + 0.2967, + 0.8922, + 0.8878, + 0.2856, + 0.8811, + 0.2967, + 0.8722, + 0.8317 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 57, + "steps": 38, + "score": 0.6683, + "total_reward": 20.0487, + "completion_rate": 0.85, + "detection_rate": 1.0, + "trust_calibration": 0.771, + "adversarial_detections": 1, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.3389, + 0.9167, + 0.3344, + 0.3322, + 0.33, + 0.9078, + 0.3256, + 0.9033, + 0.3211, + 0.8989, + 0.3167, + 0.3144, + 0.8922, + 0.89, + 0.8878, + 0.3486, + 0.3563, + 0.3819, + 0.9161, + 0.34, + 0.8833, + 0.8589, + 0.8544, + 0.85, + 0.8058 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 58, + "steps": 29, + "score": 0.7278, + "total_reward": 18.1944, + "completion_rate": 0.9, + "detection_rate": 0.5556, + "trust_calibration": 0.731, + "adversarial_detections": 5, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.3481, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.3348, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.6999 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 59, + "steps": 43, + "score": 0.7547, + "total_reward": 19.6215, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.735, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.3526, + 0.9233, + 0.9211, + 0.3719, + 0.3797, + 0.3644, + 0.91, + 0.9056, + 0.9011, + 0.8967, + 0.8922, + 0.8878, + 0.8833, + 0.8789, + 0.8744, + 0.3, + 0.8656, + 0.2911, + 0.8567, + 0.8522, + 0.8478, + 0.8433, + 0.8389, + 0.8208 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 60, + "steps": 36, + "score": 0.7716, + "total_reward": 18.5184, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.836, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.3411, + 0.9189, + 0.9167, + 0.3344, + 0.3322, + 0.33, + 0.3278, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.8439 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 61, + "steps": 42, + "score": 0.7616, + "total_reward": 19.8021, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.836, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.3763, + 0.9211, + 0.9189, + 0.3997, + 0.3994, + 0.3972, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.28, + 0.8456, + 0.2711, + 0.8483 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 62, + "steps": 43, + "score": 0.7892, + "total_reward": 18.9402, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.925, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3548, + 0.9506, + 0.3733, + 0.9189, + 0.9144, + 0.91, + 0.9056, + 0.9011, + 0.8967, + 0.3222, + 0.3178, + 0.8833, + 0.8789, + 0.8744, + 0.87, + 0.8656, + 0.8611, + 0.8567, + 0.8522, + 0.8478, + 0.8433, + 0.8389, + 0.8682 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 63, + "steps": 44, + "score": 0.8031, + "total_reward": 18.4705, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.841, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.3467, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.2756, + 0.2711, + 0.8367, + 0.8449 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 64, + "steps": 37, + "score": 0.7634, + "total_reward": 19.0848, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.76, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.3841, + 0.9189, + 0.3997, + 0.3994, + 0.3972, + 0.33, + 0.9328, + 0.3556, + 0.9011, + 0.8967, + 0.8922, + 0.8878, + 0.8833, + 0.8789, + 0.8744, + 0.87, + 0.8656, + 0.8611, + 0.8567, + 0.8522, + 0.8227 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 65, + "steps": 40, + "score": 0.797, + "total_reward": 19.1287, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.838, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.3763, + 0.9211, + 0.3919, + 0.3997, + 0.9122, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.853 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 66, + "steps": 29, + "score": 0.7165, + "total_reward": 17.9135, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.723, + "adversarial_detections": 4, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.337, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.6647 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 67, + "steps": 32, + "score": 0.6707, + "total_reward": 18.1095, + "completion_rate": 0.85, + "detection_rate": 0.5, + "trust_calibration": 0.44, + "adversarial_detections": 4, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.3503, + 0.9211, + 0.3459, + 0.9167, + 0.9144, + 0.3392, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.8944, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.9095, + 0.3333, + 0.5864 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 68, + "steps": 42, + "score": 0.7575, + "total_reward": 19.6962, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.641, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3478, + 0.9256, + 0.9233, + 0.3741, + 0.9189, + 0.3897, + 0.3974, + 0.3622, + 0.9078, + 0.9033, + 0.3289, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.7994 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 69, + "steps": 30, + "score": 0.6917, + "total_reward": 17.9836, + "completion_rate": 0.85, + "detection_rate": 0.5, + "trust_calibration": 0.658, + "adversarial_detections": 4, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3478, + 0.9506, + 0.9483, + 0.9461, + 0.3689, + 0.3667, + 0.3644, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.8944, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.9095, + 0.6454 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 70, + "steps": 27, + "score": 0.7, + "total_reward": 19.5996, + "completion_rate": 0.85, + "detection_rate": 1.0, + "trust_calibration": 0.716, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.3919, + 0.9167, + 0.3994, + 0.3972, + 0.395, + 0.3928, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.3144, + 0.8922, + 0.89, + 0.8878, + 0.8856, + 0.3033, + 0.8811, + 0.8789, + 0.2967, + 0.8744, + 0.2922, + 0.8166 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 71, + "steps": 29, + "score": 0.6808, + "total_reward": 17.7021, + "completion_rate": 0.85, + "detection_rate": 0.375, + "trust_calibration": 0.687, + "adversarial_detections": 3, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.3841, + 0.3919, + 0.3997, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.8944, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.0863, + 0.9206, + 0.9161, + 0.9117, + 0.6174 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 72, + "steps": 34, + "score": 0.6937, + "total_reward": 19.4234, + "completion_rate": 0.9, + "detection_rate": 0.5714, + "trust_calibration": 0.723, + "adversarial_detections": 4, + "adversarial_poisonings": 3, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.3763, + 0.9211, + 0.3919, + 0.3997, + 0.3644, + 0.9122, + 0.91, + 0.3348, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.8944, + 0.8922, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.9095, + 0.8833, + 0.2811, + 0.6916 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 73, + "steps": 26, + "score": 0.6967, + "total_reward": 16.7204, + "completion_rate": 0.85, + "detection_rate": 0.3333, + "trust_calibration": 0.697, + "adversarial_detections": 3, + "adversarial_poisonings": 6, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.614 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 74, + "steps": 28, + "score": 0.675, + "total_reward": 18.8991, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.806, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.3414, + 0.9122, + 0.91, + 0.9078, + 0.3256, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.8944, + 0.8922, + 0.31, + 0.8878, + 0.3056, + 0.8833, + 0.8811, + 0.2989, + 0.2967, + 0.8744, + 0.2922, + 0.8193 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 75, + "steps": 40, + "score": 0.7977, + "total_reward": 19.1445, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.837, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.2756, + 0.8529 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 76, + "steps": 42, + "score": 0.8335, + "total_reward": 19.1712, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.842, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.3686, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.2844, + 0.85, + 0.8456, + 0.8411, + 0.8671 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 77, + "steps": 29, + "score": 0.6992, + "total_reward": 18.1794, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.645, + "adversarial_detections": 4, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3478, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.3259, + 0.8967, + 0.8944, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.0863, + 0.9206, + 0.9161, + 0.9117, + 0.6451 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 78, + "steps": 28, + "score": 0.7161, + "total_reward": 17.9032, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.709, + "adversarial_detections": 4, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.3503, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.6633 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 79, + "steps": 44, + "score": 0.7563, + "total_reward": 19.6626, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.829, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9122, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.2844, + 0.85, + 0.8456, + 0.8411, + 0.8367, + 0.8421 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 80, + "steps": 46, + "score": 0.7065, + "total_reward": 16.9564, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.803, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.3156, + 0.3111, + 0.3067, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8544, + 0.85, + 0.2756, + 0.8411, + 0.8367, + 0.8322, + 0.7808 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 81, + "steps": 44, + "score": 0.8031, + "total_reward": 18.4703, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.3156, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.2756, + 0.2711, + 0.8367, + 0.8447 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 82, + "steps": 44, + "score": 0.8286, + "total_reward": 19.0588, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.32, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8367, + 0.8632 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 83, + "steps": 33, + "score": 0.7082, + "total_reward": 18.4133, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.826, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.3456, + 0.9483, + 0.9461, + 0.8989, + 0.3667, + 0.3644, + 0.9372, + 0.935, + 0.8878, + 0.9306, + 0.3533, + 0.3511, + 0.3489, + 0.3467, + 0.9194, + 0.3422, + 0.34, + 0.9033, + 0.8789, + 0.8744, + 0.87, + 0.8656, + 0.8611, + 0.8133 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 84, + "steps": 29, + "score": 0.71, + "total_reward": 18.4609, + "completion_rate": 0.9, + "detection_rate": 0.5, + "trust_calibration": 0.716, + "adversarial_detections": 4, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.3686, + 0.9233, + 0.9211, + 0.3459, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.3214, + 0.8922, + 0.093, + 0.0908, + 0.0886, + 0.0863, + 0.9206, + 0.9161, + 0.9117, + 0.6795 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 85, + "steps": 29, + "score": 0.6923, + "total_reward": 17.999, + "completion_rate": 0.85, + "detection_rate": 0.5, + "trust_calibration": 0.718, + "adversarial_detections": 4, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9144, + 0.3392, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.8944, + 0.8922, + 0.093, + 0.0908, + 0.0886, + 0.0863, + 0.9206, + 0.9161, + 0.9117, + 0.6627 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 86, + "steps": 33, + "score": 0.6856, + "total_reward": 18.5122, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.605, + "adversarial_detections": 4, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3478, + 0.9256, + 0.3663, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.9095, + 0.3333, + 0.6263 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 87, + "steps": 27, + "score": 0.6991, + "total_reward": 16.7786, + "completion_rate": 0.85, + "detection_rate": 0.4, + "trust_calibration": 0.727, + "adversarial_detections": 4, + "adversarial_poisonings": 6, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.1019, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.6393 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 88, + "steps": 42, + "score": 0.7762, + "total_reward": 19.4062, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.835, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9122, + 0.9078, + 0.9033, + 0.3289, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.28, + 0.8456, + 0.8411, + 0.8479 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 89, + "steps": 29, + "score": 0.7259, + "total_reward": 18.1473, + "completion_rate": 0.9, + "detection_rate": 0.5, + "trust_calibration": 0.696, + "adversarial_detections": 4, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.3503, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.3392, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.6745 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 90, + "steps": 28, + "score": 0.7161, + "total_reward": 17.9035, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.71, + "adversarial_detections": 4, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.3392, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.6635 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 91, + "steps": 29, + "score": 0.7262, + "total_reward": 18.1541, + "completion_rate": 0.9, + "detection_rate": 0.5, + "trust_calibration": 0.723, + "adversarial_detections": 4, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.3459, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.3259, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.6813 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 92, + "steps": 38, + "score": 0.7692, + "total_reward": 18.4611, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.842, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.3389, + 0.3367, + 0.3344, + 0.3322, + 0.9078, + 0.9033, + 0.8989, + 0.3244, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8411 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 93, + "steps": 45, + "score": 0.6795, + "total_reward": 19.7049, + "completion_rate": 0.85, + "detection_rate": 1.0, + "trust_calibration": 0.749, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3478, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.3897, + 0.9144, + 0.3972, + 0.395, + 0.9078, + 0.3906, + 0.3883, + 0.3861, + 0.8967, + 0.3222, + 0.8878, + 0.8833, + 0.8789, + 0.8744, + 0.3, + 0.2956, + 0.8789, + 0.8722, + 0.8478, + 0.8433, + 0.8389, + 0.2644, + 0.7847 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 94, + "steps": 32, + "score": 0.6725, + "total_reward": 18.1568, + "completion_rate": 0.8, + "detection_rate": 0.3333, + "trust_calibration": 0.85, + "adversarial_detections": 1, + "adversarial_poisonings": 2, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.8989, + 0.3667, + 0.3644, + 0.9372, + 0.935, + 0.3578, + 0.8856, + 0.9283, + 0.3511, + 0.3489, + 0.3467, + 0.3444, + 0.8922, + 0.89, + 0.8878, + 0.0886, + 0.0863, + 0.9206, + 0.8922, + 0.8678, + 0.8633, + 0.6213 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 95, + "steps": 40, + "score": 0.7977, + "total_reward": 19.1447, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.838, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.3733, + 0.9461, + 0.3689, + 0.3667, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.2756, + 0.853 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 96, + "steps": 31, + "score": 0.6978, + "total_reward": 17.4444, + "completion_rate": 0.85, + "detection_rate": 0.5, + "trust_calibration": 0.726, + "adversarial_detections": 4, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.3511, + 0.9189, + 0.9167, + 0.3344, + 0.3322, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.8944, + 0.8922, + 0.093, + 0.0908, + 0.0886, + 0.0863, + 0.9206, + 0.9161, + 0.9117, + 0.9073, + 0.66 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 97, + "steps": 38, + "score": 0.8207, + "total_reward": 18.8758, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.829, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8552 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 98, + "steps": 29, + "score": 0.7165, + "total_reward": 17.9132, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.722, + "adversarial_detections": 4, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.3303, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.6644 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 99, + "steps": 38, + "score": 0.7037, + "total_reward": 21.1121, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.723, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.3389, + 0.9167, + 0.9144, + 0.3322, + 0.91, + 0.3278, + 0.9056, + 0.9033, + 0.3211, + 0.8989, + 0.3167, + 0.8944, + 0.8922, + 0.31, + 0.8878, + 0.3056, + 0.3211, + 0.8967, + 0.3122, + 0.8878, + 0.3033, + 0.8789, + 0.8744, + 0.87, + 0.8287 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 0, + "steps": 42, + "score": 0.867, + "total_reward": 19.0739, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.935, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8904 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 1, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 2, + "steps": 42, + "score": 0.8422, + "total_reward": 18.5276, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.933, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.3511, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8724 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 3, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 4, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1154, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.3289, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 5, + "steps": 46, + "score": 0.7916, + "total_reward": 18.9976, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.917, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.32, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8618 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 6, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8523, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 7, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8108, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.894 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 8, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3315, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.3511, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.3289, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8857 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 9, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8108, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.894 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 10, + "steps": 42, + "score": 0.8421, + "total_reward": 18.5263, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.928, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.3067, + 0.3022, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.871 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 11, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.3467, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 12, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 13, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8108, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.894 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 14, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3311, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.932, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8853 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 15, + "steps": 46, + "score": 0.8162, + "total_reward": 19.5883, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.93, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.9211, + 0.9167, + 0.9122, + 0.3378, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.3156, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8825 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 16, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8523, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 17, + "steps": 42, + "score": 0.867, + "total_reward": 19.0739, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.935, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.3467, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8903 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 18, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8108, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.894 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 19, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 20, + "steps": 46, + "score": 0.7653, + "total_reward": 18.3663, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.909, + "adversarial_detections": 5, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.9033, + 0.3289, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.3067, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8544, + 0.85, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8423 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 21, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8109, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.932, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.894 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 22, + "steps": 46, + "score": 0.7652, + "total_reward": 18.3659, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.908, + "adversarial_detections": 5, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.2978, + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8419 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 23, + "steps": 42, + "score": 0.8403, + "total_reward": 18.4862, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.3511, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8727 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 24, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3311, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.933, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8854 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 25, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3314, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.3511, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.3244, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8857 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 26, + "steps": 42, + "score": 0.8403, + "total_reward": 18.4855, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.3067, + 0.3022, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.872 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 27, + "steps": 46, + "score": 0.8179, + "total_reward": 19.6285, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.924, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.3467, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.2978, + 0.8633, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.881 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 28, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 29, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3311, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.933, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.3244, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.3022, + 0.8678, + 0.8633, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8854 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 30, + "steps": 44, + "score": 0.8148, + "total_reward": 18.741, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.922, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.32, + 0.8856, + 0.3111, + 0.3067, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8653 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 31, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 32, + "steps": 44, + "score": 0.8423, + "total_reward": 19.3725, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8851 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 33, + "steps": 46, + "score": 0.8178, + "total_reward": 19.6279, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.922, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.3289, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.3067, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8804 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 34, + "steps": 42, + "score": 0.867, + "total_reward": 19.0739, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.935, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.3289, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8904 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 35, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3313, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.933, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.89, + 0.3156, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8855 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 36, + "steps": 44, + "score": 0.8167, + "total_reward": 18.7838, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.927, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.3511, + 0.3467, + 0.3422, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8664 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 37, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 38, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 39, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8109, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.932, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.894 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 40, + "steps": 42, + "score": 0.867, + "total_reward": 19.0739, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.935, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.3378, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8904 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 41, + "steps": 42, + "score": 0.867, + "total_reward": 19.0739, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.935, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8904 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 42, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 43, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 44, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 45, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8109, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.932, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.894 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 46, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1154, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.3244, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 47, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.3067, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 48, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3312, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.933, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.3378, + 0.9033, + 0.8989, + 0.8944, + 0.32, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8855 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 49, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 50, + "steps": 44, + "score": 0.8423, + "total_reward": 19.3725, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.3022, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.885 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 51, + "steps": 46, + "score": 0.8178, + "total_reward": 19.6279, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.922, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.3289, + 0.8944, + 0.32, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8804 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 52, + "steps": 46, + "score": 0.7899, + "total_reward": 18.9582, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.926, + "adversarial_detections": 5, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.9211, + 0.9167, + 0.9122, + 0.3378, + 0.9033, + 0.3289, + 0.8944, + 0.32, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8641 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 53, + "steps": 44, + "score": 0.8423, + "total_reward": 19.3723, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.93, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.3378, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.3067, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8848 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 54, + "steps": 46, + "score": 0.7916, + "total_reward": 18.9976, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.918, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.9211, + 0.9167, + 0.9122, + 0.3378, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.3156, + 0.8811, + 0.8767, + 0.8722, + 0.2978, + 0.8633, + 0.8589, + 0.8544, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8619 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 55, + "steps": 44, + "score": 0.8423, + "total_reward": 19.3725, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.3511, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.3156, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8851 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 56, + "steps": 46, + "score": 0.7899, + "total_reward": 18.957, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.922, + "adversarial_detections": 5, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.3067, + 0.8722, + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8629 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 57, + "steps": 46, + "score": 0.8179, + "total_reward": 19.6284, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.924, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.3378, + 0.9033, + 0.3289, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.881 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 58, + "steps": 44, + "score": 0.8423, + "total_reward": 19.3725, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.3156, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.885 ] }, { - "policy": "random", + "policy": "oracle_lite", "task_type": "task3", - "seed": 19, - "steps": 38, - "score": 0.6912, - "total_reward": 17.2799, - "completion_rate": 0.9, - "detection_rate": 0.0, - "trust_calibration": 0.834, - "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "seed": 59, + "steps": 42, + "score": 0.8403, + "total_reward": 18.4861, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -5807,43 +13024,40 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3156, - 0.9211, - 0.9189, - 0.9144, + 0.9256, + 0.3511, + 0.3467, 0.9122, - 0.33, - 0.02, - 0.9133, - 0.9261, - 0.8967, - 0.8422, + 0.9078, + 0.9033, + 0.8989, + 0.8944, 0.89, - 0.8356, - 0.8511, - 0.8789, - 0.8922, - 0.87, - 0.3178, + 0.8856, 0.8811, - 0.8589, - 0.8544, - 0.87, - 0.3108, - 0.01 + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8726 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 0, - "steps": 43, - "score": 0.7114, - "total_reward": 18.4969, - "completion_rate": 0.85, + "seed": 60, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.729, - "adversarial_detections": 0, + "trust_calibration": 0.931, + "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -5857,45 +13071,40 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.3344, - 0.91, - 0.9056, - 0.9011, - 0.3267, - 0.8922, - 0.8878, - 0.8833, - 0.8789, - 0.8744, - 0.3, - 0.2956, - 0.8611, - 0.2867, - 0.8522, - 0.8478, - 0.8433, - 0.8389, - 0.7841 + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 1, - "steps": 29, - "score": 0.7083, - "total_reward": 17.707, - "completion_rate": 0.85, - "detection_rate": 0.5, - "trust_calibration": 0.721, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "seed": 61, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -5908,44 +13117,40 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.9144, + 0.9256, + 0.9211, + 0.9167, 0.9122, - 0.91, 0.9078, - 0.9056, 0.9033, - 0.9011, 0.8989, - 0.8967, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.6632 + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 2, - "steps": 29, - "score": 0.6919, - "total_reward": 17.2983, - "completion_rate": 0.85, - "detection_rate": 0.4444, - "trust_calibration": 0.561, - "adversarial_detections": 4, - "adversarial_poisonings": 5, + "seed": 62, + "steps": 46, + "score": 0.7933, + "total_reward": 19.0389, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.916, + "adversarial_detections": 7, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -5958,43 +13163,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3778, - 0.3456, - 0.9233, + 0.3556, 0.9211, - 0.9189, - 0.9167, - 0.9144, + 0.3467, 0.9122, - 0.91, 0.9078, - 0.9056, 0.9033, - 0.9011, 0.8989, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.6065 + 0.8944, + 0.89, + 0.3156, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8615 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 3, + "seed": 63, "steps": 42, - "score": 0.8546, - "total_reward": 18.8008, + "score": 0.8689, + "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.843, - "adversarial_detections": 0, + "trust_calibration": 0.934, + "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -6010,7 +13214,7 @@ "rewards": [ 0.9256, 0.9211, - 0.9167, + 0.3467, 0.9122, 0.9078, 0.9033, @@ -6018,31 +13222,31 @@ 0.8944, 0.89, 0.8856, - 0.3111, + 0.8811, 0.8767, 0.8722, 0.8678, - 0.8633, - 0.8589, - 0.8544, - 0.85, - 0.8456, - 0.8411, - 0.8675 + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 4, - "steps": 29, - "score": 0.7165, - "total_reward": 17.9128, - "completion_rate": 0.9, - "detection_rate": 0.4444, - "trust_calibration": 0.721, - "adversarial_detections": 4, - "adversarial_poisonings": 5, + "seed": 64, + "steps": 44, + "score": 0.8423, + "total_reward": 19.3725, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -6055,43 +13259,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, 0.9256, - 0.9233, 0.9211, - 0.9189, 0.9167, - 0.3414, - 0.9122, - 0.91, + 0.3422, 0.9078, - 0.9056, - 0.9033, - 0.9011, + 0.3333, 0.8989, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.664 + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8851 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 5, - "steps": 46, - "score": 0.7558, - "total_reward": 18.1385, - "completion_rate": 0.9, + "seed": 65, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.832, - "adversarial_detections": 0, + "trust_calibration": 0.934, + "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -6108,40 +13310,38 @@ 0.9256, 0.9211, 0.9167, - 0.3422, + 0.9122, 0.9078, - 0.3333, + 0.9033, 0.8989, 0.8944, 0.32, 0.8856, - 0.3111, + 0.8811, 0.8767, 0.8722, 0.8678, - 0.8633, - 0.8589, - 0.8544, - 0.85, - 0.8456, - 0.2711, - 0.8367, - 0.8322, - 0.8229 + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 6, - "steps": 27, - "score": 0.6991, - "total_reward": 16.778, - "completion_rate": 0.85, - "detection_rate": 0.4, - "trust_calibration": 0.725, - "adversarial_detections": 4, - "adversarial_poisonings": 6, + "seed": 66, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 7, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -6154,42 +13354,40 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, 0.9256, - 0.9233, 0.9211, - 0.9189, 0.9167, - 0.9144, 0.9122, - 0.91, 0.9078, - 0.9056, 0.9033, - 0.9011, - 0.1019, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.6387 + 0.8989, + 0.8944, + 0.32, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 7, - "steps": 42, - "score": 0.7756, - "total_reward": 19.3902, - "completion_rate": 0.95, + "seed": 67, + "steps": 46, + "score": 0.8179, + "total_reward": 19.6293, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.835, - "adversarial_detections": 0, + "trust_calibration": 0.927, + "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -6203,43 +13401,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, 0.9256, - 0.3763, 0.9211, - 0.3919, - 0.3997, + 0.3467, 0.9122, - 0.9078, + 0.3378, 0.9033, 0.8989, - 0.8944, + 0.3244, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, - 0.2933, + 0.8633, 0.8589, - 0.8544, - 0.28, - 0.8456, - 0.8411, - 0.8478 + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8818 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 8, - "steps": 44, - "score": 0.809, - "total_reward": 19.4157, + "seed": 68, + "steps": 46, + "score": 0.8162, + "total_reward": 19.5881, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.853, - "adversarial_detections": 0, + "trust_calibration": 0.929, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -6253,42 +13450,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.3526, - 0.9211, + 0.9256, + 0.3511, 0.9167, - 0.9122, + 0.3422, 0.9078, - 0.3333, + 0.9033, 0.8989, 0.8944, 0.89, 0.8856, - 0.8811, + 0.3111, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, - 0.85, - 0.2756, - 0.8411, - 0.8367, - 0.8654 + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8823 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 9, - "steps": 40, - "score": 0.782, - "total_reward": 19.5499, - "completion_rate": 0.95, + "seed": 69, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.837, - "adversarial_detections": 0, + "trust_calibration": 0.934, + "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -6302,16 +13499,12 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, 0.9256, - 0.9233, - 0.3841, - 0.9189, - 0.3997, - 0.3994, - 0.3972, + 0.9211, + 0.9167, + 0.9122, 0.9078, - 0.9033, + 0.3333, 0.8989, 0.8944, 0.89, @@ -6320,26 +13513,27 @@ 0.8767, 0.8722, 0.8678, - 0.8633, - 0.2889, - 0.8544, - 0.85, - 0.8456, - 0.8528 + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 10, - "steps": 31, - "score": 0.712, - "total_reward": 17.8008, - "completion_rate": 0.85, - "detection_rate": 0.625, - "trust_calibration": 0.448, - "adversarial_detections": 5, - "adversarial_poisonings": 3, + "seed": 70, + "steps": 44, + "score": 0.8423, + "total_reward": 19.3728, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.933, + "adversarial_detections": 7, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -6352,43 +13546,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, + 0.3556, 0.9211, - 0.9189, 0.9167, - 0.9144, 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.3303, - 0.3281, + 0.3378, + 0.9033, 0.8989, - 0.0997, - 0.0974, - 0.0952, - 0.9295, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.3356, - 0.6281 + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8854 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 11, - "steps": 40, - "score": 0.7732, - "total_reward": 18.5566, - "completion_rate": 0.9, + "seed": 71, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3313, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.835, - "adversarial_detections": 0, + "trust_calibration": 0.933, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -6402,15 +13594,12 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.9122, + 0.9256, + 0.9211, + 0.9167, + 0.3422, 0.9078, - 0.9033, + 0.3333, 0.8989, 0.8944, 0.89, @@ -6418,26 +13607,28 @@ 0.8811, 0.8767, 0.8722, - 0.2978, - 0.2933, + 0.8678, + 0.8633, 0.8589, - 0.8544, - 0.85, - 0.8456, - 0.8349 + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8856 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 12, - "steps": 42, - "score": 0.8546, - "total_reward": 18.8009, + "seed": 72, + "steps": 44, + "score": 0.8423, + "total_reward": 19.3724, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.844, - "adversarial_detections": 0, + "trust_calibration": 0.931, + "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -6453,38 +13644,39 @@ "rewards": [ 0.9256, 0.9211, - 0.9167, + 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, - 0.8856, + 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, - 0.2889, - 0.8544, - 0.85, - 0.8456, - 0.8411, - 0.8676 + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8849 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 13, - "steps": 39, - "score": 0.833, - "total_reward": 18.3252, - "completion_rate": 0.95, + "seed": 73, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8109, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.811, - "adversarial_detections": 0, + "trust_calibration": 0.932, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -6498,41 +13690,40 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.3456, - 0.3433, - 0.9189, - 0.9144, - 0.91, - 0.9056, - 0.9011, - 0.8967, - 0.8922, - 0.8878, - 0.8833, - 0.8789, - 0.8744, - 0.87, - 0.8656, - 0.8611, - 0.8567, - 0.8522, - 0.8478, - 0.8485 + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.894 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 14, - "steps": 29, - "score": 0.6889, - "total_reward": 17.9127, - "completion_rate": 0.85, - "detection_rate": 0.5, - "trust_calibration": 0.609, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "seed": 74, + "steps": 44, + "score": 0.8423, + "total_reward": 19.3724, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -6545,45 +13736,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, 0.9256, - 0.3763, - 0.3841, - 0.3689, + 0.9211, 0.9167, - 0.9144, 0.9122, - 0.91, 0.9078, - 0.9056, 0.9033, - 0.9011, - 0.8989, - 0.3237, + 0.3289, 0.8944, - 0.8922, - 0.093, - 0.0908, - 0.0886, - 0.0863, - 0.9206, - 0.9161, - 0.9117, - 0.6353 + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8849 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 15, - "steps": 30, - "score": 0.6847, - "total_reward": 18.4869, - "completion_rate": 0.9, - "detection_rate": 0.4444, - "trust_calibration": 0.635, - "adversarial_detections": 4, - "adversarial_poisonings": 5, + "seed": 75, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -6596,45 +13784,39 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3608, 0.9256, - 0.9233, 0.9211, - 0.3459, 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.3348, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.8944, - 0.8922, - 0.093, - 0.0908, - 0.0886, - 0.0863, - 0.0841, - 0.9184, - 0.9139, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, 0.9095, - 0.6404 + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 16, + "seed": 76, "steps": 42, - "score": 0.8546, - "total_reward": 18.8009, + "score": 0.8689, + "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.844, - "adversarial_detections": 0, + "trust_calibration": 0.934, + "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -6649,7 +13831,7 @@ }, "rewards": [ 0.9256, - 0.9211, + 0.3511, 0.9167, 0.9122, 0.9078, @@ -6662,26 +13844,26 @@ 0.8767, 0.8722, 0.8678, - 0.8633, - 0.2889, - 0.8544, - 0.85, - 0.8456, - 0.8411, - 0.8676 + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 17, - "steps": 46, - "score": 0.8048, - "total_reward": 19.316, + "seed": 77, + "steps": 42, + "score": 0.867, + "total_reward": 19.0739, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.842, - "adversarial_detections": 0, + "trust_calibration": 0.935, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -6697,7 +13879,7 @@ "rewards": [ 0.9256, 0.9211, - 0.3467, + 0.9167, 0.9122, 0.9078, 0.9033, @@ -6708,30 +13890,28 @@ 0.8811, 0.8767, 0.8722, - 0.8678, + 0.2978, 0.8633, - 0.2889, - 0.8544, - 0.28, - 0.8456, - 0.8411, - 0.8367, - 0.8322, - 0.8605 + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8904 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 18, - "steps": 26, - "score": 0.6967, - "total_reward": 16.7213, - "completion_rate": 0.85, - "detection_rate": 0.3333, - "trust_calibration": 0.701, - "adversarial_detections": 3, - "adversarial_poisonings": 6, + "seed": 78, + "steps": 42, + "score": 0.867, + "total_reward": 19.0739, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.935, + "adversarial_detections": 6, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -6744,44 +13924,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, 0.9256, - 0.9233, 0.9211, - 0.9189, - 0.9167, - 0.9144, + 0.3467, 0.9122, - 0.91, 0.9078, - 0.9056, 0.9033, - 0.9011, 0.8989, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.9228, - 0.9184, - 0.6149 + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8903 ] }, { - "policy": "heuristic", + "policy": "oracle_lite", "task_type": "task3", - "seed": 19, - "steps": 20, - "score": 0.6193, - "total_reward": 13.0053, - "completion_rate": 0.65, - "detection_rate": 0.0, - "trust_calibration": 0.576, - "adversarial_detections": 0, - "adversarial_poisonings": 5, - "status": "failed", + "seed": 79, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -6793,39 +13971,40 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3478, 0.9256, - 0.9233, 0.9211, - 0.9189, - 0.3437, - 0.9144, + 0.9167, 0.9122, - 0.91, 0.9078, - 0.9056, 0.9033, - 0.9011, 0.8989, - 0.8967, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.01 + 0.8944, + 0.32, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 0, - "steps": 42, - "score": 0.867, - "total_reward": 19.0739, - "completion_rate": 1.0, + "seed": 80, + "steps": 46, + "score": 0.7652, + "total_reward": 18.3653, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.935, - "adversarial_detections": 6, + "trust_calibration": 0.905, + "adversarial_detections": 5, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -6842,37 +14021,39 @@ 0.9256, 0.9211, 0.9167, - 0.9122, + 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, - 0.8856, + 0.3156, 0.3111, - 0.8767, + 0.3067, 0.8722, 0.8678, - 0.8633, - 0.9006, - 0.8961, - 0.8917, + 0.2933, + 0.8589, + 0.8544, + 0.85, 0.8873, 0.8828, - 0.8904 + 0.8784, + 0.8739, + 0.8412 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 1, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8524, + "seed": 81, + "steps": 42, + "score": 0.867, + "total_reward": 19.0739, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, + "trust_calibration": 0.935, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -6895,29 +14076,30 @@ 0.8989, 0.8944, 0.89, - 0.8856, + 0.3156, 0.8811, 0.8767, 0.8722, - 0.9095, - 0.905, + 0.8678, + 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, - 0.8938 + 0.8828, + 0.8904 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 2, - "steps": 42, - "score": 0.8422, - "total_reward": 18.5276, - "completion_rate": 0.95, + "seed": 82, + "steps": 44, + "score": 0.8423, + "total_reward": 19.3725, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.933, + "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", @@ -6932,39 +14114,40 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3556, - 0.3511, + 0.9256, + 0.9211, 0.9167, - 0.9122, + 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, - 0.89, + 0.32, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, - 0.905, + 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, - 0.8724 + 0.8784, + 0.885 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 3, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, + "seed": 83, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.934, + "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", @@ -6989,30 +14172,29 @@ 0.8944, 0.89, 0.8856, - 0.3111, + 0.8811, 0.8767, 0.8722, - 0.8678, + 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, - 0.8828, - 0.8901 + 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 4, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1154, + "seed": 84, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3315, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, - "adversarial_detections": 7, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -7027,12 +14209,12 @@ }, "rewards": [ 0.9256, - 0.9211, + 0.3511, 0.9167, 0.9122, - 0.9078, + 0.3378, 0.9033, - 0.3289, + 0.8989, 0.8944, 0.89, 0.8856, @@ -7040,25 +14222,26 @@ 0.8767, 0.8722, 0.8678, - 0.905, - 0.9006, + 0.8633, + 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, - 0.8901 + 0.8784, + 0.8857 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 5, - "steps": 46, - "score": 0.7916, - "total_reward": 18.9976, - "completion_rate": 0.95, + "seed": 85, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3313, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.917, + "trust_calibration": 0.933, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", @@ -7078,30 +14261,29 @@ 0.9167, 0.3422, 0.9078, - 0.3333, + 0.9033, 0.8989, - 0.8944, - 0.32, + 0.3244, + 0.89, 0.8856, - 0.3111, + 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, - 0.8544, + 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, - 0.8739, - 0.8618 + 0.8856 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 6, + "seed": 86, "steps": 40, "score": 0.8977, "total_reward": 18.8523, @@ -7147,14 +14329,14 @@ { "policy": "oracle_lite", "task_type": "task3", - "seed": 7, + "seed": 87, "steps": 40, - "score": 0.8958, - "total_reward": 18.8108, + "score": 0.8977, + "total_reward": 18.8523, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, - "adversarial_detections": 6, + "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -7181,26 +14363,26 @@ 0.8811, 0.8767, 0.8722, - 0.8678, + 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, - 0.894 + 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 8, + "seed": 88, "steps": 44, - "score": 0.8405, - "total_reward": 19.3315, + "score": 0.8423, + "total_reward": 19.3725, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 6, + "trust_calibration": 0.931, + "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -7215,40 +14397,40 @@ }, "rewards": [ 0.9256, - 0.3511, - 0.9167, + 0.9211, + 0.3467, 0.9122, 0.9078, 0.9033, - 0.3289, + 0.8989, 0.8944, 0.89, - 0.8856, + 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, - 0.8589, + 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, - 0.8857 + 0.885 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 9, - "steps": 40, - "score": 0.8958, - "total_reward": 18.8108, + "seed": 89, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 6, + "trust_calibration": 0.934, + "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -7264,7 +14446,7 @@ "rewards": [ 0.9256, 0.9211, - 0.9167, + 0.3467, 0.9122, 0.9078, 0.9033, @@ -7281,20 +14463,21 @@ 0.8961, 0.8917, 0.8873, - 0.894 + 0.8828, + 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 10, - "steps": 42, - "score": 0.8421, - "total_reward": 18.5263, - "completion_rate": 0.95, + "seed": 90, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8109, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.928, - "adversarial_detections": 7, + "trust_calibration": 0.932, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -7319,28 +14502,27 @@ 0.89, 0.8856, 0.8811, - 0.3067, - 0.3022, + 0.8767, + 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, - 0.8828, - 0.871 + 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 11, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, + "seed": 91, + "steps": 44, + "score": 0.8423, + "total_reward": 19.3721, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.934, + "trust_calibration": 0.93, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", @@ -7357,9 +14539,9 @@ "rewards": [ 0.9256, 0.9211, - 0.3467, + 0.9167, 0.9122, - 0.9078, + 0.3378, 0.9033, 0.8989, 0.8944, @@ -7368,23 +14550,24 @@ 0.8811, 0.8767, 0.8722, - 0.8678, - 0.905, + 0.2978, + 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, - 0.8901 + 0.8784, + 0.8846 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 12, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8524, + "seed": 92, + "steps": 44, + "score": 0.8423, + "total_reward": 19.3725, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, @@ -7407,34 +14590,36 @@ 0.9167, 0.9122, 0.9078, - 0.9033, + 0.3333, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, - 0.8767, + 0.3067, 0.8722, - 0.9095, - 0.905, + 0.8678, + 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, - 0.8938 + 0.8828, + 0.8784, + 0.885 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 13, - "steps": 40, - "score": 0.8958, - "total_reward": 18.8108, + "seed": 93, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 6, + "trust_calibration": 0.934, + "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -7448,7 +14633,7 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, + 0.3556, 0.9211, 0.9167, 0.9122, @@ -7467,19 +14652,20 @@ 0.8961, 0.8917, 0.8873, - 0.894 + 0.8828, + 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 14, - "steps": 44, - "score": 0.8405, - "total_reward": 19.3311, + "seed": 94, + "steps": 46, + "score": 0.8161, + "total_reward": 19.5872, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.932, + "trust_calibration": 0.926, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", @@ -7497,37 +14683,38 @@ 0.9256, 0.9211, 0.9167, - 0.3422, + 0.9122, 0.9078, 0.9033, - 0.8989, + 0.3289, 0.8944, 0.89, - 0.8856, + 0.3156, 0.8811, 0.8767, 0.8722, - 0.8678, - 0.2933, + 0.2978, + 0.8633, 0.8589, - 0.8961, + 0.8544, 0.8917, 0.8873, 0.8828, 0.8784, - 0.8853 + 0.8739, + 0.8814 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 15, - "steps": 46, - "score": 0.8162, - "total_reward": 19.5883, + "seed": 95, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3315, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.93, + "trust_calibration": 0.934, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", @@ -7542,41 +14729,40 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3556, + 0.9256, 0.9211, - 0.9167, + 0.3467, 0.9122, 0.3378, 0.9033, 0.8989, 0.8944, 0.89, - 0.3156, + 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, - 0.8544, + 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, - 0.8739, - 0.8825 + 0.8857 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 16, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8523, + "seed": 96, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.931, + "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", @@ -7592,7 +14778,7 @@ }, "rewards": [ 0.9256, - 0.9211, + 0.3511, 0.9167, 0.9122, 0.9078, @@ -7604,25 +14790,26 @@ 0.8811, 0.8767, 0.8722, - 0.9095, + 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, - 0.8938 + 0.8828, + 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 17, - "steps": 42, - "score": 0.867, - "total_reward": 19.0739, + "seed": 97, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8109, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.935, + "trust_calibration": 0.932, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", @@ -7639,7 +14826,7 @@ "rewards": [ 0.9256, 0.9211, - 0.3467, + 0.9167, 0.9122, 0.9078, 0.9033, @@ -7651,26 +14838,25 @@ 0.8767, 0.8722, 0.8678, - 0.8633, + 0.905, 0.9006, 0.8961, 0.8917, 0.8873, - 0.8828, - 0.8903 + 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 18, - "steps": 40, - "score": 0.8958, - "total_reward": 18.8108, + "seed": 98, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 6, + "trust_calibration": 0.934, + "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -7695,7 +14881,7 @@ 0.89, 0.8856, 0.8811, - 0.8767, + 0.3067, 0.8722, 0.8678, 0.905, @@ -7703,16 +14889,17 @@ 0.8961, 0.8917, 0.8873, - 0.894 + 0.8828, + 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 19, + "seed": 99, "steps": 42, "score": 0.8689, - "total_reward": 19.1153, + "total_reward": 19.1154, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, @@ -7735,11 +14922,11 @@ 0.9167, 0.9122, 0.9078, - 0.3333, + 0.9033, 0.8989, 0.8944, 0.89, - 0.8856, + 0.3156, 0.8811, 0.8767, 0.8722,