diff --git "a/outputs/evaluation_results.json" "b/outputs/evaluation_results.json" --- "a/outputs/evaluation_results.json" +++ "b/outputs/evaluation_results.json" @@ -1,9 +1,11 @@ { - "task": "task3", + "task": "all", "tasks": [ + "task1", + "task2", "task3" ], - "episodes_per_policy": 100, + "episodes_per_policy": 30, "adaptive": false, "difficulty_controller": { "adaptive": true, @@ -16,79 +18,176 @@ "adversary_poison_confidence": 0.92 }, "difficulty_controller_by_task_policy": { + "task1": { + "random": {}, + "heuristic": {}, + "oracle_lite": {}, + "trained": {} + }, + "task2": { + "random": {}, + "heuristic": {}, + "oracle_lite": {}, + "trained": {} + }, "task3": { "random": {}, "heuristic": {}, - "oracle_lite": {} + "oracle_lite": {}, + "trained": {} } }, "summary": { "random": { - "episodes": 100, - "avg_score": 0.6601, - "avg_completion_rate": 0.8165, - "avg_detection_rate": 0.375, - "avg_trust_calibration": 0.7349, - "avg_steps": 36.13 + "episodes": 90, + "avg_score": 0.6904, + "avg_completion_rate": 0.8131, + "avg_detection_rate": 0.7935, + "avg_trust_calibration": 0.4453, + "avg_steps": 26.2111 }, "heuristic": { - "episodes": 100, - "avg_score": 0.7314, - "avg_completion_rate": 0.8935, - "avg_detection_rate": 0.7621, - "avg_trust_calibration": 0.74, - "avg_steps": 35.54 + "episodes": 90, + "avg_score": 0.7817, + "avg_completion_rate": 0.8918, + "avg_detection_rate": 0.9178, + "avg_trust_calibration": 0.4373, + "avg_steps": 24.4 }, "oracle_lite": { - "episodes": 100, - "avg_score": 0.8562, - "avg_completion_rate": 0.991, + "episodes": 90, + "avg_score": 0.8405, + "avg_completion_rate": 0.8687, "avg_detection_rate": 1.0, - "avg_trust_calibration": 0.9304, - "avg_steps": 42.62 + "avg_trust_calibration": 0.5892, + "avg_steps": 29.4444 + }, + "trained": { + "episodes": 90, + "avg_score": 0.788, + "avg_completion_rate": 0.8979, + "avg_detection_rate": 0.9437, + "avg_trust_calibration": 0.4378, + "avg_steps": 24.5 } }, "by_task": { + "task1": { + "random": { + "episodes": 30, + "avg_score": 0.7635, + "avg_completion_rate": 0.76, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.0, + "avg_steps": 15.1333 + }, + "heuristic": { + "episodes": 30, + "avg_score": 0.8504, + "avg_completion_rate": 0.84, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.0, + "avg_steps": 13.8333 + }, + "oracle_lite": { + "episodes": 30, + "avg_score": 0.9011, + "avg_completion_rate": 0.7167, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.0, + "avg_steps": 16.0 + }, + "trained": { + "episodes": 30, + "avg_score": 0.8504, + "avg_completion_rate": 0.84, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.0, + "avg_steps": 13.8333 + } + }, + "task2": { + "random": { + "episodes": 30, + "avg_score": 0.6472, + "avg_completion_rate": 0.8644, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.5829, + "avg_steps": 26.7667 + }, + "heuristic": { + "episodes": 30, + "avg_score": 0.7497, + "avg_completion_rate": 0.9288, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.5737, + "avg_steps": 23.2333 + }, + "oracle_lite": { + "episodes": 30, + "avg_score": 0.7638, + "avg_completion_rate": 0.9045, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.8377, + "avg_steps": 30.0 + }, + "trained": { + "episodes": 30, + "avg_score": 0.7497, + "avg_completion_rate": 0.9288, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.5737, + "avg_steps": 23.2333 + } + }, "task3": { "random": { - "episodes": 100, - "avg_score": 0.6601, - "avg_completion_rate": 0.8165, - "avg_detection_rate": 0.375, - "avg_trust_calibration": 0.7349, - "avg_steps": 36.13 + "episodes": 30, + "avg_score": 0.6606, + "avg_completion_rate": 0.815, + "avg_detection_rate": 0.3806, + "avg_trust_calibration": 0.7531, + "avg_steps": 36.7333 }, "heuristic": { - "episodes": 100, - "avg_score": 0.7314, - "avg_completion_rate": 0.8935, - "avg_detection_rate": 0.7621, - "avg_trust_calibration": 0.74, - "avg_steps": 35.54 + "episodes": 30, + "avg_score": 0.7449, + "avg_completion_rate": 0.9067, + "avg_detection_rate": 0.7534, + "avg_trust_calibration": 0.7383, + "avg_steps": 36.1333 }, "oracle_lite": { - "episodes": 100, - "avg_score": 0.8562, - "avg_completion_rate": 0.991, + "episodes": 30, + "avg_score": 0.8567, + "avg_completion_rate": 0.985, "avg_detection_rate": 1.0, - "avg_trust_calibration": 0.9304, - "avg_steps": 42.62 + "avg_trust_calibration": 0.9299, + "avg_steps": 42.3333 + }, + "trained": { + "episodes": 30, + "avg_score": 0.7637, + "avg_completion_rate": 0.925, + "avg_detection_rate": 0.8312, + "avg_trust_calibration": 0.7396, + "avg_steps": 36.4333 } } }, "episodes": [ { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 0, - "steps": 36, - "score": 0.6105, - "total_reward": 15.2622, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.884, + "steps": 15, + "score": 0.6569, + "total_reward": 7.8825, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -101,43 +200,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8956, - 0.9233, - 0.9211, - 0.02, - 0.9144, - 0.9122, - 0.9078, - 0.3556, + 0.914, + 0.962, + 0.962, 0.02, + 0.962, + 0.962, + 0.962, + 0.3645, 0.02, - 0.8467, - 0.8922, - 0.8378, - 0.8833, - 0.9061, - 0.3067, 0.02, - 0.8722, - 0.073, - 0.8306, - 0.9061, - 0.3397, - 0.3044, - 0.5035 + 0.867 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 1, - "steps": 40, - "score": 0.7205, - "total_reward": 18.0135, - "completion_rate": 0.9, + "steps": 15, + "score": 0.7996, + "total_reward": 7.196, + "completion_rate": 0.7, "detection_rate": 1.0, - "trust_calibration": 0.662, - "adversarial_detections": 1, + "trust_calibration": 0.0, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -151,42 +237,26 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9506, - 0.8889, - 0.8822, - 0.91, - 0.8556, - 0.9011, - 0.3167, - 0.8622, - 0.89, - 0.8356, - 0.8633, - 0.3011, - 0.8444, - 0.8378, - 0.8656, - 0.8261, - 0.8589, - 0.8961, - 0.02, - 0.3, - 0.8106, - 0.2933, - 0.7916 + 0.962, + 0.982, + 0.914, + 0.914, + 0.962, + 0.867, + 0.962, + 0.3165 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 2, - "steps": 37, - "score": 0.7627, - "total_reward": 17.5411, - "completion_rate": 0.9, + "steps": 15, + "score": 0.8129, + "total_reward": 8.1294, + "completion_rate": 0.7, "detection_rate": 1.0, - "trust_calibration": 0.752, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -201,43 +271,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3778, - 0.3203, - 0.9211, - 0.9189, - 0.8822, - 0.91, - 0.8556, - 0.8511, - 0.8967, - 0.8422, - 0.89, - 0.8356, - 0.8311, - 0.8767, - 0.2944, - 0.8972, - 0.87, - 0.8656, - 0.8789, - 0.02, - 0.8722, - 0.8207 + 0.369, + 0.3024, + 0.962, + 0.962, + 0.914, + 0.962, + 0.867, + 0.867, + 0.962 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 3, - "steps": 36, - "score": 0.6303, - "total_reward": 16.3887, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.798, + "steps": 14, + "score": 0.8084, + "total_reward": 10.5095, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -249,43 +306,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9233, - 0.3411, - 0.8667, - 0.9144, - 0.9372, - 0.91, - 0.3278, - 0.3556, - 0.8511, - 0.8989, - 0.2922, - 0.8878, - 0.3056, - 0.8489, - 0.8767, - 0.8922, - 0.333, - 0.2878, - 0.8283, - 0.8589, - 0.3297, - 0.3552, - 0.01 + 0.982, + 0.982, + 0.962, + 0.3455, + 0.867, + 0.962, + 0.946, + 0.962, + 0.3455, + 0.3645, + 0.867, + 0.962 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 4, - "steps": 41, - "score": 0.6894, - "total_reward": 18.6138, - "completion_rate": 0.85, + "steps": 15, + "score": 0.7814, + "total_reward": 8.5956, + "completion_rate": 0.7, "detection_rate": 1.0, - "trust_calibration": 0.405, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -300,47 +344,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9506, - 0.9233, - 0.3711, - 0.8844, - 0.9122, - 0.33, - 0.3186, - 0.9011, - 0.8667, - 0.86, - 0.8878, - 0.8833, - 0.8789, - 0.8767, - 0.8994, - 0.3352, - 0.8678, - 0.3033, - 0.8239, - 0.8744, - 0.8678, - 0.2656, - 0.2933, - 0.2911, - 0.7076 + 0.962, + 0.982, + 0.962, + 0.3645, + 0.914, + 0.962, + 0.3455, + 0.3136, + 0.962, + 0.914 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 5, - "steps": 31, - "score": 0.6062, - "total_reward": 15.1538, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.816, + "steps": 15, + "score": 0.725, + "total_reward": 8.7, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -352,45 +380,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.8933, - 0.9211, - 0.3367, - 0.3344, - 0.3322, - 0.8578, - 0.9306, - 0.9033, - 0.3211, - 0.8467, - 0.9194, - 0.32, - 0.8878, - 0.8856, - 0.8811, - 0.02, - 0.8394, - 0.0752, - 0.87, - 0.8678, - 0.02, - 0.8883, - 0.01 + 0.962, + 0.914, + 0.962, + 0.3165, + 0.3455, + 0.3455, + 0.867, + 0.946, + 0.962, + 0.3455, + 0.867 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 6, - "steps": 39, - "score": 0.6337, - "total_reward": 15.8429, + "steps": 15, + "score": 0.8118, + "total_reward": 8.1182, "completion_rate": 0.8, - "detection_rate": 0.0, - "trust_calibration": 0.872, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -402,45 +417,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8956, - 0.8889, - 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9006, - 0.8511, - 0.3197, - 0.9194, - 0.28, - 0.8556, - 0.8811, - 0.8789, - 0.8422, - 0.8856, - 0.3311, - 0.8589, - 0.0597, - 0.3222, - 0.27, - 0.8728, - 0.02, - 0.01 + 0.914, + 0.914, + 0.962, + 0.962, + 0.962, + 0.962, + 0.934, + 0.867, + 0.3206 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 7, - "steps": 32, - "score": 0.7179, - "total_reward": 15.793, - "completion_rate": 0.85, - "detection_rate": 0.0, - "trust_calibration": 0.869, + "steps": 15, + "score": 0.9334, + "total_reward": 9.334, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -452,42 +452,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8756, - 0.8911, - 0.8867, - 0.9144, - 0.86, - 0.9056, - 0.9033, - 0.8989, - 0.8967, - 0.3444, - 0.89, - 0.8356, - 0.9083, - 0.02, - 0.8789, - 0.8744, - 0.87, - 0.8928, - 0.8633, - 0.3111, - 0.01 + 0.867, + 0.914, + 0.914, + 0.962, + 0.867, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 8, - "steps": 31, - "score": 0.7087, - "total_reward": 16.3004, - "completion_rate": 0.85, - "detection_rate": 0.0, - "trust_calibration": 0.701, + "steps": 15, + "score": 0.8425, + "total_reward": 9.2675, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -499,40 +487,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.3733, - 0.9211, - 0.8844, - 0.9122, - 0.91, - 0.3578, - 0.9056, - 0.9283, - 0.8667, - 0.8944, - 0.84, - 0.3456, - 0.8833, - 0.8811, - 0.8589, - 0.8767, - 0.0774, - 0.835, - 0.8856, - 0.8633, - 0.01 + 0.962, + 0.369, + 0.962, + 0.914, + 0.962, + 0.962, + 0.3645, + 0.962, + 0.982, + 0.914 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 9, - "steps": 37, - "score": 0.7151, - "total_reward": 17.877, - "completion_rate": 0.9, + "steps": 15, + "score": 0.7751, + "total_reward": 9.3011, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.558, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -547,42 +523,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9256, - 0.9233, - 0.8689, - 0.3597, - 0.8822, - 0.935, + 0.982, + 0.962, + 0.962, + 0.867, + 0.3616, + 0.914, + 0.3645, 0.02, - 0.3556, - 0.9011, - 0.8967, - 0.8944, - 0.84, - 0.8356, - 0.8489, - 0.8244, - 0.835, - 0.3178, - 0.8656, - 0.8261, - 0.8217, - 0.3044, - 0.85, - 0.7724 + 0.982, + 0.962, + 0.962 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 10, - "steps": 38, - "score": 0.6037, - "total_reward": 17.5072, - "completion_rate": 0.75, + "steps": 15, + "score": 0.7653, + "total_reward": 8.418, + "completion_rate": 0.7, "detection_rate": 1.0, - "trust_calibration": 0.772, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -597,49 +560,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9233, - 0.9461, - 0.8844, - 0.3622, - 0.9078, - 0.02, - 0.3233, - 0.9261, - 0.8967, - 0.8944, - 0.86, - 0.3378, - 0.02, - 0.3263, - 0.8811, - 0.3289, - 0.2967, - 0.8994, - 0.8722, - 0.8678, - 0.3386, - 0.3463, + 0.962, + 0.962, + 0.982, + 0.914, + 0.982, + 0.962, 0.02, - 0.3089, - 0.8544, - 0.355, - 0.7709 + 0.3455, + 0.3645, + 0.962 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 11, - "steps": 32, - "score": 0.5881, - "total_reward": 14.703, - "completion_rate": 0.7, - "detection_rate": 0.3333, - "trust_calibration": 0.743, - "adversarial_detections": 1, - "adversarial_poisonings": 2, - "status": "failed", + "steps": 15, + "score": 0.8199, + "total_reward": 9.8394, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -651,45 +596,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8756, - 0.9233, - 0.9211, - 0.3459, - 0.8844, - 0.3622, - 0.9078, - 0.9306, - 0.02, - 0.8689, - 0.8967, - 0.8944, - 0.8722, - 0.8878, - 0.8856, - 0.9228, - 0.0819, - 0.2967, - 0.3244, - 0.8722, + 0.867, + 0.962, + 0.962, + 0.3504, + 0.914, + 0.982, + 0.962, + 0.982, 0.02, - 0.3356, - 0.3011, - 0.01 + 0.914, + 0.962 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 12, - "steps": 36, - "score": 0.6249, - "total_reward": 14.9974, - "completion_rate": 0.75, - "detection_rate": 0.3333, - "trust_calibration": 0.716, - "adversarial_detections": 1, - "adversarial_poisonings": 2, - "status": "failed", + "steps": 15, + "score": 0.6163, + "total_reward": 7.3956, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -701,42 +633,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.8933, - 0.3411, - 0.9189, - 0.8644, - 0.9122, - 0.8756, + 0.982, + 0.914, + 0.3455, + 0.962, + 0.898, + 0.962, + 0.914, 0.02, - 0.3441, - 0.3189, - 0.3167, - 0.8622, - 0.8878, - 0.8511, - 0.8789, - 0.8244, - 0.8878, - 0.2856, - 0.0663, - 0.8589, - 0.8961, - 0.8772, - 0.01 + 0.3616, + 0.3455, + 0.3455 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 13, - "steps": 38, - "score": 0.7872, - "total_reward": 18.1053, - "completion_rate": 0.95, + "steps": 15, + "score": 0.7283, + "total_reward": 6.555, + "completion_rate": 0.7, "detection_rate": 1.0, - "trust_calibration": 0.64, - "adversarial_detections": 1, + "trust_calibration": 0.0, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -750,43 +670,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.8911, - 0.8667, - 0.9122, - 0.3278, - 0.9056, - 0.8689, - 0.02, - 0.8622, - 0.8378, - 0.8533, - 0.8289, - 0.8767, - 0.8722, - 0.895, - 0.8856, - 0.8633, - 0.8611, - 0.9017, - 0.8939, - 0.3, - 0.8081 + 0.962, + 0.914, + 0.867, + 0.962, + 0.934, + 0.962, + 0.914, + 0.02 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 14, - "steps": 35, - "score": 0.6988, - "total_reward": 18.1679, - "completion_rate": 0.95, - "detection_rate": 0.0, - "trust_calibration": 0.661, + "steps": 17, + "score": 0.8867, + "total_reward": 10.6405, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -798,43 +704,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.8733, - 0.8889, - 0.8844, - 0.3322, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.8989, - 0.8622, - 0.89, - 0.3408, - 0.8856, - 0.8833, - 0.3011, - 0.8267, - 0.8372, - 0.87, - 0.0708, - 0.8833, - 0.2811, - 0.8839, - 0.8544, - 0.01 + 0.962, + 0.867, + 0.914, + 0.914, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.914 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 15, - "steps": 38, - "score": 0.68, - "total_reward": 19.0388, - "completion_rate": 0.85, + "steps": 16, + "score": 0.6915, + "total_reward": 9.6809, + "completion_rate": 0.7, "detection_rate": 1.0, - "trust_calibration": 0.774, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -849,48 +741,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3548, - 0.9256, - 0.9233, - 0.8689, - 0.3667, - 0.3644, - 0.8922, - 0.935, - 0.8556, - 0.3533, - 0.3211, - 0.3189, - 0.8944, - 0.89, - 0.3378, - 0.8333, - 0.8789, - 0.8767, - 0.8744, - 0.2922, - 0.333, - 0.8656, - 0.8261, - 0.8567, - 0.3, - 0.8478, - 0.8066 + 0.3504, + 0.962, + 0.962, + 0.867, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.867, + 0.982, + 0.3455, + 0.3455, + 0.962 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 16, - "steps": 33, - "score": 0.5966, - "total_reward": 15.511, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.691, + "steps": 15, + "score": 0.7164, + "total_reward": 9.313, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -902,45 +780,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, - 0.3044, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.2975, 0.02, - 0.935, - 0.02, - 0.3256, - 0.8711, - 0.8967, - 0.3444, + 0.982, 0.02, - 0.89, - 0.8556, - 0.8833, - 0.3311, - 0.3289, - 0.8744, - 0.8878, - 0.8633, - 0.3141, - 0.8589, - 0.01 + 0.3455, + 0.914, + 0.962 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 17, - "steps": 35, - "score": 0.5891, - "total_reward": 16.4939, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.795, + "steps": 15, + "score": 0.6495, + "total_reward": 8.4439, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -953,45 +818,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.02, - 0.3433, - 0.3319, - 0.8644, - 0.3622, - 0.91, - 0.3286, - 0.9033, - 0.9011, - 0.8789, - 0.3697, - 0.9194, - 0.84, - 0.8856, - 0.8811, - 0.8789, - 0.3597, - 0.2692, - 0.87, - 0.2878, - 0.8656, - 0.0663, - 0.8239, - 0.8817, + 0.962, 0.02, - 0.4835 + 0.3455, + 0.3136, + 0.867, + 0.982, + 0.962, + 0.3206, + 0.962, + 0.962, + 0.982, + 0.3826 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 18, - "steps": 35, - "score": 0.6548, - "total_reward": 16.3705, - "completion_rate": 0.75, + "steps": 15, + "score": 0.8235, + "total_reward": 9.8815, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.573, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -1006,45 +856,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9233, - 0.02, - 0.9167, - 0.3644, - 0.3622, - 0.91, - 0.8556, - 0.9033, - 0.8689, - 0.8967, - 0.8922, - 0.915, + 0.962, + 0.962, 0.02, - 0.8533, - 0.8789, - 0.2967, - 0.3422, - 0.3078, - 0.8656, - 0.8611, - 0.2789, - 0.3297, - 0.7281 + 0.962, + 0.982, + 0.3645, + 0.962, + 0.867, + 0.962, + 0.914, + 0.962 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 19, - "steps": 38, - "score": 0.6912, - "total_reward": 17.2799, - "completion_rate": 0.9, - "detection_rate": 0.0, - "trust_calibration": 0.834, + "steps": 15, + "score": 0.7588, + "total_reward": 8.347, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1056,44 +893,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3156, - 0.9211, - 0.9189, - 0.9144, - 0.9122, - 0.33, + 0.2975, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3455, 0.02, - 0.9133, - 0.9261, - 0.8967, - 0.8422, - 0.89, - 0.8356, - 0.8511, - 0.8789, - 0.8922, - 0.87, - 0.3178, - 0.8811, - 0.8589, - 0.8544, - 0.87, - 0.3108, - 0.01 + 0.93, + 0.982, + 0.962 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 20, - "steps": 44, - "score": 0.6149, - "total_reward": 19.0606, - "completion_rate": 0.85, - "detection_rate": 0.0, - "trust_calibration": 0.859, + "steps": 15, + "score": 0.6444, + "total_reward": 7.7329, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1106,51 +929,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3556, - 0.8711, - 0.8667, - 0.9144, - 0.9122, - 0.337, - 0.9078, - 0.9033, - 0.3281, - 0.3519, - 0.3467, - 0.8922, - 0.89, - 0.3608, - 0.8856, - 0.8833, - 0.8289, - 0.2714, - 0.87, - 0.3508, - 0.8656, - 0.3089, - 0.0597, - 0.8172, - 0.053, - 0.8728, - 0.8083, - 0.3439, - 0.2567, - 0.5146 + 0.3525, + 0.867, + 0.867, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.3504, + 0.3686, + 0.3645 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 21, - "steps": 30, - "score": 0.7401, - "total_reward": 15.543, - "completion_rate": 0.85, - "detection_rate": 0.0, - "trust_calibration": 0.709, + "steps": 15, + "score": 0.8756, + "total_reward": 9.6315, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1162,40 +966,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.8911, - 0.8867, - 0.9144, - 0.3322, - 0.9078, - 0.9056, - 0.8711, - 0.8467, - 0.8922, - 0.89, - 0.02, - 0.8856, - 0.8811, - 0.8589, - 0.8744, - 0.87, - 0.8856, - 0.01 + 0.962, + 0.962, + 0.914, + 0.914, + 0.962, + 0.3455, + 0.962, + 0.962, + 0.914, + 0.867 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 22, - "steps": 45, - "score": 0.5529, - "total_reward": 16.5871, - "completion_rate": 0.7, - "detection_rate": 0.0, - "trust_calibration": 0.709, + "steps": 15, + "score": 0.6437, + "total_reward": 7.0809, + "completion_rate": 0.5, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1208,49 +1002,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3548, + 0.3504, 0.02, - 0.9311, - 0.8844, - 0.3622, - 0.36, - 0.9078, - 0.2933, - 0.8689, - 0.9217, - 0.3444, - 0.32, - 0.8556, - 0.3033, - 0.3011, - 0.8767, - 0.3022, - 0.87, - 0.8678, - 0.8811, - 0.0619, - 0.8544, - 0.3022, - 0.8478, - 0.8083, - 0.3119, - 0.0397, - 0.8322, - 0.4222 + 0.93, + 0.914, + 0.3645, + 0.3645, + 0.962, + 0.2975, + 0.914, + 0.982 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 23, - "steps": 31, - "score": 0.7921, - "total_reward": 18.2193, - "completion_rate": 0.95, - "detection_rate": 0.5, - "trust_calibration": 0.847, - "adversarial_detections": 1, - "adversarial_poisonings": 1, + "steps": 15, + "score": 0.8985, + "total_reward": 10.7824, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1263,40 +1038,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8756, - 0.9233, - 0.3481, - 0.9439, - 0.9167, - 0.9144, - 0.86, - 0.9078, - 0.9033, - 0.9261, - 0.8967, - 0.8944, - 0.8922, - 0.89, - 0.8878, - 0.02, - 0.9228, - 0.0819, - 0.8744, - 0.87, - 0.8833, - 0.7254 + 0.867, + 0.962, + 0.3504, + 0.982, + 0.962, + 0.962, + 0.867, + 0.962, + 0.962, + 0.982, + 0.962 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 24, - "steps": 45, - "score": 0.6293, - "total_reward": 16.3622, - "completion_rate": 0.75, + "steps": 16, + "score": 0.6933, + "total_reward": 7.6267, + "completion_rate": 0.6, "detection_rate": 1.0, - "trust_calibration": 0.813, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -1311,46 +1075,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3156, - 0.9483, - 0.9461, - 0.3197, - 0.9072, - 0.8756, - 0.9033, - 0.3541, - 0.02, - 0.9044, + 0.2975, + 0.982, + 0.982, + 0.3066, + 0.934, + 0.914, + 0.962, + 0.3686, 0.02, - 0.3608, - 0.8511, - 0.2967, - 0.8722, - 0.8356, - 0.8111, - 0.2867, - 0.8544, - 0.845, - 0.3156, - 0.8061, - 0.8367, - 0.7972, - 0.7658 + 0.93 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 25, - "steps": 34, - "score": 0.6745, - "total_reward": 16.8613, - "completion_rate": 0.85, - "detection_rate": 0.0, - "trust_calibration": 0.809, + "steps": 15, + "score": 0.8266, + "total_reward": 9.0928, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1362,44 +1111,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8933, - 0.9461, - 0.9189, - 0.8644, - 0.9122, - 0.91, - 0.8556, - 0.8711, - 0.8989, - 0.3237, - 0.8944, - 0.8922, - 0.8378, - 0.3356, - 0.8833, - 0.8489, - 0.0797, - 0.3244, - 0.3222, - 0.8878, - 0.8906, - 0.9061, - 0.2967, - 0.01 + 0.914, + 0.982, + 0.962, + 0.867, + 0.962, + 0.962, + 0.867, + 0.914, + 0.962, + 0.3504 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 26, - "steps": 43, - "score": 0.5813, - "total_reward": 17.4397, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.815, + "steps": 15, + "score": 0.7833, + "total_reward": 7.8326, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1412,50 +1147,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8933, - 0.9211, - 0.9439, - 0.9167, - 0.9144, + 0.914, + 0.962, + 0.982, + 0.962, + 0.962, 0.02, - 0.3308, - 0.8711, - 0.8467, - 0.8922, - 0.8878, - 0.3486, - 0.3033, - 0.2759, - 0.8767, - 0.3244, - 0.3452, - 0.29, - 0.8156, - 0.8633, - 0.2889, - 0.0597, - 0.8544, - 0.3372, - 0.8478, - 0.2956, - 0.2811, - 0.2889, - 0.4707 + 0.3206, + 0.914, + 0.898 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 27, - "steps": 40, - "score": 0.5674, - "total_reward": 15.3205, - "completion_rate": 0.7, - "detection_rate": 0.25, - "trust_calibration": 0.816, - "adversarial_detections": 1, - "adversarial_poisonings": 3, - "status": "failed", + "steps": 16, + "score": 0.8311, + "total_reward": 9.1421, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1467,47 +1182,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9206, - 0.3433, - 0.3541, - 0.9189, - 0.9244, - 0.3622, - 0.9078, - 0.9133, - 0.8689, - 0.8644, - 0.885, - 0.02, - 0.8533, - 0.9061, - 0.8267, - 0.9139, - 0.073, - 0.3356, - 0.3441, - 0.0619, - 0.8994, - 0.2722, - 0.323, - 0.3308, - 0.8433, - 0.01 + 0.934, + 0.3455, + 0.3546, + 0.962, + 0.93, + 0.982, + 0.962, + 0.93, + 0.914, + 0.914 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 28, - "steps": 32, - "score": 0.6685, - "total_reward": 16.0443, + "steps": 15, + "score": 0.7196, + "total_reward": 8.6356, "completion_rate": 0.8, - "detection_rate": 0.0, - "trust_calibration": 0.793, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1519,42 +1218,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8956, - 0.9211, - 0.9189, - 0.8844, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.3763, + 0.914, + 0.962, + 0.962, + 0.914, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3826, 0.02, - 0.3167, - 0.8944, - 0.89, - 0.9128, - 0.8856, - 0.8461, - 0.8589, - 0.3267, - 0.8372, - 0.9128, - 0.2933, - 0.3461, - 0.01 + 0.3165 ] }, { "policy": "random", - "task_type": "task3", + "task_type": "task1", "seed": 29, - "steps": 40, - "score": 0.6868, - "total_reward": 17.8577, - "completion_rate": 0.85, + "steps": 15, + "score": 0.5851, + "total_reward": 7.021, + "completion_rate": 0.6, "detection_rate": 1.0, - "trust_calibration": 0.907, - "adversarial_detections": 1, + "trust_calibration": 0.0, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -1568,46 +1255,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.8933, - 0.3411, - 0.9189, + 0.962, + 0.914, + 0.3455, + 0.962, 0.02, - 0.8822, - 0.9028, + 0.914, + 0.3165, 0.02, - 0.8511, - 0.8989, - 0.3467, - 0.3444, - 0.8578, - 0.8856, - 0.8511, - 0.2989, - 0.9094, - 0.87, - 0.8856, - 0.3641, - 0.8567, - 0.3572, - 0.8895, - 0.8083, - 0.8353 + 0.867, + 0.962, + 0.369 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 30, - "steps": 28, - "score": 0.4767, - "total_reward": 10.4876, - "completion_rate": 0.5, - "detection_rate": 0.0, - "trust_calibration": 0.745, + "policy": "heuristic", + "task_type": "task1", + "seed": 0, + "steps": 13, + "score": 0.753, + "total_reward": 10.5415, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 3, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1619,41 +1292,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8956, - 0.9233, - 0.02, - 0.9189, - 0.02, - 0.9394, - 0.02, - 0.9078, - 0.8533, - 0.9011, - 0.02, - 0.8967, - 0.3144, - 0.2878, - 0.8533, - 0.3441, - 0.0819, - 0.2967, - 0.0774, - 0.835, - 0.01 + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645, + 0.962, + 0.962, + 0.3455, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 31, - "steps": 33, - "score": 0.7077, - "total_reward": 19.8143, - "completion_rate": 0.95, - "detection_rate": 0.5, - "trust_calibration": 0.68, - "adversarial_detections": 1, - "adversarial_poisonings": 1, + "policy": "heuristic", + "task_type": "task1", + "seed": 1, + "steps": 12, + "score": 0.7843, + "total_reward": 10.196, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1666,47 +1331,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9256, - 0.9483, - 0.3711, - 0.9189, - 0.3344, - 0.9122, - 0.91, - 0.02, - 0.3556, - 0.9033, - 0.3211, - 0.8989, - 0.8967, - 0.8944, - 0.89, - 0.3456, - 0.8833, - 0.8989, - 0.0797, - 0.8372, - 0.29, - 0.9073, - 0.8633, - 0.8611, - 0.8589, - 0.679 + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 32, - "steps": 40, - "score": 0.5915, - "total_reward": 18.3369, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.392, + "policy": "heuristic", + "task_type": "task1", + "seed": 2, + "steps": 11, + "score": 0.8612, + "total_reward": 10.3345, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1719,50 +1369,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8756, - 0.9233, - 0.9211, - 0.3689, - 0.3444, - 0.3622, - 0.33, - 0.9328, - 0.9056, - 0.3311, - 0.8667, - 0.3214, - 0.8922, - 0.8878, - 0.8856, - 0.3033, - 0.9061, - 0.8789, - 0.8944, - 0.2922, - 0.323, - 0.2956, - 0.3133, - 0.0641, - 0.8839, - 0.8744, - 0.3152, - 0.3178, - 0.3133, - 0.3715 + 0.369, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 33, - "steps": 41, - "score": 0.59, - "total_reward": 15.3412, - "completion_rate": 0.7, - "detection_rate": 0.6667, - "trust_calibration": 0.72, - "adversarial_detections": 2, - "adversarial_poisonings": 1, + "policy": "heuristic", + "task_type": "task1", + "seed": 3, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1775,46 +1406,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.8911, - 0.9439, - 0.8644, - 0.02, - 0.8578, - 0.02, - 0.9033, - 0.8489, - 0.8967, - 0.3444, - 0.287, - 0.8556, - 0.8311, - 0.3497, - 0.8722, - 0.073, - 0.02, - 0.905, - 0.9006, - 0.8544, - 0.3022, - 0.3308, - 0.3186, - 0.6338 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 34, - "steps": 35, - "score": 0.6414, - "total_reward": 16.0344, - "completion_rate": 0.8, - "detection_rate": 0.0, - "trust_calibration": 0.865, + "policy": "heuristic", + "task_type": "task1", + "seed": 4, + "steps": 11, + "score": 0.911, + "total_reward": 10.9324, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1826,45 +1440,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.8933, - 0.8689, - 0.02, - 0.3344, - 0.3392, - 0.353, - 0.9056, - 0.3303, - 0.8667, - 0.8622, - 0.89, - 0.8556, - 0.8511, - 0.8767, - 0.8744, - 0.8722, - 0.32, - 0.8678, - 0.8456, - 0.8611, - 0.8589, - 0.2944, - 0.01 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 35, - "steps": 39, - "score": 0.5508, - "total_reward": 15.9746, - "completion_rate": 0.7, - "detection_rate": 0.5, - "trust_calibration": 0.645, - "adversarial_detections": 1, - "adversarial_poisonings": 1, - "status": "failed", + "policy": "heuristic", + "task_type": "task1", + "seed": 5, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1876,49 +1477,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.3133, - 0.3411, - 0.9167, - 0.8822, - 0.3056, - 0.8511, - 0.8989, - 0.3497, - 0.9194, - 0.3192, - 0.34, - 0.02, - 0.8856, - 0.3033, - 0.2989, - 0.8767, - 0.8744, - 0.2922, - 0.02, - 0.8478, - 0.3386, - 0.8789, - 0.8817, - 0.8172, - 0.8895, - 0.3286, - 0.01 + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.3525, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 36, - "steps": 31, - "score": 0.5216, - "total_reward": 12.5188, - "completion_rate": 0.55, - "detection_rate": 0.5, - "trust_calibration": 0.506, - "adversarial_detections": 1, - "adversarial_poisonings": 1, - "status": "failed", + "policy": "heuristic", + "task_type": "task1", + "seed": 6, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -1930,43 +1511,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.02, - 0.8711, - 0.3519, - 0.3367, - 0.3422, - 0.91, - 0.9328, - 0.02, - 0.8711, - 0.8467, - 0.8922, - 0.34, - 0.3078, - 0.8856, - 0.3333, - 0.3311, - 0.02, - 0.8744, - 0.2922, - 0.9095, - 0.8283, - 0.01 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 37, - "steps": 38, - "score": 0.5875, - "total_reward": 16.449, - "completion_rate": 0.7, - "detection_rate": 0.5, - "trust_calibration": 0.866, - "adversarial_detections": 1, - "adversarial_poisonings": 1, + "policy": "heuristic", + "task_type": "task1", + "seed": 7, + "steps": 16, + "score": 0.8166, + "total_reward": 9.7988, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -1979,47 +1547,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.8933, - 0.9211, - 0.3689, - 0.02, - 0.9122, - 0.8578, - 0.9033, - 0.3211, - 0.8989, - 0.9217, - 0.86, - 0.3508, - 0.02, - 0.3563, - 0.3641, - 0.8789, - 0.8394, - 0.31, - 0.0708, - 0.3156, - 0.3011, - 0.8984, - 0.3222, - 0.335, - 0.8278, - 0.627 + 0.962, + 0.962, + 0.3686, + 0.962, + 0.3826, + 0.3896, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 38, - "steps": 35, - "score": 0.7022, - "total_reward": 18.2564, - "completion_rate": 0.9, - "detection_rate": 0.0, - "trust_calibration": 0.441, + "policy": "heuristic", + "task_type": "task1", + "seed": 8, + "steps": 16, + "score": 0.8399, + "total_reward": 8.3989, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -2032,44 +1584,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.8889, - 0.9167, - 0.9072, - 0.8778, - 0.02, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.3574, - 0.84, - 0.0908, - 0.8833, - 0.3311, - 0.8417, - 0.8744, - 0.8722, - 0.8678, - 0.3486, - 0.3133, - 0.3289, - 0.4474 + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 39, - "steps": 33, - "score": 0.7547, - "total_reward": 18.1121, - "completion_rate": 0.9, + "policy": "heuristic", + "task_type": "task1", + "seed": 9, + "steps": 16, + "score": 0.785, + "total_reward": 10.2052, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.604, - "adversarial_detections": 1, + "trust_calibration": 0.0, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -2083,44 +1619,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9233, - 0.9139, - 0.9167, - 0.8822, - 0.02, - 0.9078, - 0.9056, - 0.9033, - 0.3189, - 0.8444, - 0.3122, - 0.8578, - 0.9106, - 0.8833, - 0.8489, - 0.9161, - 0.835, - 0.8928, - 0.9083, - 0.3141, - 0.3219, - 0.7927 + 0.962, + 0.962, + 0.962, + 0.3756, + 0.962, + 0.3896, + 0.391, + 0.391, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 40, - "steps": 37, - "score": 0.5943, - "total_reward": 15.4518, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.636, + "policy": "heuristic", + "task_type": "task1", + "seed": 10, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2132,43 +1657,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9233, - 0.8889, - 0.9167, - 0.3574, - 0.8778, - 0.8533, - 0.9011, - 0.8989, - 0.8622, - 0.34, - 0.8878, - 0.02, - 0.2711, - 0.8267, - 0.3244, - 0.33, - 0.8356, - 0.8633, - 0.2811, - 0.0619, - 0.8567, - 0.8172, - 0.27, - 0.01 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 41, - "steps": 38, - "score": 0.7027, - "total_reward": 18.9728, - "completion_rate": 0.9, + "policy": "heuristic", + "task_type": "task1", + "seed": 11, + "steps": 16, + "score": 0.7843, + "total_reward": 10.196, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.857, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -2183,45 +1693,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.8733, - 0.9461, - 0.9167, - 0.8622, - 0.337, - 0.8756, - 0.3233, - 0.02, - 0.8967, - 0.8944, - 0.32, - 0.8878, - 0.8511, - 0.8789, - 0.8422, - 0.87, - 0.8678, - 0.8833, - 0.3341, - 0.8567, - 0.2744, - 0.3352, - 0.85, - 0.3328, - 0.8448 + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 42, - "steps": 34, - "score": 0.7826, - "total_reward": 18.7831, - "completion_rate": 0.95, + "policy": "heuristic", + "task_type": "task1", + "seed": 12, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.768, - "adversarial_detections": 1, + "trust_calibration": 0.0, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -2235,44 +1731,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9506, - 0.8911, - 0.9167, - 0.9144, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8967, - 0.8622, - 0.8578, - 0.8656, - 0.02, - 0.3311, - 0.8789, - 0.3267, - 0.9139, - 0.8678, - 0.3286, - 0.9061, - 0.8217, - 0.849 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 43, - "steps": 34, - "score": 0.6248, - "total_reward": 16.2447, - "completion_rate": 0.8, - "detection_rate": 0.0, - "trust_calibration": 0.572, + "policy": "heuristic", + "task_type": "task1", + "seed": 13, + "steps": 16, + "score": 0.9003, + "total_reward": 9.0035, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2284,44 +1765,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.02, - 0.8911, - 0.9189, - 0.3667, - 0.9122, - 0.91, - 0.8556, - 0.3533, - 0.8489, - 0.8967, - 0.8944, - 0.3422, - 0.34, - 0.8856, - 0.8833, - 0.3011, - 0.8417, - 0.8744, - 0.8722, - 0.02, - 0.8833, - 0.2811, - 0.8567, - 0.01 + 0.962, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 44, - "steps": 41, - "score": 0.6806, - "total_reward": 19.7386, - "completion_rate": 0.9, + "policy": "heuristic", + "task_type": "task1", + "seed": 14, + "steps": 13, + "score": 0.7534, + "total_reward": 10.5473, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.81, - "adversarial_detections": 1, + "trust_calibration": 0.0, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -2335,47 +1800,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9483, - 0.3411, - 0.9189, - 0.3667, - 0.8822, - 0.8778, - 0.9033, - 0.9011, - 0.8667, - 0.3214, - 0.8922, - 0.34, - 0.8878, - 0.3586, - 0.8811, - 0.8267, - 0.8744, - 0.31, - 0.9106, - 0.8789, - 0.8567, - 0.8522, - 0.8895, - 0.2656, - 0.02, - 0.3261, - 0.8264 + 0.962, + 0.962, + 0.3686, + 0.3756, + 0.962, + 0.3896, + 0.391, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 45, - "steps": 35, - "score": 0.8133, - "total_reward": 19.519, + "policy": "heuristic", + "task_type": "task1", + "seed": 15, + "steps": 13, + "score": 0.8312, + "total_reward": 11.6374, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.742, - "adversarial_detections": 1, + "trust_calibration": 0.0, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -2389,41 +1839,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.8867, - 0.8622, - 0.8578, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.8422, - 0.89, - 0.8556, - 0.3033, - 0.8789, - 0.2967, - 0.9139, - 0.9128, - 0.9083, - 0.2811, - 0.8567, - 0.8578 + 0.3546, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 46, - "steps": 36, - "score": 0.7646, - "total_reward": 19.8787, - "completion_rate": 0.95, + "policy": "heuristic", + "task_type": "task1", + "seed": 16, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.813, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -2438,45 +1878,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3478, - 0.9233, - 0.9461, - 0.9439, - 0.9167, - 0.9144, - 0.88, - 0.9328, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.3167, - 0.8922, - 0.317, - 0.8856, - 0.3363, - 0.3089, - 0.8767, - 0.8372, - 0.32, - 0.8856, - 0.9061, - 0.8567, - 0.8559 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 47, - "steps": 31, - "score": 0.7026, - "total_reward": 16.8629, - "completion_rate": 0.85, - "detection_rate": 0.0, - "trust_calibration": 0.89, + "policy": "heuristic", + "task_type": "task1", + "seed": 17, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -2489,44 +1912,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9506, - 0.9233, - 0.9189, - 0.8844, - 0.02, - 0.91, - 0.8733, - 0.8489, - 0.8967, - 0.8944, - 0.9172, - 0.89, - 0.3256, - 0.8833, - 0.3311, - 0.3319, - 0.0797, - 0.0774, - 0.87, - 0.8678, - 0.8283, - 0.5511 + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 48, - "steps": 40, - "score": 0.6251, - "total_reward": 17.5018, - "completion_rate": 0.8, - "detection_rate": 0.0, - "trust_calibration": 0.739, + "policy": "heuristic", + "task_type": "task1", + "seed": 18, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2538,47 +1946,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.8933, - 0.8867, - 0.9144, - 0.3622, - 0.91, - 0.3278, - 0.9056, - 0.3763, - 0.3211, - 0.3489, - 0.3544, - 0.8922, - 0.373, - 0.8533, - 0.8489, - 0.8744, - 0.82, - 0.8156, - 0.8633, - 0.8411, - 0.9017, - 0.3394, - 0.85, - 0.3328, - 0.3133, - 0.01 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 49, - "steps": 34, - "score": 0.7592, - "total_reward": 18.2216, - "completion_rate": 0.95, - "detection_rate": 0.0, - "trust_calibration": 0.658, + "policy": "heuristic", + "task_type": "task1", + "seed": 19, + "steps": 12, + "score": 0.8675, + "total_reward": 11.2779, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -2591,44 +1982,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9211, - 0.8867, - 0.9144, - 0.9122, - 0.9078, - 0.9033, - 0.9011, - 0.8667, - 0.8944, - 0.9172, - 0.31, - 0.02, - 0.8833, - 0.3489, - 0.8744, - 0.0752, - 0.8678, - 0.8656, - 0.8633, - 0.8589, - 0.5214 + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 50, - "steps": 31, - "score": 0.5546, - "total_reward": 13.3116, - "completion_rate": 0.65, - "detection_rate": 0.0, - "trust_calibration": 0.768, + "policy": "heuristic", + "task_type": "task1", + "seed": 20, + "steps": 16, + "score": 0.7993, + "total_reward": 8.7927, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2640,43 +2020,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8756, - 0.9233, - 0.8867, - 0.3344, - 0.9122, - 0.91, - 0.3356, - 0.02, - 0.9011, - 0.02, - 0.02, - 0.8944, - 0.84, - 0.8556, - 0.8511, - 0.9039, - 0.2967, - 0.3422, - 0.87, - 0.8928, - 0.0686, - 0.2833, - 0.01 + 0.962, + 0.962, + 0.3686, + 0.3756, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 51, - "steps": 35, - "score": 0.6911, - "total_reward": 16.5857, - "completion_rate": 0.85, - "detection_rate": 0.0, - "trust_calibration": 0.634, + "policy": "heuristic", + "task_type": "task1", + "seed": 21, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -2689,44 +2056,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9206, - 0.9233, - 0.9139, - 0.8644, - 0.9122, - 0.8778, - 0.9056, - 0.3311, - 0.8967, - 0.3222, - 0.8378, - 0.8856, - 0.8833, - 0.9061, - 0.8767, - 0.9172, - 0.073, - 0.3178, - 0.02, - 0.8633, - 0.02, - 0.8217, - 0.4783 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 52, - "steps": 39, - "score": 0.6835, - "total_reward": 19.1391, - "completion_rate": 0.95, - "detection_rate": 0.5, - "trust_calibration": 0.96, - "adversarial_detections": 1, - "adversarial_poisonings": 1, - "status": "failed", + "policy": "heuristic", + "task_type": "task1", + "seed": 22, + "steps": 15, + "score": 0.772, + "total_reward": 12.3526, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2738,47 +2090,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3608, - 0.9506, - 0.9233, - 0.9211, - 0.3144, - 0.9122, - 0.337, - 0.9056, - 0.3233, - 0.9011, - 0.8989, - 0.8944, - 0.84, - 0.8356, - 0.3333, - 0.8811, - 0.8789, - 0.8644, - 0.8722, - 0.8878, - 0.8811, - 0.8567, - 0.2744, - 0.8772, - 0.27, - 0.8873, - 0.01 + 0.369, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.3504, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 53, - "steps": 38, - "score": 0.636, - "total_reward": 16.5363, - "completion_rate": 0.8, - "detection_rate": 0.0, - "trust_calibration": 0.643, + "policy": "heuristic", + "task_type": "task1", + "seed": 23, + "steps": 11, + "score": 0.8606, + "total_reward": 10.3271, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -2791,46 +2131,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8756, - 0.9483, - 0.8689, - 0.9167, - 0.8822, - 0.8578, - 0.8733, - 0.8489, - 0.3244, - 0.8922, - 0.34, - 0.3378, - 0.8856, - 0.02, - 0.8789, - 0.9017, - 0.3174, - 0.02, - 0.3378, - 0.8656, - 0.2833, - 0.0641, - 0.8217, - 0.8522, - 0.4564 + 0.962, + 0.3616, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 54, - "steps": 36, - "score": 0.6308, - "total_reward": 16.4, + "policy": "heuristic", + "task_type": "task1", + "seed": 24, + "steps": 16, + "score": 0.8161, + "total_reward": 9.7931, "completion_rate": 0.8, - "detection_rate": 0.0, - "trust_calibration": 0.696, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2842,46 +2168,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9506, - 0.9233, - 0.9211, - 0.8667, - 0.3252, - 0.9078, - 0.9056, - 0.9011, - 0.8989, - 0.3237, - 0.3474, - 0.84, - 0.8878, - 0.8833, - 0.3089, - 0.8744, - 0.87, - 0.3308, - 0.8811, - 0.0619, - 0.02, - 0.3274, - 0.8522, - 0.01 + 0.962, + 0.3616, + 0.982, + 0.369, + 0.982, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 55, - "steps": 32, - "score": 0.5079, - "total_reward": 13.2064, - "completion_rate": 0.6, - "detection_rate": 0.0, - "trust_calibration": 0.674, + "policy": "heuristic", + "task_type": "task1", + "seed": 25, + "steps": 14, + "score": 0.6506, + "total_reward": 9.7585, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 3, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2893,46 +2205,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.02, - 0.9256, - 0.3241, - 0.9189, - 0.8644, - 0.3322, - 0.33, - 0.9156, - 0.02, - 0.3511, - 0.02, - 0.8622, - 0.8878, - 0.3126, - 0.8833, - 0.8811, - 0.8789, - 0.8767, - 0.3244, - 0.8722, - 0.333, - 0.0708, - 0.0686, - 0.8611, - 0.01 + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.946, + 0.369, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 56, - "steps": 32, - "score": 0.5675, - "total_reward": 14.1867, - "completion_rate": 0.65, - "detection_rate": 0.0, - "trust_calibration": 0.73, + "policy": "heuristic", + "task_type": "task1", + "seed": 26, + "steps": 16, + "score": 0.7054, + "total_reward": 11.2865, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2944,45 +2245,36 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3608, - 0.9256, - 0.02, - 0.8867, - 0.9144, - 0.9122, - 0.02, - 0.3578, - 0.9056, - 0.9033, - 0.8667, - 0.8744, - 0.3422, - 0.363, - 0.9128, - 0.8833, - 0.8489, - 0.3774, - 0.3222, - 0.355, - 0.8678, - 0.0686, - 0.8261, - 0.01 + 0.982, + 0.982, + 0.982, + 0.3645, + 0.946, + 0.982, + 0.3645, + 0.946, + 0.982, + 0.3645, + 0.3645, + 0.369, + 0.369, + 0.3645, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 57, - "steps": 37, - "score": 0.5298, - "total_reward": 13.7744, + "policy": "heuristic", + "task_type": "task1", + "seed": 27, + "steps": 15, + "score": 0.6937, + "total_reward": 9.0177, "completion_rate": 0.6, - "detection_rate": 0.0, - "trust_calibration": 0.716, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -2994,45 +2286,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9233, - 0.9189, - 0.3667, - 0.3644, - 0.3622, - 0.343, - 0.9056, - 0.02, - 0.8667, - 0.8922, - 0.8878, - 0.8856, - 0.02, - 0.8289, - 0.8922, - 0.8678, - 0.2933, - 0.3141, - 0.0619, - 0.3197, - 0.2744, - 0.8772, - 0.3, - 0.01 + 0.962, + 0.962, + 0.3686, + 0.962, + 0.962, + 0.3896, + 0.391, + 0.391, + 0.391, + 0.962, + 0.3525, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 58, - "steps": 37, - "score": 0.6112, - "total_reward": 17.1148, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.808, + "policy": "heuristic", + "task_type": "task1", + "seed": 28, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -3045,47 +2324,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9506, - 0.3433, - 0.3167, - 0.02, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.2911, - 0.3259, - 0.3467, - 0.8422, - 0.8878, - 0.3163, - 0.8811, - 0.8789, - 0.9194, - 0.8522, - 0.8856, - 0.0663, - 0.3341, - 0.8567, - 0.3044, - 0.3022, - 0.3, - 0.4822 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 59, - "steps": 39, - "score": 0.6007, - "total_reward": 16.2199, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.55, + "policy": "heuristic", + "task_type": "task1", + "seed": 29, + "steps": 11, + "score": 0.911, + "total_reward": 10.9324, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -3098,44 +2360,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.3533, - 0.9139, - 0.9167, - 0.9122, - 0.33, - 0.8756, - 0.02, - 0.02, - 0.8467, - 0.8944, - 0.86, - 0.3378, - 0.3063, - 0.8811, - 0.8789, - 0.8444, - 0.8722, - 0.32, - 0.2956, - 0.0663, - 0.2811, - 0.8839, - 0.8722, - 0.2878, - 0.4134 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 60, - "steps": 39, - "score": 0.7149, - "total_reward": 17.1579, - "completion_rate": 0.85, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 0, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.717, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -3150,43 +2397,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9233, - 0.9211, - 0.02, - 0.3044, - 0.92, - 0.9056, - 0.3233, - 0.3211, - 0.8967, - 0.9194, - 0.89, - 0.3056, - 0.8833, - 0.02, - 0.8767, - 0.8994, - 0.87, - 0.8306, - 0.8789, - 0.8194, - 0.8678, - 0.7902 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 61, - "steps": 34, - "score": 0.6745, - "total_reward": 17.5378, - "completion_rate": 0.85, - "detection_rate": 0.0, - "trust_calibration": 0.937, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 1, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -3199,45 +2431,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.8911, - 0.9439, - 0.9167, - 0.3644, - 0.3322, - 0.9078, - 0.8533, - 0.9011, - 0.9239, - 0.8944, - 0.8922, - 0.8828, - 0.3356, - 0.3333, - 0.8967, - 0.0774, - 0.8722, - 0.8678, - 0.8906, - 0.0663, - 0.02, - 0.2789, - 0.8567, - 0.5562 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 62, - "steps": 38, - "score": 0.5955, - "total_reward": 16.078, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.583, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 2, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -3250,47 +2465,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.8733, - 0.8689, - 0.9144, - 0.3622, - 0.02, - 0.8556, - 0.9033, - 0.9011, - 0.02, - 0.3544, - 0.8922, - 0.89, - 0.8878, - 0.8333, - 0.3089, - 0.0797, - 0.3022, - 0.3378, - 0.8633, - 0.8789, - 0.3297, - 0.8794, - 0.3022, - 0.8478, - 0.4237 + 0.3525, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 63, - "steps": 35, - "score": 0.6184, - "total_reward": 16.0796, - "completion_rate": 0.75, - "detection_rate": 0.5, - "trust_calibration": 0.677, - "adversarial_detections": 1, - "adversarial_poisonings": 1, - "status": "failed", + "policy": "oracle_lite", + "task_type": "task1", + "seed": 3, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -3302,44 +2499,27 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.3511, - 0.9167, - 0.8944, - 0.3, - 0.9078, - 0.9306, - 0.9033, - 0.02, - 0.8667, - 0.3122, - 0.89, - 0.8878, - 0.9106, - 0.8811, - 0.8789, - 0.8744, - 0.3222, - 0.3308, - 0.2933, - 0.2811, - 0.3119, - 0.8961, - 0.01 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 64, - "steps": 43, - "score": 0.6578, - "total_reward": 19.7347, - "completion_rate": 0.9, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 4, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, "detection_rate": 1.0, - "trust_calibration": 0.572, - "adversarial_detections": 1, + "trust_calibration": 0.0, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -3353,50 +2533,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9233, - 0.9211, - 0.3367, - 0.8822, - 0.33, - 0.9056, - 0.3463, - 0.8489, - 0.3167, - 0.02, - 0.3422, - 0.31, - 0.3086, - 0.8311, - 0.3367, - 0.8744, - 0.8722, - 0.9028, - 0.8656, - 0.3263, - 0.8611, - 0.8589, - 0.8744, - 0.87, - 0.8106, - 0.8061, - 0.2889, - 0.7624 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 65, - "steps": 34, - "score": 0.6769, - "total_reward": 16.2452, - "completion_rate": 0.8, - "detection_rate": 0.0, - "trust_calibration": 0.879, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 5, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -3408,43 +2567,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.8933, - 0.9461, - 0.3689, - 0.8844, - 0.86, - 0.8556, - 0.9283, - 0.3511, - 0.8989, - 0.3244, - 0.8978, - 0.8856, - 0.8833, - 0.3011, - 0.8767, - 0.8544, - 0.8878, - 0.8656, - 0.3011, - 0.2789, - 0.8817, - 0.01 + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.3525, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 66, - "steps": 34, - "score": 0.5832, - "total_reward": 15.7473, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.662, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 6, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 3, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -3457,47 +2601,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.8933, - 0.9211, - 0.9167, - 0.3644, - 0.91, - 0.8756, - 0.9033, - 0.3511, - 0.8989, - 0.02, - 0.8944, - 0.86, - 0.02, - 0.3063, - 0.8789, - 0.8767, - 0.0774, - 0.02, - 0.8878, - 0.8906, - 0.0663, - 0.0641, - 0.8589, - 0.3067, - 0.4526 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 67, - "steps": 34, - "score": 0.5986, - "total_reward": 14.3671, - "completion_rate": 0.7, - "detection_rate": 0.0, - "trust_calibration": 0.891, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 7, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -3509,42 +2635,27 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.02, - 0.9233, - 0.3489, - 0.9144, - 0.3622, - 0.36, - 0.9328, - 0.3003, - 0.9011, - 0.8667, - 0.8922, - 0.8378, - 0.02, - 0.8833, - 0.8489, - 0.9194, - 0.89, - 0.8678, - 0.8811, - 0.0619, - 0.3397, - 0.01 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 68, - "steps": 33, - "score": 0.7306, - "total_reward": 18.2654, - "completion_rate": 0.9, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 8, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, "detection_rate": 1.0, - "trust_calibration": 0.684, - "adversarial_detections": 1, + "trust_calibration": 0.0, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -3558,44 +2669,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8756, - 0.9233, - 0.3481, - 0.9167, - 0.3644, - 0.9122, - 0.91, - 0.9078, - 0.9033, - 0.3511, - 0.8989, - 0.3297, - 0.8922, - 0.8878, - 0.02, - 0.02, - 0.8811, - 0.9039, - 0.8767, - 0.8722, - 0.9095, - 0.8656, - 0.8611, - 0.8126 + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 69, - "steps": 40, - "score": 0.7144, - "total_reward": 18.574, - "completion_rate": 0.95, - "detection_rate": 0.0, - "trust_calibration": 0.723, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 9, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -3608,45 +2703,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3478, - 0.9506, - 0.9233, - 0.9189, - 0.9167, - 0.3422, - 0.8756, - 0.9283, - 0.9011, - 0.8989, - 0.02, - 0.3122, - 0.8556, - 0.8833, - 0.8467, - 0.8222, - 0.8678, - 0.8656, - 0.3133, - 0.8239, - 0.0597, - 0.8544, - 0.87, - 0.8106, - 0.5244 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 70, - "steps": 34, - "score": 0.6867, - "total_reward": 17.1674, - "completion_rate": 0.85, - "detection_rate": 0.0, - "trust_calibration": 0.786, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 10, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -3659,45 +2737,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9506, - 0.02, - 0.8889, - 0.9144, - 0.33, - 0.9078, - 0.9056, - 0.9033, - 0.8489, - 0.3497, - 0.8944, - 0.8922, - 0.8828, - 0.8856, - 0.8833, - 0.8789, - 0.3774, - 0.87, - 0.3706, - 0.0663, - 0.8239, - 0.8567, - 0.5184 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 71, - "steps": 29, - "score": 0.5734, - "total_reward": 14.3348, - "completion_rate": 0.65, - "detection_rate": 0.0, - "trust_calibration": 0.702, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 11, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 3, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -3709,45 +2771,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.8911, - 0.3459, - 0.9167, - 0.3644, - 0.88, - 0.9328, - 0.8711, - 0.8989, - 0.8967, - 0.3574, - 0.3422, - 0.89, - 0.8856, - 0.02, - 0.8811, - 0.0819, - 0.3267, - 0.3244, - 0.8722, - 0.353, - 0.0708, - 0.01 + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 72, - "steps": 30, - "score": 0.7073, - "total_reward": 16.2683, - "completion_rate": 0.85, - "detection_rate": 0.0, - "trust_calibration": 0.584, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 12, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -3759,41 +2805,27 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8956, - 0.9483, - 0.3489, - 0.8844, - 0.91, - 0.9078, - 0.9306, - 0.9283, - 0.9011, - 0.3259, - 0.8644, - 0.84, - 0.8878, - 0.9106, - 0.3033, - 0.8811, - 0.9039, - 0.8394, - 0.87, - 0.02, - 0.8906, - 0.01 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 73, - "steps": 34, - "score": 0.7782, - "total_reward": 18.6761, - "completion_rate": 0.95, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 13, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.803, - "adversarial_detections": 2, + "trust_calibration": 0.0, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -3807,44 +2839,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9233, - 0.9189, - 0.9167, - 0.3644, - 0.91, - 0.9078, - 0.9056, - 0.3533, - 0.02, - 0.8467, - 0.8944, - 0.28, - 0.8878, - 0.8333, - 0.8811, - 0.8267, - 0.8372, - 0.8328, - 0.8656, - 0.8883, - 0.9006, - 0.8577 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 74, - "steps": 33, - "score": 0.7164, - "total_reward": 17.1935, - "completion_rate": 0.9, - "detection_rate": 0.3333, - "trust_calibration": 0.957, - "adversarial_detections": 1, - "adversarial_poisonings": 2, - "status": "failed", + "policy": "oracle_lite", + "task_type": "task1", + "seed": 14, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -3856,42 +2873,27 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9506, - 0.9233, - 0.9211, - 0.8844, - 0.86, - 0.9056, - 0.3233, - 0.3189, - 0.9217, - 0.8944, - 0.3352, - 0.89, - 0.8856, - 0.8311, - 0.0819, - 0.9161, - 0.8722, - 0.8678, - 0.8906, - 0.8633, - 0.8589, - 0.01 + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 75, - "steps": 40, - "score": 0.6588, - "total_reward": 18.4472, - "completion_rate": 0.8, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 15, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, "detection_rate": 1.0, - "trust_calibration": 0.642, - "adversarial_detections": 1, + "trust_calibration": 0.0, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -3905,48 +2907,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.8911, - 0.8667, - 0.3344, - 0.86, - 0.3278, - 0.3256, - 0.9033, - 0.3511, - 0.3259, - 0.8444, - 0.89, - 0.8856, - 0.3103, - 0.8811, - 0.8867, - 0.8994, - 0.8722, - 0.3378, - 0.3333, - 0.2811, - 0.9017, - 0.8544, - 0.8917, - 0.2756, - 0.7515 + 0.3525, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 76, - "steps": 28, - "score": 0.7202, - "total_reward": 15.8447, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 16, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, - "detection_rate": 0.0, - "trust_calibration": 0.793, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -3958,40 +2941,27 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.3526, - 0.9233, - 0.9189, - 0.9167, - 0.9144, - 0.9122, - 0.935, - 0.9078, - 0.8983, - 0.8689, - 0.8967, - 0.8944, - 0.8922, - 0.8878, - 0.9033, - 0.3189, - 0.8394, - 0.3222, - 0.333, - 0.01 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 77, - "steps": 34, - "score": 0.6731, - "total_reward": 16.8286, - "completion_rate": 0.8, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 17, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, "detection_rate": 1.0, - "trust_calibration": 0.682, - "adversarial_detections": 1, + "trust_calibration": 0.0, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -4005,44 +2975,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9233, - 0.9211, - 0.02, - 0.9167, - 0.9394, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.8989, - 0.8894, - 0.3422, - 0.89, - 0.2756, - 0.8311, - 0.3467, - 0.2944, - 0.9117, - 0.8678, - 0.3363, - 0.02, - 0.8567, - 0.775 + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 78, - "steps": 44, - "score": 0.627, - "total_reward": 18.1844, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 18, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, - "detection_rate": 0.5, - "trust_calibration": 0.849, - "adversarial_detections": 1, - "adversarial_poisonings": 1, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -4055,48 +3009,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.8933, - 0.3159, - 0.8822, - 0.8578, - 0.9033, - 0.9011, - 0.8917, - 0.8944, - 0.8578, - 0.3356, - 0.8833, - 0.2989, - 0.3267, - 0.3374, - 0.2922, - 0.32, - 0.8156, - 0.8711, - 0.8589, - 0.3022, - 0.3, - 0.0508, - 0.8883, - 0.8661, - 0.3119, - 0.2567, - 0.6445 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 79, - "steps": 37, - "score": 0.6279, - "total_reward": 16.9527, - "completion_rate": 0.8, - "detection_rate": 0.0, - "trust_calibration": 0.561, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 19, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -4109,46 +3043,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9233, - 0.8867, - 0.9144, - 0.9122, - 0.02, - 0.8756, - 0.9011, - 0.8989, - 0.3467, - 0.8622, - 0.89, - 0.8556, - 0.8511, - 0.9039, - 0.3267, - 0.0774, - 0.02, - 0.343, - 0.8656, - 0.3011, - 0.8589, - 0.3067, - 0.8172, - 0.333, - 0.4381 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 80, - "steps": 40, - "score": 0.6766, - "total_reward": 18.9455, - "completion_rate": 0.9, - "detection_rate": 0.0, - "trust_calibration": 0.946, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 20, + "steps": 16, + "score": 0.7588, + "total_reward": 6.8295, + "completion_rate": 0.5, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -4161,51 +3077,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9483, - 0.3489, - 0.8644, - 0.3622, - 0.91, - 0.9328, - 0.9056, - 0.8511, - 0.3189, - 0.3467, - 0.3144, - 0.32, - 0.8933, - 0.8811, - 0.8267, - 0.8994, - 0.8722, - 0.8678, - 0.0686, - 0.9061, - 0.0619, - 0.8194, - 0.323, - 0.8456, - 0.5627 + 0.3525, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.3525, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 81, - "steps": 33, - "score": 0.7394, - "total_reward": 17.745, - "completion_rate": 0.9, - "detection_rate": 0.5, - "trust_calibration": 0.704, - "adversarial_detections": 1, - "adversarial_poisonings": 1, - "status": "completed", - "difficulty_profile": { - "adaptive": false, - "episodes_seen": 0, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 21, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, @@ -4214,41 +3111,26 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8956, - 0.9233, - 0.9211, - 0.9167, - 0.8822, - 0.91, - 0.9078, - 0.3256, - 0.3533, - 0.9261, - 0.8467, - 0.02, - 0.8922, - 0.3078, - 0.8856, - 0.8833, - 0.0841, - 0.8967, - 0.8744, - 0.87, - 0.9073, - 0.8261, - 0.6676 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 82, - "steps": 39, - "score": 0.6307, - "total_reward": 17.661, - "completion_rate": 0.75, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 22, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, "detection_rate": 1.0, - "trust_calibration": 0.581, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -4263,48 +3145,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8956, - 0.9233, - 0.8889, - 0.3667, - 0.9122, - 0.2978, - 0.3586, - 0.9033, - 0.3289, - 0.9294, - 0.89, - 0.8556, - 0.3663, - 0.3311, - 0.3289, - 0.8767, - 0.8994, - 0.3222, - 0.32, - 0.3528, - 0.3683, - 0.8411, - 0.8567, - 0.02, - 0.815, - 0.8478, - 0.7211 + 0.3525, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 83, - "steps": 33, - "score": 0.6648, - "total_reward": 15.9564, - "completion_rate": 0.8, - "detection_rate": 0.25, - "trust_calibration": 0.917, - "adversarial_detections": 1, - "adversarial_poisonings": 3, - "status": "failed", + "policy": "oracle_lite", + "task_type": "task1", + "seed": 23, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -4316,42 +3179,27 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8956, - 0.3663, - 0.9211, - 0.9439, - 0.9144, - 0.3322, - 0.9078, - 0.9306, - 0.8961, - 0.8789, - 0.8967, - 0.8944, - 0.86, - 0.8356, - 0.8811, - 0.9184, - 0.3474, - 0.3552, - 0.073, - 0.0708, - 0.8633, - 0.9039, - 0.01 + 0.3525, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 84, - "steps": 39, - "score": 0.7325, - "total_reward": 19.0457, - "completion_rate": 0.95, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 24, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, "detection_rate": 1.0, - "trust_calibration": 0.851, - "adversarial_detections": 1, + "trust_calibration": 0.0, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -4365,46 +3213,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8756, - 0.9233, - 0.02, - 0.3067, - 0.9122, - 0.8578, - 0.3256, - 0.8511, - 0.8989, - 0.8444, - 0.8922, - 0.8578, - 0.8833, - 0.8811, - 0.8767, - 0.8744, - 0.3252, - 0.8678, - 0.8656, - 0.3363, - 0.8589, - 0.8961, - 0.85, - 0.2978, - 0.8586 + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 85, - "steps": 32, - "score": 0.661, - "total_reward": 15.2038, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.807, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 25, + "steps": 16, + "score": 0.7588, + "total_reward": 6.8295, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -4416,42 +3247,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8956, - 0.9483, - 0.9211, - 0.9189, - 0.9144, - 0.9122, - 0.33, - 0.3356, - 0.3533, - 0.3211, - 0.8967, - 0.86, - 0.8533, - 0.8811, - 0.3289, - 0.8767, - 0.8744, - 0.8722, - 0.8856, - 0.0663, - 0.8861, - 0.01 + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 86, - "steps": 44, - "score": 0.6056, - "total_reward": 19.3805, - "completion_rate": 0.85, - "detection_rate": 0.0, - "trust_calibration": 0.837, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 26, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -4464,52 +3281,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3478, - 0.8933, - 0.3489, - 0.8844, - 0.9372, - 0.9078, - 0.3556, - 0.8689, - 0.8967, - 0.3222, - 0.89, - 0.3078, - 0.8856, - 0.9083, - 0.3311, - 0.2967, - 0.3274, - 0.88, - 0.8678, - 0.2926, - 0.3133, - 0.8861, - 0.0619, - 0.8544, - 0.3022, - 0.8678, - 0.8456, - 0.8061, - 0.3119, - 0.02, - 0.5089 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 87, - "steps": 37, - "score": 0.6574, - "total_reward": 16.4362, - "completion_rate": 0.85, - "detection_rate": 0.0, - "trust_calibration": 0.807, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 27, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -4521,44 +3315,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8933, - 0.9211, - 0.9439, - 0.8644, - 0.86, - 0.9078, - 0.8733, - 0.9011, - 0.8667, - 0.8922, - 0.8556, - 0.8833, - 0.3311, - 0.8789, - 0.8767, - 0.3422, - 0.8878, - 0.2856, - 0.8261, - 0.0619, - 0.8994, - 0.02, - 0.303, - 0.01 + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 88, - "steps": 36, - "score": 0.5911, - "total_reward": 15.369, - "completion_rate": 0.75, - "detection_rate": 0.0, - "trust_calibration": 0.855, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 28, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 3, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -4571,44 +3349,27 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.02, - 0.8733, - 0.9461, - 0.8667, - 0.9144, - 0.905, - 0.9078, - 0.9056, - 0.3233, - 0.8989, - 0.8967, - 0.8922, - 0.3178, - 0.02, - 0.02, - 0.0841, - 0.8967, - 0.0774, - 0.0752, - 0.8678, - 0.8633, - 0.8789, - 0.8194, - 0.3022, - 0.4961 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 89, - "steps": 34, - "score": 0.7869, - "total_reward": 20.4589, - "completion_rate": 1.0, + "policy": "oracle_lite", + "task_type": "task1", + "seed": 29, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.7, "detection_rate": 1.0, - "trust_calibration": 0.809, - "adversarial_detections": 1, + "trust_calibration": 0.0, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -4622,46 +3383,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8956, - 0.9233, - 0.3481, - 0.9439, - 0.3367, - 0.9144, - 0.9122, - 0.337, - 0.9056, - 0.9033, - 0.9011, - 0.9239, - 0.8644, - 0.8922, - 0.89, - 0.8878, - 0.925, - 0.3441, - 0.8967, - 0.3474, - 0.87, - 0.8928, - 0.8283, - 0.8789, - 0.8766 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 90, - "steps": 31, - "score": 0.6723, - "total_reward": 14.7899, + "policy": "trained", + "task_type": "task1", + "seed": 0, + "steps": 13, + "score": 0.753, + "total_reward": 10.5415, "completion_rate": 0.8, - "detection_rate": 0.3333, - "trust_calibration": 0.902, - "adversarial_detections": 1, - "adversarial_poisonings": 2, - "status": "failed", + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -4673,39 +3417,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9206, - 0.8889, - 0.3367, - 0.8822, - 0.8578, - 0.9056, - 0.9033, - 0.9089, - 0.02, - 0.8922, - 0.89, - 0.8878, - 0.8856, - 0.8833, - 0.9061, - 0.8767, - 0.0774, - 0.02, - 0.9095, - 0.8633, - 0.01 + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645, + 0.962, + 0.962, + 0.3455, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 91, - "steps": 44, - "score": 0.6543, - "total_reward": 18.9747, - "completion_rate": 0.85, + "policy": "trained", + "task_type": "task1", + "seed": 1, + "steps": 12, + "score": 0.7843, + "total_reward": 10.196, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.289, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -4720,49 +3456,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.3111, - 0.9189, - 0.3667, - 0.88, - 0.3278, - 0.9056, - 0.8711, - 0.8667, - 0.3222, - 0.34, - 0.9128, - 0.3133, - 0.3311, - 0.3367, - 0.84, - 0.8156, - 0.8633, - 0.8239, - 0.2767, - 0.8544, - 0.85, - 0.8728, - 0.8883, - 0.8039, - 0.2744, - 0.672 + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 92, - "steps": 39, - "score": 0.6758, - "total_reward": 16.8941, - "completion_rate": 0.85, - "detection_rate": 0.0, - "trust_calibration": 0.804, + "policy": "trained", + "task_type": "task1", + "seed": 2, + "steps": 11, + "score": 0.8612, + "total_reward": 10.3345, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 1, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -4774,42 +3494,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9256, - 0.9233, - 0.9211, - 0.3689, - 0.3367, - 0.9144, - 0.3322, - 0.8578, - 0.9033, - 0.8667, - 0.3122, - 0.2856, - 0.8489, - 0.2967, - 0.8994, - 0.8378, - 0.8633, - 0.8239, - 0.8544, - 0.8522, - 0.85, - 0.8106, - 0.01 + 0.369, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 93, - "steps": 38, - "score": 0.7063, - "total_reward": 17.6574, - "completion_rate": 0.85, + "policy": "trained", + "task_type": "task1", + "seed": 3, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.542, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -4824,44 +3531,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.8756, - 0.8711, - 0.3389, - 0.9167, - 0.9144, - 0.9122, - 0.9178, - 0.9033, - 0.8689, - 0.8644, - 0.3422, - 0.8878, - 0.8833, - 0.8289, - 0.8767, - 0.8372, - 0.02, - 0.8306, - 0.8789, - 0.8567, - 0.3274, - 0.3352, - 0.27, - 0.7487 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 94, - "steps": 35, - "score": 0.6388, - "total_reward": 15.969, - "completion_rate": 0.75, - "detection_rate": 0.5, - "trust_calibration": 0.941, - "adversarial_detections": 1, - "adversarial_poisonings": 1, + "policy": "trained", + "task_type": "task1", + "seed": 4, + "steps": 11, + "score": 0.911, + "total_reward": 10.9324, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -4874,44 +3565,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9233, - 0.9139, - 0.02, - 0.9144, - 0.02, - 0.935, - 0.9078, - 0.3256, - 0.8711, - 0.3489, - 0.3237, - 0.8422, - 0.8878, - 0.8333, - 0.8789, - 0.3044, - 0.3222, - 0.073, - 0.2878, - 0.8633, - 0.9006, - 0.8567, - 0.6701 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 95, - "steps": 35, - "score": 0.6469, - "total_reward": 17.465, - "completion_rate": 0.8, - "detection_rate": 0.0, - "trust_calibration": 0.834, + "policy": "trained", + "task_type": "task1", + "seed": 5, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -4924,45 +3602,27 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.8933, - 0.8689, - 0.3667, - 0.3644, - 0.307, - 0.3578, - 0.9056, - 0.9283, - 0.9011, - 0.8967, - 0.8944, - 0.8922, - 0.89, - 0.8856, - 0.9161, - 0.2989, - 0.3267, - 0.3474, - 0.0752, - 0.073, - 0.8306, - 0.8261, - 0.3089, - 0.8194, - 0.5106 + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.3525, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 96, - "steps": 36, - "score": 0.7393, - "total_reward": 17.7436, - "completion_rate": 0.9, + "policy": "trained", + "task_type": "task1", + "seed": 6, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.619, - "adversarial_detections": 1, + "trust_calibration": 0.0, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -4976,41 +3636,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.8733, - 0.3167, - 0.92, - 0.9056, - 0.9033, - 0.9011, - 0.3189, - 0.8622, - 0.02, - 0.8878, - 0.8933, - 0.8811, - 0.9039, - 0.8767, - 0.8744, - 0.8722, - 0.87, - 0.8306, - 0.02, - 0.3289, - 0.8961, - 0.7897 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 97, - "steps": 36, - "score": 0.7834, - "total_reward": 18.8027, - "completion_rate": 0.95, + "policy": "trained", + "task_type": "task1", + "seed": 7, + "steps": 16, + "score": 0.8166, + "total_reward": 9.7988, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.813, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5025,41 +3672,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.8711, - 0.9439, - 0.8844, - 0.86, - 0.9078, - 0.9306, - 0.8711, - 0.8989, - 0.8967, - 0.3214, - 0.84, - 0.8856, - 0.8811, - 0.8767, - 0.3244, - 0.835, - 0.2878, - 0.3156, - 0.8611, - 0.8589, - 0.8544, - 0.8557 + 0.962, + 0.962, + 0.3686, + 0.962, + 0.3826, + 0.3896, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 98, - "steps": 37, - "score": 0.7572, - "total_reward": 18.9302, - "completion_rate": 0.95, + "policy": "trained", + "task_type": "task1", + "seed": 8, + "steps": 16, + "score": 0.8399, + "total_reward": 8.3989, + "completion_rate": 0.7, "detection_rate": 1.0, - "trust_calibration": 0.605, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5074,44 +3709,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.02, - 0.8889, - 0.8967, - 0.8622, - 0.9078, - 0.3256, - 0.8711, - 0.8989, - 0.8944, - 0.3422, - 0.89, - 0.3308, - 0.8333, - 0.3341, - 0.8767, - 0.9172, - 0.8678, - 0.8656, - 0.8811, - 0.8839, - 0.8194, - 0.8014 + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962 ] }, { - "policy": "random", - "task_type": "task3", - "seed": 99, - "steps": 37, - "score": 0.6531, - "total_reward": 18.9394, - "completion_rate": 0.9, - "detection_rate": 0.0, - "trust_calibration": 0.623, + "policy": "trained", + "task_type": "task1", + "seed": 9, + "steps": 16, + "score": 0.785, + "total_reward": 10.2052, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 2, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -5124,46 +3744,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9483, - 0.9211, - 0.9189, - 0.9167, - 0.9144, - 0.333, - 0.9078, - 0.3011, - 0.9239, - 0.3144, - 0.89, - 0.8556, - 0.3033, - 0.8811, - 0.8789, - 0.9017, - 0.0774, - 0.835, - 0.0708, - 0.8656, - 0.02, - 0.9039, - 0.3297, - 0.8172, - 0.3, - 0.4885 + 0.962, + 0.962, + 0.962, + 0.3756, + 0.962, + 0.3896, + 0.391, + 0.391, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 0, - "steps": 43, - "score": 0.7114, - "total_reward": 18.4969, - "completion_rate": 0.85, + "policy": "trained", + "task_type": "task1", + "seed": 10, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.729, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5178,45 +3782,30 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.3344, - 0.91, - 0.9056, - 0.9011, - 0.3267, - 0.8922, - 0.8878, - 0.8833, - 0.8789, - 0.8744, - 0.3, - 0.2956, - 0.8611, - 0.2867, - 0.8522, - 0.8478, - 0.8433, - 0.8389, - 0.7841 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 1, - "steps": 29, - "score": 0.7083, - "total_reward": 17.707, - "completion_rate": 0.85, - "detection_rate": 0.5, - "trust_calibration": 0.721, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "policy": "trained", + "task_type": "task1", + "seed": 11, + "steps": 16, + "score": 0.7843, + "total_reward": 10.196, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -5229,44 +3818,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.6632 + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 2, - "steps": 29, - "score": 0.6919, - "total_reward": 17.2983, - "completion_rate": 0.85, - "detection_rate": 0.4444, - "trust_calibration": 0.561, - "adversarial_detections": 4, - "adversarial_poisonings": 5, + "policy": "trained", + "task_type": "task1", + "seed": 12, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -5279,42 +3856,26 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3778, - 0.3456, - 0.9233, - 0.9211, - 0.9189, - 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.6065 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 3, - "steps": 42, - "score": 0.8546, - "total_reward": 18.8008, - "completion_rate": 1.0, + "policy": "trained", + "task_type": "task1", + "seed": 13, + "steps": 16, + "score": 0.9003, + "total_reward": 9.0035, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.843, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5329,43 +3890,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.3111, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, - 0.85, - 0.8456, - 0.8411, - 0.8675 + 0.962, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 4, - "steps": 29, - "score": 0.7165, - "total_reward": 17.9128, - "completion_rate": 0.9, - "detection_rate": 0.4444, - "trust_calibration": 0.721, - "adversarial_detections": 4, - "adversarial_poisonings": 5, - "status": "completed", - "difficulty_profile": { + "policy": "trained", + "task_type": "task1", + "seed": 14, + "steps": 13, + "score": 0.7534, + "total_reward": 10.5473, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, @@ -5376,42 +3925,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, - 0.9167, - 0.3414, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.664 + 0.962, + 0.962, + 0.3686, + 0.3756, + 0.962, + 0.3896, + 0.391, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 5, - "steps": 46, - "score": 0.7558, - "total_reward": 18.1385, - "completion_rate": 0.9, + "policy": "trained", + "task_type": "task1", + "seed": 15, + "steps": 13, + "score": 0.8312, + "total_reward": 11.6374, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.832, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5426,43 +3964,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, - 0.3422, - 0.9078, - 0.3333, - 0.8989, - 0.8944, - 0.32, - 0.8856, - 0.3111, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, - 0.85, - 0.8456, - 0.2711, - 0.8367, - 0.8322, - 0.8229 + 0.3546, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 6, - "steps": 27, - "score": 0.6991, - "total_reward": 16.778, - "completion_rate": 0.85, - "detection_rate": 0.4, - "trust_calibration": 0.725, - "adversarial_detections": 4, - "adversarial_poisonings": 6, + "policy": "trained", + "task_type": "task1", + "seed": 16, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -5475,41 +4003,26 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, - 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.1019, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.6387 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 7, - "steps": 42, - "score": 0.7756, - "total_reward": 19.3902, - "completion_rate": 0.95, + "policy": "trained", + "task_type": "task1", + "seed": 17, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, "detection_rate": 1.0, - "trust_calibration": 0.835, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5524,42 +4037,26 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.3763, - 0.9211, - 0.3919, - 0.3997, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.2933, - 0.8589, - 0.8544, - 0.28, - 0.8456, - 0.8411, - 0.8478 + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 8, - "steps": 44, - "score": 0.809, - "total_reward": 19.4157, + "policy": "trained", + "task_type": "task1", + "seed": 18, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.853, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5574,41 +4071,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.3526, - 0.9211, - 0.9167, - 0.9122, - 0.9078, - 0.3333, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, - 0.85, - 0.2756, - 0.8411, - 0.8367, - 0.8654 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 9, - "steps": 40, - "score": 0.782, - "total_reward": 19.5499, - "completion_rate": 0.95, + "policy": "trained", + "task_type": "task1", + "seed": 19, + "steps": 12, + "score": 0.8675, + "total_reward": 11.2779, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.837, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5623,44 +4107,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.3841, - 0.9189, - 0.3997, - 0.3994, - 0.3972, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.2889, - 0.8544, - 0.85, - 0.8456, - 0.8528 + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 10, - "steps": 31, - "score": 0.712, - "total_reward": 17.8008, - "completion_rate": 0.85, - "detection_rate": 0.625, - "trust_calibration": 0.448, - "adversarial_detections": 5, - "adversarial_poisonings": 3, + "policy": "trained", + "task_type": "task1", + "seed": 20, + "steps": 16, + "score": 0.7993, + "total_reward": 8.7927, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -5673,42 +4145,28 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, - 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.3303, - 0.3281, - 0.8989, - 0.0997, - 0.0974, - 0.0952, - 0.9295, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.3356, - 0.6281 + 0.962, + 0.962, + 0.3686, + 0.3756, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 11, - "steps": 40, - "score": 0.7732, - "total_reward": 18.5566, - "completion_rate": 0.9, + "policy": "trained", + "task_type": "task1", + "seed": 21, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.835, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5723,41 +4181,26 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.2978, - 0.2933, - 0.8589, - 0.8544, - 0.85, - 0.8456, - 0.8349 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 12, - "steps": 42, - "score": 0.8546, - "total_reward": 18.8009, + "policy": "trained", + "task_type": "task1", + "seed": 22, + "steps": 15, + "score": 0.772, + "total_reward": 12.3526, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.844, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5772,39 +4215,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.2889, - 0.8544, - 0.85, - 0.8456, - 0.8411, - 0.8676 + 0.369, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.3504, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 13, - "steps": 39, - "score": 0.833, - "total_reward": 18.3252, - "completion_rate": 0.95, + "policy": "trained", + "task_type": "task1", + "seed": 23, + "steps": 11, + "score": 0.8606, + "total_reward": 10.3271, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.811, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5819,41 +4256,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.3456, - 0.3433, - 0.9189, - 0.9144, - 0.91, - 0.9056, - 0.9011, - 0.8967, - 0.8922, - 0.8878, - 0.8833, - 0.8789, - 0.8744, - 0.87, - 0.8656, - 0.8611, - 0.8567, - 0.8522, - 0.8478, - 0.8485 + 0.962, + 0.3616, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 14, - "steps": 29, - "score": 0.6889, - "total_reward": 17.9127, - "completion_rate": 0.85, - "detection_rate": 0.5, - "trust_calibration": 0.609, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "policy": "trained", + "task_type": "task1", + "seed": 24, + "steps": 16, + "score": 0.8161, + "total_reward": 9.7931, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -5866,45 +4293,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.3763, - 0.3841, - 0.3689, - 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.3237, - 0.8944, - 0.8922, - 0.093, - 0.0908, - 0.0886, - 0.0863, - 0.9206, - 0.9161, - 0.9117, - 0.6353 + 0.962, + 0.3616, + 0.982, + 0.369, + 0.982, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 15, - "steps": 30, - "score": 0.6847, - "total_reward": 18.4869, - "completion_rate": 0.9, - "detection_rate": 0.4444, - "trust_calibration": 0.635, - "adversarial_detections": 4, - "adversarial_poisonings": 5, + "policy": "trained", + "task_type": "task1", + "seed": 25, + "steps": 14, + "score": 0.6506, + "total_reward": 9.7585, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -5917,44 +4330,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3608, - 0.9256, - 0.9233, - 0.9211, - 0.3459, - 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.3348, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.8944, - 0.8922, - 0.093, - 0.0908, - 0.0886, - 0.0863, - 0.0841, - 0.9184, - 0.9139, - 0.9095, - 0.6404 + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.946, + 0.369, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 16, - "steps": 42, - "score": 0.8546, - "total_reward": 18.8009, - "completion_rate": 1.0, + "policy": "trained", + "task_type": "task1", + "seed": 26, + "steps": 16, + "score": 0.7054, + "total_reward": 11.2865, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.844, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -5969,39 +4370,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.2889, - 0.8544, - 0.85, - 0.8456, - 0.8411, - 0.8676 + 0.982, + 0.982, + 0.982, + 0.3645, + 0.946, + 0.982, + 0.3645, + 0.946, + 0.982, + 0.3645, + 0.3645, + 0.369, + 0.369, + 0.3645, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 17, - "steps": 46, - "score": 0.8048, - "total_reward": 19.316, - "completion_rate": 1.0, + "policy": "trained", + "task_type": "task1", + "seed": 27, + "steps": 15, + "score": 0.6937, + "total_reward": 9.0177, + "completion_rate": 0.6, "detection_rate": 1.0, - "trust_calibration": 0.842, + "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -6016,43 +4411,32 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.3467, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.2889, - 0.8544, - 0.28, - 0.8456, - 0.8411, - 0.8367, - 0.8322, - 0.8605 + 0.962, + 0.962, + 0.3686, + 0.962, + 0.962, + 0.3896, + 0.391, + 0.391, + 0.391, + 0.962, + 0.3525, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 18, - "steps": 26, - "score": 0.6967, - "total_reward": 16.7213, - "completion_rate": 0.85, - "detection_rate": 0.3333, - "trust_calibration": 0.701, - "adversarial_detections": 3, - "adversarial_poisonings": 6, + "policy": "trained", + "task_type": "task1", + "seed": 28, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -6065,44 +4449,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, - 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.9228, - 0.9184, - 0.6149 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 19, - "steps": 20, - "score": 0.6193, - "total_reward": 13.0053, - "completion_rate": 0.65, - "detection_rate": 0.0, - "trust_calibration": 0.576, + "policy": "trained", + "task_type": "task1", + "seed": 29, + "steps": 11, + "score": 0.911, + "total_reward": 10.9324, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, "adversarial_detections": 0, - "adversarial_poisonings": 5, - "status": "failed", + "adversarial_poisonings": 0, + "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -6114,38 +4485,29 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3478, - 0.9256, - 0.9233, - 0.9211, - 0.9189, - 0.3437, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.01 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 20, - "steps": 46, - "score": 0.7498, - "total_reward": 19.4938, - "completion_rate": 0.95, + "policy": "random", + "task_type": "task2", + "seed": 0, + "steps": 25, + "score": 0.5298, + "total_reward": 9.5365, + "completion_rate": 0.667, "detection_rate": 1.0, - "trust_calibration": 0.835, + "trust_calibration": 0.461, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -6160,43 +4522,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.3763, - 0.3841, + 0.9083, + 0.925, 0.9167, - 0.9122, - 0.3378, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.3156, - 0.8811, - 0.8767, - 0.3022, - 0.8678, - 0.8633, - 0.8589, - 0.2844, - 0.85, - 0.8456, - 0.8411, - 0.8367, - 0.8322, - 0.8412 + 0.02, + 0.8917, + 0.8833, + 0.8667, + 0.2233, + 0.02, + 0.02, + 0.725, + 0.8083, + 0.6917, + 0.775, + 0.1317, + 0.115, + 0.5949 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 21, - "steps": 42, - "score": 0.8546, - "total_reward": 18.8009, + "policy": "random", + "task_type": "task2", + "seed": 1, + "steps": 31, + "score": 0.7252, + "total_reward": 13.054, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.844, + "trust_calibration": 0.569, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -6211,41 +4565,37 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, - 0.85, - 0.2756, - 0.8411, - 0.8676 + 0.9417, + 0.9833, + 0.8833, + 0.8583, + 0.875, + 0.7583, + 0.8417, + 0.165, + 0.7833, + 0.8, + 0.6833, + 0.735, + 0.1217, + 0.7167, + 0.7, + 0.7083, + 0.849 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 22, - "steps": 36, - "score": 0.7334, - "total_reward": 21.2675, - "completion_rate": 1.0, - "detection_rate": 0.8, - "trust_calibration": 0.747, - "adversarial_detections": 4, - "adversarial_poisonings": 1, + "policy": "random", + "task_type": "task2", + "seed": 2, + "steps": 27, + "score": 0.6551, + "total_reward": 11.7913, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.579, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -6258,48 +4608,37 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3778, - 0.9256, - 0.9233, - 0.3481, - 0.9189, + 0.3517, + 0.262, 0.9167, - 0.9144, - 0.9122, - 0.337, - 0.9078, - 0.3326, - 0.9033, - 0.9011, - 0.3259, - 0.8967, - 0.3214, - 0.8922, - 0.89, - 0.3148, - 0.8856, - 0.0863, - 0.9206, - 0.9161, - 0.9117, - 0.9073, - 0.8789, - 0.8544, - 0.7968 + 0.9083, + 0.8583, + 0.875, + 0.7583, + 0.7417, + 0.825, + 0.7083, + 0.8, + 0.6833, + 0.6667, + 0.75, + 0.0967, + 0.0983, + 0.766 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 23, + "policy": "random", + "task_type": "task2", + "seed": 3, "steps": 28, - "score": 0.6922, - "total_reward": 17.3057, - "completion_rate": 0.85, - "detection_rate": 0.4444, - "trust_calibration": 0.645, - "adversarial_detections": 4, - "adversarial_poisonings": 5, + "score": 0.6154, + "total_reward": 12.9233, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.586, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -6312,42 +4651,38 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.3686, - 0.3433, - 0.9211, - 0.9189, - 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.9228, - 0.9184, - 0.9139, - 0.6298 + 0.99, + 0.9833, + 0.925, + 0.2717, + 0.8, + 0.8917, + 0.9333, + 0.875, + 0.2217, + 0.2233, + 0.7417, + 0.8333, + 0.1483, + 0.7917, + 0.1383, + 0.7333, + 0.75, + 0.7083, + 0.123, + 0.7686 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 24, - "steps": 46, - "score": 0.7725, - "total_reward": 20.0838, - "completion_rate": 1.0, + "policy": "random", + "task_type": "task2", + "seed": 4, + "steps": 28, + "score": 0.6575, + "total_reward": 12.4928, + "completion_rate": 0.867, "detection_rate": 1.0, - "trust_calibration": 0.836, + "trust_calibration": 0.458, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -6362,43 +4697,36 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.3686, - 0.9483, - 0.3711, - 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.3022, - 0.8678, - 0.8633, - 0.2889, - 0.8544, - 0.85, - 0.8456, - 0.2711, - 0.8367, - 0.8322, - 0.8591 - ] - }, - { - "policy": "heuristic", - "task_type": "task3", - "seed": 25, - "steps": 34, - "score": 0.6755, - "total_reward": 18.9148, - "completion_rate": 0.8, + 0.9417, + 0.9833, + 0.925, + 0.2817, + 0.8667, + 0.8833, + 0.23, + 0.2113, + 0.8417, + 0.8, + 0.775, + 0.7917, + 0.775, + 0.7583, + 0.75, + 0.1067, + 0.1313, + 0.7235 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 5, + "steps": 26, + "score": 0.6206, + "total_reward": 12.4128, + "completion_rate": 0.867, "detection_rate": 1.0, - "trust_calibration": 0.71, + "trust_calibration": 0.408, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -6413,47 +4741,39 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.9144, - 0.3322, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.3189, - 0.3167, - 0.8944, - 0.3122, - 0.31, - 0.3078, - 0.8856, - 0.9011, - 0.8967, - 0.8922, - 0.3078, - 0.3033, - 0.8789, - 0.782 + 0.9417, + 0.9, + 0.9167, + 0.24, + 0.2467, + 0.2383, + 0.7667, + 0.9083, + 0.85, + 0.1967, + 0.725, + 0.8667, + 0.165, + 0.7917, + 0.7833, + 0.7667, + 0.02, + 0.65, + 0.7061 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 26, - "steps": 34, - "score": 0.6561, - "total_reward": 19.0282, - "completion_rate": 0.8, - "detection_rate": 0.6667, - "trust_calibration": 0.467, - "adversarial_detections": 2, - "adversarial_poisonings": 1, + "policy": "random", + "task_type": "task2", + "seed": 6, + "steps": 31, + "score": 0.7065, + "total_reward": 12.7163, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.576, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -6466,46 +4786,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.8989, - 0.3667, - 0.3644, - 0.9372, - 0.935, - 0.3578, - 0.3556, - 0.3533, - 0.3289, - 0.8967, - 0.8944, - 0.8922, - 0.317, - 0.8878, - 0.3126, + 0.9083, 0.8833, - 0.0841, - 0.9184, - 0.9139, - 0.8878, - 0.8656, - 0.3163, - 0.3419, - 0.6213 + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8833, + 0.7417, + 0.188, + 0.8667, + 0.13, + 0.7583, + 0.7667, + 0.7583, + 0.7083, + 0.7, + 0.8083 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 27, - "steps": 46, - "score": 0.7256, - "total_reward": 20.3155, - "completion_rate": 0.95, + "policy": "random", + "task_type": "task2", + "seed": 7, + "steps": 25, + "score": 0.7366, + "total_reward": 12.5218, + "completion_rate": 0.933, "detection_rate": 1.0, - "trust_calibration": 0.828, + "trust_calibration": 0.776, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -6520,47 +4829,36 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.3763, - 0.9211, - 0.9189, - 0.3997, - 0.3994, - 0.3972, - 0.9078, - 0.9033, - 0.3289, - 0.8944, - 0.89, - 0.3156, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.2844, + 0.8333, + 0.8917, + 0.875, + 0.8917, + 0.775, + 0.8583, 0.85, - 0.2756, - 0.8411, - 0.8367, - 0.8322, - 0.8395 + 0.8333, + 0.825, + 0.1817, + 0.8, + 0.6833, + 0.825, + 0.02, + 0.7583, + 0.8784 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 28, - "steps": 28, - "score": 0.7355, - "total_reward": 17.6509, - "completion_rate": 0.9, - "detection_rate": 0.5, - "trust_calibration": 0.725, - "adversarial_detections": 5, - "adversarial_poisonings": 5, + "policy": "random", + "task_type": "task2", + "seed": 8, + "steps": 25, + "score": 0.7329, + "total_reward": 13.9253, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.425, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -6573,43 +4871,38 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, + 0.9333, + 0.335, 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.1019, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.9273, - 0.9228, - 0.9184, - 0.9139, - 0.6841 + 0.8667, + 0.8833, + 0.875, + 0.2767, + 0.8583, + 0.9, + 0.8, + 0.8167, + 0.7, + 0.1233, + 0.775, + 0.7667, + 0.8083, + 0.75, + 0.7987 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 29, - "steps": 28, - "score": 0.7258, - "total_reward": 18.144, - "completion_rate": 0.9, - "detection_rate": 0.5, - "trust_calibration": 0.709, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "policy": "random", + "task_type": "task2", + "seed": 9, + "steps": 27, + "score": 0.7062, + "total_reward": 12.712, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.597, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -6622,42 +4915,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, - 0.9167, - 0.9144, - 0.3392, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.3281, - 0.8989, - 0.8967, - 0.8944, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.9228, - 0.9184, - 0.9139, - 0.6801 + 0.99, + 0.9333, + 0.925, + 0.8083, + 0.278, + 0.8583, + 0.925, + 0.02, + 0.2233, + 0.8417, + 0.825, + 0.8167, + 0.735, + 0.6833, + 0.7333, + 0.65, + 0.8157 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 30, - "steps": 39, - "score": 0.6238, - "total_reward": 18.7149, - "completion_rate": 0.75, + "policy": "random", + "task_type": "task2", + "seed": 10, + "steps": 26, + "score": 0.5723, + "total_reward": 12.0174, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.712, + "trust_calibration": 0.855, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -6672,48 +4958,39 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.3411, - 0.9189, - 0.9167, - 0.9144, - 0.3322, - 0.33, - 0.9078, - 0.3256, - 0.3233, - 0.9011, - 0.8989, - 0.8967, - 0.3144, - 0.8922, - 0.31, - 0.3078, - 0.3233, - 0.8989, - 0.8944, - 0.89, - 0.3056, - 0.3011, - 0.2967, - 0.8722, - 0.2878, - 0.7538 + 0.9417, + 0.925, + 0.9667, + 0.8667, + 0.2483, + 0.8667, + 0.02, + 0.205, + 0.8917, + 0.825, + 0.8167, + 0.775, + 0.1567, + 0.02, + 0.153, + 0.7667, + 0.8083, + 0.105, + 0.1067, + 0.8194 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 31, - "steps": 37, - "score": 0.6915, - "total_reward": 20.0529, - "completion_rate": 0.85, + "policy": "random", + "task_type": "task2", + "seed": 11, + "steps": 23, + "score": 0.7197, + "total_reward": 12.955, + "completion_rate": 0.933, "detection_rate": 1.0, - "trust_calibration": 0.911, - "adversarial_detections": 1, + "trust_calibration": 0.57, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -6727,48 +5004,37 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.9144, - 0.3322, - 0.91, - 0.3278, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.3697, - 0.3774, - 0.3772, - 0.89, - 0.3728, - 0.3706, - 0.9228, - 0.8944, - 0.87, - 0.8656, - 0.2911, - 0.8567, - 0.8522, - 0.843 + 0.8333, + 0.925, + 0.9167, + 0.2703, + 0.8667, + 0.2483, + 0.8667, + 0.9083, + 0.02, + 0.8083, + 0.825, + 0.8167, + 0.7683, + 0.7917, + 0.7833, + 0.7417, + 0.8063 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 32, + "policy": "random", + "task_type": "task2", + "seed": 12, "steps": 30, - "score": 0.689, - "total_reward": 17.9142, - "completion_rate": 0.8, - "detection_rate": 0.6, - "trust_calibration": 0.712, - "adversarial_detections": 3, - "adversarial_poisonings": 2, + "score": 0.6047, + "total_reward": 12.0935, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.344, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -6781,45 +5047,39 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.3326, - 0.3303, - 0.9011, - 0.8989, - 0.8967, - 0.8944, - 0.0952, - 0.093, - 0.9273, - 0.9228, - 0.9184, - 0.8922, - 0.87, - 0.3208, - 0.6714 + 0.99, + 0.9, + 0.2717, + 0.9083, + 0.8267, + 0.8833, + 0.8333, + 0.02, + 0.2197, + 0.1883, + 0.18, + 0.7833, + 0.7917, + 0.7417, + 0.7583, + 0.65, + 0.7, + 0.0633, + 0.6839 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 33, - "steps": 31, - "score": 0.7464, - "total_reward": 18.6604, - "completion_rate": 0.9, - "detection_rate": 0.6667, - "trust_calibration": 0.845, - "adversarial_detections": 4, - "adversarial_poisonings": 2, + "policy": "random", + "task_type": "task2", + "seed": 13, + "steps": 30, + "score": 0.6649, + "total_reward": 11.9681, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.271, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -6832,42 +5092,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, - 0.3437, - 0.3414, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.3303, - 0.9011, - 0.8989, - 0.3237, - 0.8944, - 0.0952, - 0.093, - 0.9273, - 0.9228, - 0.9184, - 0.9139, - 0.8856, - 0.7574 + 0.9417, + 0.8917, + 0.8, + 0.8833, + 0.2067, + 0.8583, + 0.8083, + 0.02, + 0.7833, + 0.6917, + 0.75, + 0.6583, + 0.75, + 0.7333, + 0.09, + 0.7, + 0.7014 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 34, - "steps": 40, - "score": 0.8105, - "total_reward": 18.6408, - "completion_rate": 0.95, + "policy": "random", + "task_type": "task2", + "seed": 14, + "steps": 26, + "score": 0.7146, + "total_reward": 13.5771, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.838, + "trust_calibration": 0.416, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -6882,42 +5135,38 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.3433, - 0.3411, - 0.9167, - 0.9122, - 0.3378, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, + 0.9417, + 0.825, + 0.8833, + 0.8667, + 0.2383, + 0.875, + 0.8667, + 0.8583, 0.85, - 0.8456, - 0.853 + 0.8333, + 0.7833, + 0.8, + 0.1797, + 0.7833, + 0.775, + 0.1217, + 0.65, + 0.7957 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 35, - "steps": 28, - "score": 0.7257, - "total_reward": 18.1436, - "completion_rate": 0.9, - "detection_rate": 0.5, - "trust_calibration": 0.708, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "policy": "random", + "task_type": "task2", + "seed": 15, + "steps": 27, + "score": 0.5573, + "total_reward": 12.2603, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.606, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -6930,42 +5179,39 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, - 0.3437, - 0.9144, - 0.9122, - 0.91, - 0.3348, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.8944, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.9228, - 0.9184, - 0.9139, - 0.6797 + 0.3037, + 0.9333, + 0.925, + 0.8083, + 0.265, + 0.2567, + 0.8433, + 0.925, + 0.7583, + 0.215, + 0.1967, + 0.1883, + 0.8167, + 0.8, + 0.1567, + 0.675, + 0.7583, + 0.75, + 0.7417, + 0.0883, + 0.732 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 36, - "steps": 42, - "score": 0.7886, - "total_reward": 18.9271, - "completion_rate": 0.95, + "policy": "random", + "task_type": "task2", + "seed": 16, + "steps": 23, + "score": 0.5543, + "total_reward": 11.0864, + "completion_rate": 0.733, "detection_rate": 1.0, - "trust_calibration": 0.854, + "trust_calibration": 0.437, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -6980,45 +5226,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.3686, - 0.3433, - 0.3481, + 0.9417, + 0.9333, + 0.925, 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.2978, - 0.8633, - 0.8589, - 0.8544, - 0.85, - 0.8456, - 0.8411, - 0.8527 + 0.9083, + 0.2217, + 0.02, + 0.925, + 0.02, + 0.2133, + 0.8167, + 0.825, + 0.1817, + 0.02, + 0.8, + 0.7583, + 0.775, + 0.1317, + 0.6298 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 37, - "steps": 28, - "score": 0.6822, - "total_reward": 17.056, - "completion_rate": 0.85, - "detection_rate": 0.4, - "trust_calibration": 0.659, - "adversarial_detections": 4, - "adversarial_poisonings": 6, - "status": "completed", - "difficulty_profile": { + "policy": "random", + "task_type": "task2", + "seed": 17, + "steps": 27, + "score": 0.5694, + "total_reward": 11.9565, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.688, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, @@ -7029,42 +5271,38 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3478, - 0.9256, - 0.9233, - 0.9211, - 0.9189, - 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.9228, - 0.9184, - 0.9139, - 0.62 + 0.9417, + 0.02, + 0.28, + 0.2613, + 0.7917, + 0.2483, + 0.875, + 0.2213, + 0.85, + 0.8417, + 0.7933, + 0.233, + 0.8667, + 0.7, + 0.7833, + 0.7667, + 0.7583, + 0.168, + 0.0703, + 0.7609 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 38, - "steps": 43, - "score": 0.8297, - "total_reward": 19.0823, - "completion_rate": 1.0, + "policy": "random", + "task_type": "task2", + "seed": 18, + "steps": 23, + "score": 0.6662, + "total_reward": 11.3256, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.774, + "trust_calibration": 0.602, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -7079,42 +5317,36 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3478, - 0.9233, - 0.9189, - 0.9144, - 0.91, - 0.9056, - 0.9011, - 0.8967, - 0.8922, - 0.8878, - 0.8833, - 0.8789, - 0.8744, - 0.3, - 0.8656, - 0.8611, - 0.8567, - 0.8522, - 0.8478, - 0.8433, - 0.8389, - 0.8479 + 0.9417, + 0.925, + 0.02, + 0.9, + 0.2567, + 0.2483, + 0.875, + 0.7583, + 0.85, + 0.8083, + 0.825, + 0.8083, + 0.85, + 0.02, + 0.75, + 0.7306 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 39, - "steps": 28, - "score": 0.7161, - "total_reward": 17.9036, - "completion_rate": 0.9, - "detection_rate": 0.4444, - "trust_calibration": 0.71, - "adversarial_detections": 4, - "adversarial_poisonings": 5, + "policy": "random", + "task_type": "task2", + "seed": 19, + "steps": 27, + "score": 0.7003, + "total_reward": 12.6055, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.625, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -7127,42 +5359,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, + 0.2633, 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.3303, - 0.9011, - 0.8989, - 0.8967, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.9228, - 0.9184, - 0.9139, - 0.6637 + 0.9083, + 0.8917, + 0.8833, + 0.23, + 0.02, + 0.825, + 0.8917, + 0.825, + 0.7083, + 0.8, + 0.6833, + 0.7417, + 0.7583, + 0.7083, + 0.8255 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 40, - "steps": 37, - "score": 0.6368, - "total_reward": 19.7412, - "completion_rate": 0.8, + "policy": "random", + "task_type": "task2", + "seed": 20, + "steps": 32, + "score": 0.5557, + "total_reward": 12.7811, + "completion_rate": 0.867, "detection_rate": 1.0, - "trust_calibration": 0.678, + "trust_calibration": 0.636, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -7177,48 +5402,40 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.9144, - 0.9122, - 0.33, - 0.9078, - 0.9056, - 0.3233, - 0.9011, - 0.3189, - 0.3167, - 0.8944, - 0.3122, - 0.89, - 0.3078, - 0.8856, - 0.3033, - 0.8811, - 0.8967, - 0.3122, - 0.3078, + 0.2983, + 0.8167, + 0.8, + 0.8917, 0.8833, - 0.2989, - 0.2944, - 0.87, - 0.7673 + 0.237, + 0.8667, + 0.85, + 0.2037, + 0.2213, + 0.19, + 0.8083, + 0.8, + 0.1997, + 0.7833, + 0.775, + 0.6583, + 0.0787, + 0.725, + 0.1347, + 0.7083, + 0.7861 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 41, - "steps": 38, - "score": 0.6416, - "total_reward": 20.5321, - "completion_rate": 0.85, + "policy": "random", + "task_type": "task2", + "seed": 21, + "steps": 23, + "score": 0.7485, + "total_reward": 12.7247, + "completion_rate": 0.933, "detection_rate": 1.0, - "trust_calibration": 0.732, + "trust_calibration": 0.523, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -7233,51 +5450,36 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.3411, - 0.9189, - 0.3367, - 0.9144, - 0.9122, - 0.33, - 0.9078, - 0.3256, - 0.3233, - 0.3211, - 0.8989, - 0.8967, - 0.8944, - 0.3122, - 0.89, - 0.3078, - 0.3056, - 0.8833, - 0.3189, - 0.8944, - 0.31, - 0.3056, - 0.8811, - 0.8589, - 0.3097, - 0.8722, - 0.323, - 0.7961 + 0.9417, + 0.9333, + 0.8917, + 0.875, + 0.8917, + 0.2383, + 0.8667, + 0.8583, + 0.8167, + 0.725, + 0.8083, + 0.8, + 0.02, + 0.7833, + 0.7667, + 0.7897 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 42, + "policy": "random", + "task_type": "task2", + "seed": 22, "steps": 31, - "score": 0.6759, - "total_reward": 17.5723, - "completion_rate": 0.85, - "detection_rate": 0.4444, - "trust_calibration": 0.442, - "adversarial_detections": 4, - "adversarial_poisonings": 5, + "score": 0.4891, + "total_reward": 10.2706, + "completion_rate": 0.667, + "detection_rate": 1.0, + "trust_calibration": 0.544, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -7290,45 +5492,40 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, - 0.9167, - 0.3414, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.3056, - 0.5724 + 0.3037, + 0.02, + 0.8917, + 0.8667, + 0.2483, + 0.24, + 0.8667, + 0.18, + 0.8083, + 0.875, + 0.1817, + 0.165, + 0.7583, + 0.13, + 0.1217, + 0.75, + 0.0983, + 0.725, + 0.7167, + 0.6236 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 43, - "steps": 32, - "score": 0.6896, - "total_reward": 17.9304, - "completion_rate": 0.85, - "detection_rate": 0.5, - "trust_calibration": 0.681, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "policy": "random", + "task_type": "task2", + "seed": 23, + "steps": 20, + "score": 0.7708, + "total_reward": 13.1031, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.755, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -7341,43 +5538,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.3456, - 0.9233, - 0.9211, - 0.3719, - 0.3797, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.0974, - 0.0952, - 0.093, - 0.0908, + 0.8333, 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.3356, - 0.6467 + 0.2787, + 0.9583, + 0.9, + 0.8917, + 0.775, + 0.8667, + 0.85, + 0.8917, + 0.825, + 0.8167, + 0.8083, + 0.8, + 0.7917, + 0.8711 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 44, - "steps": 38, - "score": 0.6652, - "total_reward": 19.2895, - "completion_rate": 0.8, + "policy": "random", + "task_type": "task2", + "seed": 24, + "steps": 31, + "score": 0.5498, + "total_reward": 9.8972, + "completion_rate": 0.667, "detection_rate": 1.0, - "trust_calibration": 0.721, + "trust_calibration": 0.809, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -7392,46 +5580,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.8967, - 0.9394, - 0.9372, - 0.36, - 0.3578, - 0.3556, - 0.9283, - 0.3511, - 0.3489, - 0.3167, - 0.8944, - 0.8922, - 0.343, - 0.3508, - 0.9011, - 0.8767, - 0.8722, - 0.2978, - 0.8633, - 0.8589, - 0.2844, + 0.2633, + 0.975, + 0.9667, + 0.243, + 0.9083, + 0.8333, 0.85, - 0.7757 + 0.2297, + 0.02, + 0.7917, + 0.02, + 0.1647, + 0.7417, + 0.09, + 0.7333, + 0.7, + 0.7165 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 45, - "steps": 40, - "score": 0.8827, - "total_reward": 18.5376, + "policy": "random", + "task_type": "task2", + "seed": 25, + "steps": 23, + "score": 0.791, + "total_reward": 13.4466, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.839, + "trust_calibration": 0.623, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -7446,39 +5623,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, - 0.85, - 0.8456, - 0.8709 + 0.9, + 0.9667, + 0.9083, + 0.7917, + 0.8833, + 0.875, + 0.7583, + 0.8167, + 0.8333, + 0.187, + 0.8167, + 0.8083, + 0.6917, + 0.8333, + 0.775, + 0.8679 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 46, - "steps": 40, - "score": 0.6245, - "total_reward": 18.7359, - "completion_rate": 0.7, + "policy": "random", + "task_type": "task2", + "seed": 26, + "steps": 29, + "score": 0.5823, + "total_reward": 11.646, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.429, - "adversarial_detections": 3, + "trust_calibration": 0.895, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -7492,49 +5665,39 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3478, - 0.9506, - 0.9483, - 0.9461, - 0.3689, - 0.3667, - 0.3644, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.3763, - 0.3841, - 0.3839, - 0.3817, - 0.8944, - 0.3192, - 0.3178, - 0.8856, - 0.9228, - 0.9184, - 0.3422, - 0.3728, - 0.3683, - 0.9006, - 0.8994, - 0.85, - 0.3156, - 0.6634 + 0.9, + 0.9167, + 0.9583, + 0.9, + 0.8917, + 0.02, + 0.2297, + 0.8167, + 0.76, + 0.8083, + 0.7917, + 0.1813, + 0.13, + 0.0953, + 0.75, + 0.7917, + 0.1413, + 0.08, + 0.8333 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 47, + "policy": "random", + "task_type": "task2", + "seed": 27, "steps": 29, - "score": 0.7165, - "total_reward": 17.9132, - "completion_rate": 0.9, - "detection_rate": 0.4444, - "trust_calibration": 0.722, - "adversarial_detections": 4, - "adversarial_poisonings": 5, + "score": 0.6402, + "total_reward": 12.1644, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.703, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -7547,44 +5710,38 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, - 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.3303, - 0.9011, - 0.8989, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.6644 + 0.9583, + 0.28, + 0.2847, + 0.9083, + 0.8667, + 0.2483, + 0.8667, + 0.825, + 0.8083, + 0.7917, + 0.825, + 0.02, + 0.75, + 0.8167, + 0.65, + 0.0703, + 0.725, + 0.8094 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 48, - "steps": 28, - "score": 0.7257, - "total_reward": 18.1437, - "completion_rate": 0.9, - "detection_rate": 0.5, - "trust_calibration": 0.708, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "policy": "random", + "task_type": "task2", + "seed": 28, + "steps": 23, + "score": 0.7219, + "total_reward": 12.9944, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.671, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -7597,44 +5754,37 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.3459, + 0.9083, 0.9167, - 0.9144, - 0.9122, - 0.337, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.8944, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.9228, - 0.9184, - 0.9139, - 0.6798 + 0.9083, + 0.8667, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.258, + 0.02, + 0.165, + 0.8167, + 0.8, + 0.8417, + 0.7833, + 0.6667, + 0.8414 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 49, - "steps": 28, - "score": 0.7355, - "total_reward": 17.6512, - "completion_rate": 0.9, - "detection_rate": 0.5, - "trust_calibration": 0.726, - "adversarial_detections": 5, - "adversarial_poisonings": 5, + "policy": "random", + "task_type": "task2", + "seed": 29, + "steps": 27, + "score": 0.5586, + "total_reward": 10.614, + "completion_rate": 0.733, + "detection_rate": 1.0, + "trust_calibration": 0.478, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -7647,41 +5797,36 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, - 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.1019, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.9273, - 0.9228, - 0.9184, - 0.9139, - 0.6844 + 0.9417, + 0.9, + 0.2717, + 0.9083, + 0.02, + 0.8583, + 0.8917, + 0.02, + 0.7417, + 0.8333, + 0.19, + 0.1817, + 0.7667, + 0.7833, + 0.7417, + 0.1133, + 0.0817, + 0.644 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 50, - "steps": 42, - "score": 0.717, - "total_reward": 18.643, - "completion_rate": 0.85, + "task_type": "task2", + "seed": 0, + "steps": 31, + "score": 0.6145, + "total_reward": 12.2902, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.82, + "trust_calibration": 0.72, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -7696,45 +5841,39 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.3919, - 0.3997, - 0.3994, - 0.3972, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.32, - 0.8856, - 0.8811, - 0.3067, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.2844, - 0.28, - 0.8456, - 0.8411, - 0.8092 + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.2467, + 0.875, + 0.8583, + 0.8417, + 0.19, + 0.8083, + 0.7917, + 0.775, + 0.7583, + 0.7417, + 0.09, + 0.0733, + 0.7719 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 51, - "steps": 34, - "score": 0.7274, - "total_reward": 19.6391, - "completion_rate": 0.95, - "detection_rate": 0.625, - "trust_calibration": 0.682, - "adversarial_detections": 5, - "adversarial_poisonings": 3, + "task_type": "task2", + "seed": 1, + "steps": 17, + "score": 0.768, + "total_reward": 13.8236, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.282, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -7747,44 +5886,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, - 0.9167, - 0.3414, - 0.9122, - 0.337, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.3237, - 0.8944, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.9073, - 0.3311, - 0.715 + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.7053 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 52, - "steps": 46, - "score": 0.5965, - "total_reward": 19.0884, - "completion_rate": 0.7, + "task_type": "task2", + "seed": 2, + "steps": 17, + "score": 0.7237, + "total_reward": 13.0266, + "completion_rate": 0.867, "detection_rate": 1.0, - "trust_calibration": 0.686, + "trust_calibration": 0.284, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -7799,47 +5929,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3548, - 0.9256, - 0.3433, - 0.9211, - 0.3619, - 0.9144, - 0.34, - 0.3578, - 0.3333, - 0.9011, - 0.8989, - 0.8967, - 0.3774, - 0.8922, - 0.375, - 0.8878, - 0.3706, - 0.3683, - 0.3661, - 0.3639, - 0.2967, - 0.3594, - 0.3572, - 0.355, - 0.8656, - 0.3089, - 0.8722, - 0.8656, - 0.8589, - 0.8522, - 0.7165 + 0.3517, + 0.2883, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.1787, + 0.6626 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 53, - "steps": 44, - "score": 0.8286, - "total_reward": 19.0588, - "completion_rate": 1.0, + "task_type": "task2", + "seed": 3, + "steps": 30, + "score": 0.7823, + "total_reward": 12.5171, + "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, @@ -7856,42 +5972,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, + 0.9333, 0.9167, - 0.9122, - 0.3378, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.3067, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, + 0.9, + 0.8833, + 0.8667, 0.85, - 0.8456, - 0.8411, - 0.8367, - 0.8632 + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, + 0.9021 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 54, - "steps": 35, - "score": 0.6607, - "total_reward": 18.4987, - "completion_rate": 0.8, - "detection_rate": 0.75, - "trust_calibration": 0.358, - "adversarial_detections": 6, - "adversarial_poisonings": 2, + "task_type": "task2", + "seed": 4, + "steps": 17, + "score": 0.7999, + "total_reward": 14.3981, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.426, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -7904,47 +6013,37 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3556, - 0.9233, - 0.9211, - 0.9189, - 0.3797, - 0.3874, - 0.3952, - 0.36, - 0.9078, - 0.3326, - 0.3233, - 0.9011, - 0.8989, - 0.3237, - 0.8944, - 0.8922, - 0.89, - 0.8878, - 0.0886, - 0.0863, - 0.9206, - 0.9161, - 0.9117, - 0.9073, - 0.3661, - 0.8984, - 0.6167 + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.2537, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.1787, + 0.7991 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 55, - "steps": 29, - "score": 0.728, - "total_reward": 18.1995, - "completion_rate": 0.9, - "detection_rate": 0.5, - "trust_calibration": 0.713, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "task_type": "task2", + "seed": 5, + "steps": 30, + "score": 0.6545, + "total_reward": 10.4723, + "completion_rate": 0.733, + "detection_rate": 1.0, + "trust_calibration": 0.816, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -7957,42 +6056,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.3756, - 0.9233, - 0.9211, - 0.9189, + 0.9333, 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.3348, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.6787 + 0.9, + 0.2483, + 0.8667, + 0.215, + 0.8333, + 0.8167, + 0.165, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, + 0.7623 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 56, - "steps": 37, - "score": 0.6768, - "total_reward": 20.9793, - "completion_rate": 0.9, + "task_type": "task2", + "seed": 6, + "steps": 15, + "score": 0.8749, + "total_reward": 13.9981, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.796, + "trust_calibration": 0.28, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -8007,49 +6097,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3548, - 0.9506, - 0.9483, - 0.9461, - 0.3689, - 0.3667, - 0.3644, - 0.9122, - 0.91, - 0.9078, - 0.3256, - 0.3233, - 0.9011, - 0.3189, - 0.8967, - 0.3144, - 0.8922, - 0.89, - 0.8878, - 0.8856, - 0.3033, - 0.8989, - 0.2967, - 0.8922, - 0.8878, - 0.2856, - 0.8811, - 0.2967, - 0.8722, - 0.8317 + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.7481 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 57, - "steps": 38, - "score": 0.6683, - "total_reward": 20.0487, - "completion_rate": 0.85, + "task_type": "task2", + "seed": 7, + "steps": 28, + "score": 0.7465, + "total_reward": 13.4373, + "completion_rate": 0.933, "detection_rate": 1.0, - "trust_calibration": 0.771, - "adversarial_detections": 1, + "trust_calibration": 0.833, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -8063,49 +6138,37 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.3389, + 0.9417, + 0.9333, + 0.313, 0.9167, - 0.3344, - 0.3322, - 0.33, - 0.9078, - 0.3256, - 0.9033, - 0.3211, - 0.8989, - 0.3167, - 0.3144, - 0.8922, - 0.89, - 0.8878, - 0.3486, - 0.3563, - 0.3819, - 0.9161, - 0.34, + 0.3163, + 0.318, 0.8833, - 0.8589, - 0.8544, + 0.8667, 0.85, - 0.8058 + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.8983 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 58, - "steps": 29, - "score": 0.7278, - "total_reward": 18.1944, - "completion_rate": 0.9, - "detection_rate": 0.5556, - "trust_calibration": 0.731, - "adversarial_detections": 5, - "adversarial_poisonings": 4, + "task_type": "task2", + "seed": 8, + "steps": 17, + "score": 0.7992, + "total_reward": 14.3856, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.39, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -8118,42 +6181,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.3481, - 0.9189, - 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.3348, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.0974, - 0.0952, - 0.093, - 0.0908, + 0.9417, + 0.2953, 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.6999 + 0.9167, + 0.9083, + 0.9, + 0.2537, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.7866 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 59, - "steps": 43, - "score": 0.7547, - "total_reward": 19.6215, - "completion_rate": 0.95, + "task_type": "task2", + "seed": 9, + "steps": 28, + "score": 0.7248, + "total_reward": 13.7712, + "completion_rate": 0.933, "detection_rate": 1.0, - "trust_calibration": 0.735, + "trust_calibration": 0.834, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -8168,43 +6224,36 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.3526, - 0.9233, - 0.9211, - 0.3719, - 0.3797, - 0.3644, - 0.91, - 0.9056, - 0.9011, - 0.8967, - 0.8922, - 0.8878, - 0.8833, - 0.8789, - 0.8744, - 0.3, - 0.8656, - 0.2911, - 0.8567, - 0.8522, - 0.8478, - 0.8433, - 0.8389, - 0.8208 + 0.9417, + 0.9333, + 0.925, + 0.3147, + 0.9083, + 0.318, + 0.3117, + 0.3033, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.8986 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 60, - "steps": 36, - "score": 0.7716, - "total_reward": 18.5184, - "completion_rate": 0.9, + "task_type": "task2", + "seed": 10, + "steps": 16, + "score": 0.797, + "total_reward": 13.5485, + "completion_rate": 0.933, "detection_rate": 1.0, - "trust_calibration": 0.836, + "trust_calibration": 0.432, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -8219,41 +6268,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.3411, - 0.9189, + 0.9417, + 0.9333, + 0.925, 0.9167, - 0.3344, - 0.3322, - 0.33, - 0.3278, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, - 0.8439 + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.212, + 0.2037, + 0.8333, + 0.825, + 0.7578 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 61, - "steps": 42, - "score": 0.7616, - "total_reward": 19.8021, - "completion_rate": 0.95, + "task_type": "task2", + "seed": 11, + "steps": 30, + "score": 0.6763, + "total_reward": 12.8505, + "completion_rate": 0.867, "detection_rate": 1.0, - "trust_calibration": 0.836, + "trust_calibration": 0.825, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -8268,43 +6310,36 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.3763, - 0.9211, - 0.9189, - 0.3997, - 0.3994, - 0.3972, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, - 0.28, - 0.8456, - 0.2711, - 0.8483 + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.0817, + 0.8522 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 62, - "steps": 43, - "score": 0.7892, - "total_reward": 18.9402, - "completion_rate": 0.95, + "task_type": "task2", + "seed": 12, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.925, + "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -8319,41 +6354,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3548, - 0.9506, - 0.3733, - 0.9189, - 0.9144, - 0.91, - 0.9056, - 0.9011, - 0.8967, - 0.3222, - 0.3178, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.8789, - 0.8744, - 0.87, - 0.8656, - 0.8611, - 0.8567, - 0.8522, - 0.8478, - 0.8433, - 0.8389, - 0.8682 + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 63, - "steps": 44, - "score": 0.8031, - "total_reward": 18.4705, - "completion_rate": 0.95, + "task_type": "task2", + "seed": 13, + "steps": 30, + "score": 0.7935, + "total_reward": 13.4903, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.841, + "trust_calibration": 0.839, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -8368,40 +6395,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.3467, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, + 0.9417, + 0.2883, + 0.9167, + 0.9, + 0.8833, + 0.8667, 0.85, - 0.2756, - 0.2711, - 0.8367, - 0.8449 + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9436 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 64, - "steps": 37, - "score": 0.7634, - "total_reward": 19.0848, - "completion_rate": 0.9, + "task_type": "task2", + "seed": 14, + "steps": 18, + "score": 0.7309, + "total_reward": 13.8869, + "completion_rate": 0.933, "detection_rate": 1.0, - "trust_calibration": 0.76, + "trust_calibration": 0.264, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -8416,42 +6437,36 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.3841, - 0.9189, - 0.3997, - 0.3994, - 0.3972, - 0.33, - 0.9328, - 0.3556, - 0.9011, - 0.8967, - 0.8922, - 0.8878, + 0.9417, + 0.9333, + 0.313, + 0.3147, + 0.2733, + 0.9, + 0.8917, 0.8833, - 0.8789, - 0.8744, - 0.87, - 0.8656, - 0.8611, - 0.8567, - 0.8522, - 0.8227 - ] - }, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.187, + 0.8167, + 0.8083, + 0.6989 + ] + }, { "policy": "heuristic", - "task_type": "task3", - "seed": 65, - "steps": 40, - "score": 0.797, - "total_reward": 19.1287, - "completion_rate": 0.95, + "task_type": "task2", + "seed": 15, + "steps": 18, + "score": 0.7649, + "total_reward": 14.5326, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.838, + "trust_calibration": 0.33, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -8466,43 +6481,38 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.3763, - 0.9211, - 0.3919, - 0.3997, - 0.9122, - 0.9078, - 0.3333, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, + 0.3097, + 0.9333, + 0.925, + 0.9167, + 0.2703, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.2287, + 0.8583, 0.85, - 0.8456, - 0.853 + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.8083, + 0.7656 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 66, - "steps": 29, - "score": 0.7165, - "total_reward": 17.9135, - "completion_rate": 0.9, - "detection_rate": 0.4444, - "trust_calibration": 0.723, - "adversarial_detections": 4, - "adversarial_poisonings": 5, + "task_type": "task2", + "seed": 16, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -8515,44 +6525,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, + 0.9333, 0.9167, - 0.9144, - 0.9122, - 0.337, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.6647 + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 67, - "steps": 32, - "score": 0.6707, - "total_reward": 18.1095, - "completion_rate": 0.85, - "detection_rate": 0.5, - "trust_calibration": 0.44, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "task_type": "task2", + "seed": 17, + "steps": 30, + "score": 0.7823, + "total_reward": 12.517, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -8565,44 +6566,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.3503, - 0.9211, - 0.3459, + 0.9333, 0.9167, - 0.9144, - 0.3392, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.8944, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.9228, - 0.9184, - 0.9139, - 0.9095, - 0.3333, - 0.5864 + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.902 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 68, - "steps": 42, - "score": 0.7575, - "total_reward": 19.6962, - "completion_rate": 0.95, + "task_type": "task2", + "seed": 18, + "steps": 15, + "score": 0.8749, + "total_reward": 13.998, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.641, + "trust_calibration": 0.28, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -8617,45 +6607,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3478, - 0.9256, - 0.9233, - 0.3741, - 0.9189, - 0.3897, - 0.3974, - 0.3622, - 0.9078, - 0.9033, - 0.3289, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, 0.85, - 0.8456, - 0.8411, - 0.7994 + 0.8417, + 0.8333, + 0.748 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 69, - "steps": 30, - "score": 0.6917, - "total_reward": 17.9836, - "completion_rate": 0.85, - "detection_rate": 0.5, - "trust_calibration": 0.658, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "task_type": "task2", + "seed": 19, + "steps": 17, + "score": 0.7966, + "total_reward": 14.3395, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.279, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -8668,43 +6648,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3478, - 0.9506, - 0.9483, - 0.9461, - 0.3689, - 0.3667, - 0.3644, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.8944, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.9228, - 0.9184, - 0.9139, - 0.9095, - 0.6454 + 0.2967, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.262, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.7475 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 70, - "steps": 27, - "score": 0.7, - "total_reward": 19.5996, - "completion_rate": 0.85, + "task_type": "task2", + "seed": 20, + "steps": 30, + "score": 0.6558, + "total_reward": 11.8048, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.716, + "trust_calibration": 0.82, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -8719,47 +6691,37 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.3919, - 0.9167, - 0.3994, - 0.3972, - 0.395, - 0.3928, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.3144, - 0.8922, - 0.89, - 0.8878, - 0.8856, - 0.3033, - 0.8811, - 0.8789, - 0.2967, - 0.8744, - 0.2922, - 0.8166 + 0.9417, + 0.9333, + 0.313, + 0.3147, + 0.9, + 0.8833, + 0.2317, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.1483, + 0.7667, + 0.75, + 0.0983, + 0.7167, + 0.8071 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 71, - "steps": 29, - "score": 0.6808, - "total_reward": 17.7021, - "completion_rate": 0.85, - "detection_rate": 0.375, - "trust_calibration": 0.687, - "adversarial_detections": 3, - "adversarial_poisonings": 5, + "task_type": "task2", + "seed": 21, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -8772,45 +6734,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.3841, - 0.3919, - 0.3997, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.8944, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.0863, - 0.9206, - 0.9161, - 0.9117, - 0.6174 + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 72, - "steps": 34, - "score": 0.6937, - "total_reward": 19.4234, - "completion_rate": 0.9, - "detection_rate": 0.5714, - "trust_calibration": 0.723, - "adversarial_detections": 4, - "adversarial_poisonings": 3, + "task_type": "task2", + "seed": 22, + "steps": 22, + "score": 0.6604, + "total_reward": 15.1886, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.471, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -8823,47 +6775,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.3763, - 0.9211, - 0.3919, - 0.3997, - 0.3644, - 0.9122, - 0.91, - 0.3348, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.8944, - 0.8922, - 0.093, - 0.0908, - 0.0886, - 0.9228, - 0.9184, - 0.9139, - 0.9095, + 0.3517, + 0.9333, + 0.925, + 0.2787, + 0.9083, + 0.9, + 0.8917, 0.8833, - 0.2811, - 0.6916 + 0.237, + 0.8667, + 0.2203, + 0.85, + 0.8417, + 0.1953, + 0.825, + 0.1787, + 0.8083, + 0.8, + 0.1537, + 0.7833, + 0.775, + 0.8149 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 73, - "steps": 26, - "score": 0.6967, - "total_reward": 16.7204, - "completion_rate": 0.85, - "detection_rate": 0.3333, - "trust_calibration": 0.697, - "adversarial_detections": 3, - "adversarial_poisonings": 6, + "task_type": "task2", + "seed": 23, + "steps": 16, + "score": 0.793, + "total_reward": 13.4804, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.212, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -8876,41 +6823,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, + 0.9417, + 0.3113, + 0.28, 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.9228, - 0.9184, - 0.614 + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.6808 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 74, - "steps": 28, - "score": 0.675, - "total_reward": 18.8991, - "completion_rate": 0.8, + "task_type": "task2", + "seed": 24, + "steps": 30, + "score": 0.6768, + "total_reward": 12.8598, + "completion_rate": 0.867, "detection_rate": 1.0, - "trust_calibration": 0.806, + "trust_calibration": 0.824, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -8925,45 +6865,36 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.3414, - 0.9122, - 0.91, - 0.9078, - 0.3256, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.8944, - 0.8922, - 0.31, - 0.8878, - 0.3056, + 0.9417, + 0.3113, + 0.975, + 0.3267, + 0.9583, + 0.265, 0.8833, - 0.8811, - 0.2989, - 0.2967, - 0.8744, - 0.2922, - 0.8193 + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.115, + 0.7333, + 0.7167, + 0.8518 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 75, - "steps": 40, - "score": 0.7977, - "total_reward": 19.1445, - "completion_rate": 0.95, + "task_type": "task2", + "seed": 25, + "steps": 21, + "score": 0.5985, + "total_reward": 13.1666, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.837, + "trust_calibration": 0.709, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -8978,41 +6909,39 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.8917, + 0.2383, + 0.875, + 0.8667, + 0.8583, 0.85, - 0.2756, - 0.8529 + 0.8417, + 0.1883, + 0.18, + 0.8167, + 0.1633, + 0.155, + 0.1467, + 0.7833, + 0.7683 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 76, - "steps": 42, - "score": 0.8335, - "total_reward": 19.1712, - "completion_rate": 1.0, + "task_type": "task2", + "seed": 26, + "steps": 22, + "score": 0.5962, + "total_reward": 13.1159, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.842, + "trust_calibration": 0.324, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -9027,42 +6956,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.3686, - 0.9211, - 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.2844, - 0.85, - 0.8456, - 0.8411, - 0.8671 + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.8683, + 0.265, + 0.2567, + 0.9333, + 0.925, + 0.2317, + 0.2233, + 0.26, + 0.1983, + 0.825, + 0.8167, + 0.8083, + 0.162, + 0.7917, + 0.1453, + 0.775, + 0.6336 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 77, - "steps": 29, - "score": 0.6992, - "total_reward": 18.1794, - "completion_rate": 0.9, - "detection_rate": 0.4444, - "trust_calibration": 0.645, - "adversarial_detections": 4, - "adversarial_poisonings": 5, + "task_type": "task2", + "seed": 27, + "steps": 30, + "score": 0.6573, + "total_reward": 13.146, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.829, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -9075,45 +7003,39 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3478, - 0.9256, - 0.9233, - 0.9211, - 0.9189, + 0.9417, + 0.9333, + 0.313, 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.3259, - 0.8967, - 0.8944, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.0863, - 0.9206, - 0.9161, - 0.9117, - 0.6451 + 0.9083, + 0.318, + 0.3117, + 0.3033, + 0.8667, + 0.85, + 0.1983, + 0.8167, + 0.8, + 0.1483, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8533 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 78, - "steps": 28, - "score": 0.7161, - "total_reward": 17.9032, - "completion_rate": 0.9, - "detection_rate": 0.4444, - "trust_calibration": 0.709, - "adversarial_detections": 4, - "adversarial_poisonings": 5, + "task_type": "task2", + "seed": 28, + "steps": 15, + "score": 0.8749, + "total_reward": 13.9979, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.28, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -9126,42 +7048,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.3503, - 0.9211, - 0.9189, + 0.9417, + 0.9333, + 0.925, 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.9228, - 0.9184, - 0.9139, - 0.6633 + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.7479 ] }, { "policy": "heuristic", - "task_type": "task3", - "seed": 79, - "steps": 44, - "score": 0.7563, - "total_reward": 19.6626, - "completion_rate": 0.95, + "task_type": "task2", + "seed": 29, + "steps": 17, + "score": 0.7998, + "total_reward": 14.3965, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.829, + "trust_calibration": 0.421, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -9176,43 +7089,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.9122, - 0.9078, - 0.3333, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.2933, - 0.8589, - 0.2844, + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.2453, + 0.875, + 0.8667, + 0.8583, 0.85, - 0.8456, - 0.8411, - 0.8367, - 0.8421 + 0.2037, + 0.8333, + 0.825, + 0.8167, + 0.7975 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 80, - "steps": 46, - "score": 0.7065, - "total_reward": 16.9564, - "completion_rate": 0.8, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 0, + "steps": 30, + "score": 0.7823, + "total_reward": 12.5171, + "completion_rate": 0.933, "detection_rate": 1.0, - "trust_calibration": 0.803, + "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -9227,39 +7132,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, + 0.9333, 0.9167, - 0.3422, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.3156, - 0.3111, - 0.3067, - 0.8722, - 0.8678, - 0.2933, - 0.8589, - 0.8544, + 0.9, + 0.8833, + 0.8667, 0.85, - 0.2756, - 0.8411, - 0.8367, - 0.8322, - 0.7808 + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, + 0.9021 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 81, - "steps": 44, - "score": 0.8031, - "total_reward": 18.4703, - "completion_rate": 0.95, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 1, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, @@ -9276,40 +7173,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, + 0.9333, 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.3156, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, + 0.9, + 0.8833, + 0.8667, 0.85, - 0.2756, - 0.2711, - 0.8367, - 0.8447 + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 82, - "steps": 44, - "score": 0.8286, - "total_reward": 19.0588, - "completion_rate": 1.0, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 2, + "steps": 30, + "score": 0.7399, + "total_reward": 11.8385, + "completion_rate": 0.867, "detection_rate": 1.0, - "trust_calibration": 0.844, + "trust_calibration": 0.843, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -9324,40 +7214,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, - 0.3422, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.32, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, + 0.2983, + 0.2817, + 0.9, + 0.8833, + 0.8667, 0.85, - 0.8456, - 0.8411, - 0.8367, - 0.8632 + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8585 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 83, - "steps": 33, - "score": 0.7082, - "total_reward": 18.4133, - "completion_rate": 0.8, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 3, + "steps": 30, + "score": 0.7823, + "total_reward": 12.5171, + "completion_rate": 0.933, "detection_rate": 1.0, - "trust_calibration": 0.826, + "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -9372,45 +7255,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.3456, - 0.9483, - 0.9461, - 0.8989, - 0.3667, - 0.3644, - 0.9372, - 0.935, - 0.8878, - 0.9306, - 0.3533, - 0.3511, - 0.3489, - 0.3467, - 0.9194, - 0.3422, - 0.34, - 0.9033, - 0.8789, - 0.8744, - 0.87, - 0.8656, - 0.8611, - 0.8133 + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, + 0.9021 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 84, - "steps": 29, - "score": 0.71, - "total_reward": 18.4609, - "completion_rate": 0.9, - "detection_rate": 0.5, - "trust_calibration": 0.716, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 4, + "steps": 30, + "score": 0.7823, + "total_reward": 12.5171, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -9423,45 +7296,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.3686, - 0.9233, - 0.9211, - 0.3459, + 0.9333, 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.3214, - 0.8922, - 0.093, - 0.0908, - 0.0886, - 0.0863, - 0.9206, - 0.9161, - 0.9117, - 0.6795 + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.1983, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9021 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 85, - "steps": 29, - "score": 0.6923, - "total_reward": 17.999, - "completion_rate": 0.85, - "detection_rate": 0.5, - "trust_calibration": 0.718, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 5, + "steps": 30, + "score": 0.6545, + "total_reward": 10.4723, + "completion_rate": 0.733, + "detection_rate": 1.0, + "trust_calibration": 0.816, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -9474,45 +7337,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.9144, - 0.3392, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.8944, - 0.8922, - 0.093, - 0.0908, - 0.0886, - 0.0863, - 0.9206, - 0.9161, - 0.9117, - 0.6627 + 0.9333, + 0.9167, + 0.9, + 0.2483, + 0.8667, + 0.215, + 0.8333, + 0.8167, + 0.165, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, + 0.7623 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 86, - "steps": 33, - "score": 0.6856, - "total_reward": 18.5122, - "completion_rate": 0.9, - "detection_rate": 0.4444, - "trust_calibration": 0.605, - "adversarial_detections": 4, - "adversarial_poisonings": 5, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 6, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -9525,46 +7378,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3478, - 0.9256, - 0.3663, - 0.9211, - 0.9189, + 0.9333, 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.9228, - 0.9184, - 0.9139, - 0.9095, - 0.3333, - 0.6263 + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 87, - "steps": 27, - "score": 0.6991, - "total_reward": 16.7786, - "completion_rate": 0.85, - "detection_rate": 0.4, - "trust_calibration": 0.727, - "adversarial_detections": 4, - "adversarial_poisonings": 6, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 7, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1942, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -9577,41 +7419,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, + 0.9333, 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.1019, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.6393 + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9442 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 88, - "steps": 42, - "score": 0.7762, - "total_reward": 19.4062, - "completion_rate": 0.95, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 8, + "steps": 30, + "score": 0.7399, + "total_reward": 11.8383, + "completion_rate": 0.867, "detection_rate": 1.0, - "trust_calibration": 0.835, + "trust_calibration": 0.843, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -9626,44 +7460,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.9122, - 0.9078, - 0.9033, - 0.3289, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, - 0.28, - 0.8456, - 0.8411, - 0.8479 + 0.9333, + 0.2817, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.1983, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8583 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 89, - "steps": 29, - "score": 0.7259, - "total_reward": 18.1473, - "completion_rate": 0.9, - "detection_rate": 0.5, - "trust_calibration": 0.696, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 9, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -9676,44 +7501,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.3503, - 0.9211, - 0.9189, + 0.9333, 0.9167, - 0.9144, - 0.3392, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.6745 + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 90, - "steps": 28, - "score": 0.7161, - "total_reward": 17.9035, - "completion_rate": 0.9, - "detection_rate": 0.4444, - "trust_calibration": 0.71, - "adversarial_detections": 4, - "adversarial_poisonings": 5, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 10, + "steps": 30, + "score": 0.7398, + "total_reward": 11.8373, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -9726,44 +7542,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, + 0.9333, 0.9167, - 0.9144, - 0.3392, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.0886, - 0.9228, - 0.9184, - 0.9139, - 0.6635 + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.115, + 0.0983, + 0.7167, + 0.8573 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 91, - "steps": 29, - "score": 0.7262, - "total_reward": 18.1541, - "completion_rate": 0.9, - "detection_rate": 0.5, - "trust_calibration": 0.723, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 11, + "steps": 30, + "score": 0.7823, + "total_reward": 12.517, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -9776,42 +7583,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.3459, + 0.9333, 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.3259, - 0.8967, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.6813 + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.902 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 92, - "steps": 38, - "score": 0.7692, - "total_reward": 18.4611, - "completion_rate": 0.9, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 12, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.842, + "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -9826,41 +7624,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.3389, - 0.3367, - 0.3344, - 0.3322, - 0.9078, - 0.9033, - 0.8989, - 0.3244, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, 0.85, - 0.8411 + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 93, - "steps": 45, - "score": 0.6795, - "total_reward": 19.7049, - "completion_rate": 0.85, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 13, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.749, + "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -9875,48 +7665,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3478, - 0.9256, - 0.9233, - 0.9211, - 0.9189, - 0.3897, - 0.9144, - 0.3972, - 0.395, - 0.9078, - 0.3906, - 0.3883, - 0.3861, - 0.8967, - 0.3222, - 0.8878, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.8789, - 0.8744, - 0.3, - 0.2956, - 0.8789, - 0.8722, - 0.8478, - 0.8433, - 0.8389, - 0.2644, - 0.7847 + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 94, - "steps": 32, - "score": 0.6725, - "total_reward": 18.1568, - "completion_rate": 0.8, - "detection_rate": 0.3333, - "trust_calibration": 0.85, - "adversarial_detections": 1, - "adversarial_poisonings": 2, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 14, + "steps": 30, + "score": 0.7398, + "total_reward": 11.8376, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.841, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -9929,44 +7706,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.8989, - 0.3667, - 0.3644, - 0.9372, - 0.935, - 0.3578, - 0.8856, - 0.9283, - 0.3511, - 0.3489, - 0.3467, - 0.3444, - 0.8922, - 0.89, - 0.8878, - 0.0886, - 0.0863, - 0.9206, - 0.8922, - 0.8678, - 0.8633, - 0.6213 + 0.9333, + 0.9167, + 0.9, + 0.2483, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8576 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 95, - "steps": 40, - "score": 0.7977, - "total_reward": 19.1447, - "completion_rate": 0.95, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 15, + "steps": 30, + "score": 0.6973, + "total_reward": 11.1569, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.838, + "trust_calibration": 0.834, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -9981,43 +7747,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.3733, - 0.9461, - 0.3689, - 0.3667, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, + 0.2983, + 0.9167, + 0.9, + 0.8833, + 0.2317, 0.85, - 0.2756, - 0.853 + 0.8333, + 0.8167, + 0.8, + 0.1483, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8119 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 96, - "steps": 31, - "score": 0.6978, - "total_reward": 17.4444, - "completion_rate": 0.85, - "detection_rate": 0.5, - "trust_calibration": 0.726, - "adversarial_detections": 4, - "adversarial_poisonings": 4, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 16, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -10030,42 +7788,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.3511, - 0.9189, + 0.9333, 0.9167, - 0.3344, - 0.3322, - 0.91, - 0.9078, - 0.9056, - 0.9033, - 0.9011, - 0.8989, - 0.8967, - 0.8944, - 0.8922, - 0.093, - 0.0908, - 0.0886, - 0.0863, - 0.9206, - 0.9161, - 0.9117, - 0.9073, - 0.66 + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 97, - "steps": 38, - "score": 0.8207, - "total_reward": 18.8758, - "completion_rate": 0.95, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 17, + "steps": 30, + "score": 0.7823, + "total_reward": 12.517, + "completion_rate": 0.933, "detection_rate": 1.0, - "trust_calibration": 0.829, + "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -10080,42 +7829,35 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9528, - 0.9506, - 0.9483, - 0.3711, - 0.3689, - 0.3667, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, + 0.9333, + 0.9167, + 0.265, + 0.8833, + 0.8667, 0.85, - 0.8552 + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.902 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 98, - "steps": 29, - "score": 0.7165, - "total_reward": 17.9132, - "completion_rate": 0.9, - "detection_rate": 0.4444, - "trust_calibration": 0.722, - "adversarial_detections": 4, - "adversarial_poisonings": 5, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 18, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -10128,42 +7870,33 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.9189, + 0.9333, 0.9167, - 0.9144, - 0.9122, - 0.91, - 0.9078, - 0.9056, - 0.3303, - 0.9011, - 0.8989, - 0.0997, - 0.0974, - 0.0952, - 0.093, - 0.0908, - 0.925, - 0.9206, - 0.9161, - 0.9117, - 0.6644 + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 ] }, { - "policy": "heuristic", - "task_type": "task3", - "seed": 99, - "steps": 38, - "score": 0.7037, - "total_reward": 21.1121, - "completion_rate": 0.95, + "policy": "oracle_lite", + "task_type": "task2", + "seed": 19, + "steps": 30, + "score": 0.7823, + "total_reward": 12.517, + "completion_rate": 0.933, "detection_rate": 1.0, - "trust_calibration": 0.723, + "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", @@ -10178,48 +7911,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9278, - 0.9256, - 0.9233, - 0.9211, - 0.3389, + 0.9333, 0.9167, - 0.9144, - 0.3322, - 0.91, - 0.3278, - 0.9056, - 0.9033, - 0.3211, - 0.8989, - 0.3167, - 0.8944, - 0.8922, - 0.31, - 0.8878, - 0.3056, - 0.3211, - 0.8967, - 0.3122, - 0.8878, - 0.3033, - 0.8789, - 0.8744, - 0.87, - 0.8287 + 0.9, + 0.8833, + 0.8667, + 0.215, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.902 ] }, { "policy": "oracle_lite", - "task_type": "task3", - "seed": 0, - "steps": 42, - "score": 0.867, - "total_reward": 19.0739, - "completion_rate": 1.0, + "task_type": "task2", + "seed": 20, + "steps": 30, + "score": 0.6117, + "total_reward": 9.7864, + "completion_rate": 0.667, "detection_rate": 1.0, - "trust_calibration": 0.935, - "adversarial_detections": 6, + "trust_calibration": 0.795, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -10233,40 +7952,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, + 0.2983, 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.3111, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8904 + 0.9, + 0.2483, + 0.8667, + 0.85, + 0.1983, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.115, + 0.7333, + 0.7167, + 0.7114 ] }, { "policy": "oracle_lite", - "task_type": "task3", - "seed": 1, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8524, + "task_type": "task2", + "seed": 21, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, + "trust_calibration": 0.84, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -10280,39 +7993,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, + 0.9333, 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.9095, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8938 + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 ] }, { "policy": "oracle_lite", - "task_type": "task3", - "seed": 2, - "steps": 42, - "score": 0.8422, - "total_reward": 18.5276, - "completion_rate": 0.95, + "task_type": "task2", + "seed": 22, + "steps": 30, + "score": 0.6545, + "total_reward": 10.4728, + "completion_rate": 0.733, "detection_rate": 1.0, - "trust_calibration": 0.933, - "adversarial_detections": 7, + "trust_calibration": 0.817, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -10326,40 +8034,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3556, - 0.3511, + 0.2983, 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8724 + 0.9, + 0.2483, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.0817, + 0.7628 ] }, { "policy": "oracle_lite", - "task_type": "task3", - "seed": 3, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, - "completion_rate": 1.0, + "task_type": "task2", + "seed": 23, + "steps": 30, + "score": 0.7399, + "total_reward": 11.8385, + "completion_rate": 0.867, "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 7, + "trust_calibration": 0.843, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -10373,40 +8075,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.3111, - 0.8767, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8901 + 0.2983, + 0.2817, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8585 ] }, { "policy": "oracle_lite", - "task_type": "task3", - "seed": 4, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1154, - "completion_rate": 1.0, + "task_type": "task2", + "seed": 24, + "steps": 30, + "score": 0.7398, + "total_reward": 11.8376, + "completion_rate": 0.867, "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 7, + "trust_calibration": 0.841, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -10420,40 +8116,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, + 0.9333, 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.3289, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8901 + 0.9, + 0.2483, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8576 ] }, { "policy": "oracle_lite", - "task_type": "task3", - "seed": 5, - "steps": 46, - "score": 0.7916, - "total_reward": 18.9976, - "completion_rate": 0.95, + "task_type": "task2", + "seed": 25, + "steps": 30, + "score": 0.7399, + "total_reward": 11.838, + "completion_rate": 0.867, "detection_rate": 1.0, - "trust_calibration": 0.917, - "adversarial_detections": 6, + "trust_calibration": 0.842, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -10467,42 +8157,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, - 0.3422, - 0.9078, - 0.3333, - 0.8989, - 0.8944, - 0.32, - 0.8856, - 0.3111, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8739, - 0.8618 + 0.9333, + 0.2817, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.1817, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.858 ] }, { "policy": "oracle_lite", - "task_type": "task3", - "seed": 6, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8523, - "completion_rate": 1.0, + "task_type": "task2", + "seed": 26, + "steps": 30, + "score": 0.7398, + "total_reward": 11.8373, + "completion_rate": 0.867, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, + "trust_calibration": 0.84, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -10516,39 +8198,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, + 0.9333, 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.9095, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8938 + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.115, + 0.0983, + 0.7167, + 0.8573 ] }, { "policy": "oracle_lite", - "task_type": "task3", - "seed": 7, - "steps": 40, - "score": 0.8958, - "total_reward": 18.8108, - "completion_rate": 1.0, + "task_type": "task2", + "seed": 27, + "steps": 30, + "score": 0.6972, + "total_reward": 11.156, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 6, + "trust_calibration": 0.831, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -10562,39 +8239,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, + 0.9333, 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.894 + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.0817, + 0.811 ] }, { "policy": "oracle_lite", - "task_type": "task3", - "seed": 8, - "steps": 44, - "score": 0.8405, - "total_reward": 19.3315, + "task_type": "task2", + "seed": 28, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 6, + "trust_calibration": 0.84, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -10608,41 +8280,34 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.3511, + 0.9333, 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.3289, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8857 + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 ] }, { "policy": "oracle_lite", - "task_type": "task3", - "seed": 9, - "steps": 40, - "score": 0.8958, - "total_reward": 18.8108, - "completion_rate": 1.0, + "task_type": "task2", + "seed": 29, + "steps": 30, + "score": 0.7398, + "total_reward": 11.8373, + "completion_rate": 0.867, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 6, + "trust_calibration": 0.84, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -10656,39 +8321,122 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, + 0.9333, 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.1817, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.0983, + 0.7167, + 0.8573 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 0, + "steps": 31, + "score": 0.6145, + "total_reward": 12.2902, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.72, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.2467, + 0.875, + 0.8583, + 0.8417, + 0.19, + 0.8083, + 0.7917, + 0.775, + 0.7583, + 0.7417, + 0.09, + 0.0733, + 0.7719 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 1, + "steps": 17, + "score": 0.768, + "total_reward": 13.8236, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.282, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, 0.8917, - 0.8873, - 0.894 + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.7053 ] }, { - "policy": "oracle_lite", - "task_type": "task3", - "seed": 10, - "steps": 42, - "score": 0.8421, - "total_reward": 18.5263, - "completion_rate": 0.95, + "policy": "trained", + "task_type": "task2", + "seed": 2, + "steps": 17, + "score": 0.7237, + "total_reward": 13.0266, + "completion_rate": 0.867, "detection_rate": 1.0, - "trust_calibration": 0.928, - "adversarial_detections": 7, + "trust_calibration": 0.284, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -10702,40 +8450,2714 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, + 0.3517, + 0.2883, + 0.925, 0.9167, - 0.9122, - 0.9078, - 0.9033, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.1787, + 0.6626 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 3, + "steps": 30, + "score": 0.7823, + "total_reward": 12.5171, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, + 0.9021 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 4, + "steps": 17, + "score": 0.7999, + "total_reward": 14.3981, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.426, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.2537, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.1787, + 0.7991 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 5, + "steps": 30, + "score": 0.6545, + "total_reward": 10.4723, + "completion_rate": 0.733, + "detection_rate": 1.0, + "trust_calibration": 0.816, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.2483, + 0.8667, + 0.215, + 0.8333, + 0.8167, + 0.165, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, + 0.7623 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 6, + "steps": 15, + "score": 0.8749, + "total_reward": 13.9981, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.28, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.7481 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 7, + "steps": 28, + "score": 0.7465, + "total_reward": 13.4373, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.833, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.313, + 0.9167, + 0.3163, + 0.318, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.8983 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 8, + "steps": 17, + "score": 0.7992, + "total_reward": 14.3856, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.39, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.2953, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.2537, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.7866 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 9, + "steps": 28, + "score": 0.7248, + "total_reward": 13.7712, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.834, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.3147, + 0.9083, + 0.318, + 0.3117, + 0.3033, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.8986 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 10, + "steps": 16, + "score": 0.797, + "total_reward": 13.5485, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.432, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.212, + 0.2037, + 0.8333, + 0.825, + 0.7578 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 11, + "steps": 30, + "score": 0.6763, + "total_reward": 12.8505, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.825, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.0817, + 0.8522 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 12, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 13, + "steps": 30, + "score": 0.7935, + "total_reward": 13.4903, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.839, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.2883, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9436 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 14, + "steps": 18, + "score": 0.7309, + "total_reward": 13.8869, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.264, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.313, + 0.3147, + 0.2733, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.187, + 0.8167, + 0.8083, + 0.6989 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 15, + "steps": 18, + "score": 0.7649, + "total_reward": 14.5326, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.33, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3097, + 0.9333, + 0.925, + 0.9167, + 0.2703, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.2287, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.8083, + 0.7656 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 16, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 17, + "steps": 30, + "score": 0.7823, + "total_reward": 12.517, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.902 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 18, + "steps": 15, + "score": 0.8749, + "total_reward": 13.998, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.28, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.748 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 19, + "steps": 17, + "score": 0.7966, + "total_reward": 14.3395, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.279, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.2967, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.262, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.7475 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 20, + "steps": 30, + "score": 0.6558, + "total_reward": 11.8048, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.82, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.313, + 0.3147, + 0.9, + 0.8833, + 0.2317, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.1483, + 0.7667, + 0.75, + 0.0983, + 0.7167, + 0.8071 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 21, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 22, + "steps": 22, + "score": 0.6604, + "total_reward": 15.1886, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.471, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3517, + 0.9333, + 0.925, + 0.2787, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.237, + 0.8667, + 0.2203, + 0.85, + 0.8417, + 0.1953, + 0.825, + 0.1787, + 0.8083, + 0.8, + 0.1537, + 0.7833, + 0.775, + 0.8149 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 23, + "steps": 16, + "score": 0.793, + "total_reward": 13.4804, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.212, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.3113, + 0.28, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.6808 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 24, + "steps": 30, + "score": 0.6768, + "total_reward": 12.8598, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.824, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.3113, + 0.975, + 0.3267, + 0.9583, + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.115, + 0.7333, + 0.7167, + 0.8518 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 25, + "steps": 21, + "score": 0.5985, + "total_reward": 13.1666, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.709, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.8917, + 0.2383, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.1883, + 0.18, + 0.8167, + 0.1633, + 0.155, + 0.1467, + 0.7833, + 0.7683 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 26, + "steps": 22, + "score": 0.5962, + "total_reward": 13.1159, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.324, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.8683, + 0.265, + 0.2567, + 0.9333, + 0.925, + 0.2317, + 0.2233, + 0.26, + 0.1983, + 0.825, + 0.8167, + 0.8083, + 0.162, + 0.7917, + 0.1453, + 0.775, + 0.6336 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 27, + "steps": 30, + "score": 0.6573, + "total_reward": 13.146, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.829, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.313, + 0.9167, + 0.9083, + 0.318, + 0.3117, + 0.3033, + 0.8667, + 0.85, + 0.1983, + 0.8167, + 0.8, + 0.1483, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8533 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 28, + "steps": 15, + "score": 0.8749, + "total_reward": 13.9979, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.28, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.7479 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 29, + "steps": 17, + "score": 0.7998, + "total_reward": 14.3965, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.421, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.2453, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.2037, + 0.8333, + 0.825, + 0.8167, + 0.7975 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 0, + "steps": 36, + "score": 0.6105, + "total_reward": 15.2622, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.884, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8956, + 0.9233, + 0.9211, + 0.02, + 0.9144, + 0.9122, + 0.9078, + 0.3556, + 0.02, + 0.02, + 0.8467, + 0.8922, + 0.8378, + 0.8833, + 0.9061, + 0.3067, + 0.02, + 0.8722, + 0.073, + 0.8306, + 0.9061, + 0.3397, + 0.3044, + 0.5035 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 1, + "steps": 40, + "score": 0.7205, + "total_reward": 18.0135, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.662, + "adversarial_detections": 1, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9506, + 0.8889, + 0.8822, + 0.91, + 0.8556, + 0.9011, + 0.3167, + 0.8622, + 0.89, + 0.8356, + 0.8633, + 0.3011, + 0.8444, + 0.8378, + 0.8656, + 0.8261, + 0.8589, + 0.8961, + 0.02, + 0.3, + 0.8106, + 0.2933, + 0.7916 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 2, + "steps": 37, + "score": 0.7627, + "total_reward": 17.5411, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.752, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3778, + 0.3203, + 0.9211, + 0.9189, + 0.8822, + 0.91, + 0.8556, + 0.8511, + 0.8967, + 0.8422, + 0.89, + 0.8356, + 0.8311, + 0.8767, + 0.2944, + 0.8972, + 0.87, + 0.8656, + 0.8789, + 0.02, + 0.8722, + 0.8207 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 3, + "steps": 36, + "score": 0.6303, + "total_reward": 16.3887, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.798, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9233, + 0.3411, + 0.8667, + 0.9144, + 0.9372, + 0.91, + 0.3278, + 0.3556, + 0.8511, + 0.8989, + 0.2922, + 0.8878, + 0.3056, + 0.8489, + 0.8767, + 0.8922, + 0.333, + 0.2878, + 0.8283, + 0.8589, + 0.3297, + 0.3552, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 4, + "steps": 41, + "score": 0.6894, + "total_reward": 18.6138, + "completion_rate": 0.85, + "detection_rate": 1.0, + "trust_calibration": 0.405, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9506, + 0.9233, + 0.3711, + 0.8844, + 0.9122, + 0.33, + 0.3186, + 0.9011, + 0.8667, + 0.86, + 0.8878, + 0.8833, + 0.8789, + 0.8767, + 0.8994, + 0.3352, + 0.8678, + 0.3033, + 0.8239, + 0.8744, + 0.8678, + 0.2656, + 0.2933, + 0.2911, + 0.7076 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 5, + "steps": 31, + "score": 0.6062, + "total_reward": 15.1538, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.816, + "adversarial_detections": 0, + "adversarial_poisonings": 2, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.8933, + 0.9211, + 0.3367, + 0.3344, + 0.3322, + 0.8578, + 0.9306, + 0.9033, + 0.3211, + 0.8467, + 0.9194, + 0.32, + 0.8878, + 0.8856, + 0.8811, + 0.02, + 0.8394, + 0.0752, + 0.87, + 0.8678, + 0.02, + 0.8883, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 6, + "steps": 39, + "score": 0.6337, + "total_reward": 15.8429, + "completion_rate": 0.8, + "detection_rate": 0.0, + "trust_calibration": 0.872, + "adversarial_detections": 0, + "adversarial_poisonings": 2, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8956, + 0.8889, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9006, + 0.8511, + 0.3197, + 0.9194, + 0.28, + 0.8556, + 0.8811, + 0.8789, + 0.8422, + 0.8856, + 0.3311, + 0.8589, + 0.0597, + 0.3222, + 0.27, + 0.8728, + 0.02, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 7, + "steps": 32, + "score": 0.7179, + "total_reward": 15.793, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.869, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8756, + 0.8911, + 0.8867, + 0.9144, + 0.86, + 0.9056, + 0.9033, + 0.8989, + 0.8967, + 0.3444, + 0.89, + 0.8356, + 0.9083, + 0.02, + 0.8789, + 0.8744, + 0.87, + 0.8928, + 0.8633, + 0.3111, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 8, + "steps": 31, + "score": 0.7087, + "total_reward": 16.3004, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.701, + "adversarial_detections": 0, + "adversarial_poisonings": 2, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.3733, + 0.9211, + 0.8844, + 0.9122, + 0.91, + 0.3578, + 0.9056, + 0.9283, + 0.8667, + 0.8944, + 0.84, + 0.3456, + 0.8833, + 0.8811, + 0.8589, + 0.8767, + 0.0774, + 0.835, + 0.8856, + 0.8633, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 9, + "steps": 37, + "score": 0.7151, + "total_reward": 17.877, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.558, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9256, + 0.9233, + 0.8689, + 0.3597, + 0.8822, + 0.935, + 0.02, + 0.3556, + 0.9011, + 0.8967, + 0.8944, + 0.84, + 0.8356, + 0.8489, + 0.8244, + 0.835, + 0.3178, + 0.8656, + 0.8261, + 0.8217, + 0.3044, + 0.85, + 0.7724 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 10, + "steps": 38, + "score": 0.6037, + "total_reward": 17.5072, + "completion_rate": 0.75, + "detection_rate": 1.0, + "trust_calibration": 0.772, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9233, + 0.9461, + 0.8844, + 0.3622, + 0.9078, + 0.02, + 0.3233, + 0.9261, + 0.8967, + 0.8944, + 0.86, + 0.3378, + 0.02, + 0.3263, + 0.8811, + 0.3289, + 0.2967, + 0.8994, + 0.8722, + 0.8678, + 0.3386, + 0.3463, + 0.02, + 0.3089, + 0.8544, + 0.355, + 0.7709 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 11, + "steps": 32, + "score": 0.5881, + "total_reward": 14.703, + "completion_rate": 0.7, + "detection_rate": 0.3333, + "trust_calibration": 0.743, + "adversarial_detections": 1, + "adversarial_poisonings": 2, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8756, + 0.9233, + 0.9211, + 0.3459, + 0.8844, + 0.3622, + 0.9078, + 0.9306, + 0.02, + 0.8689, + 0.8967, + 0.8944, + 0.8722, + 0.8878, + 0.8856, + 0.9228, + 0.0819, + 0.2967, + 0.3244, + 0.8722, + 0.02, + 0.3356, + 0.3011, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 12, + "steps": 36, + "score": 0.6249, + "total_reward": 14.9974, + "completion_rate": 0.75, + "detection_rate": 0.3333, + "trust_calibration": 0.716, + "adversarial_detections": 1, + "adversarial_poisonings": 2, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.8933, + 0.3411, + 0.9189, + 0.8644, + 0.9122, + 0.8756, + 0.02, + 0.3441, + 0.3189, + 0.3167, + 0.8622, + 0.8878, + 0.8511, + 0.8789, + 0.8244, + 0.8878, + 0.2856, + 0.0663, + 0.8589, + 0.8961, + 0.8772, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 13, + "steps": 38, + "score": 0.7872, + "total_reward": 18.1053, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.64, + "adversarial_detections": 1, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.8911, + 0.8667, + 0.9122, + 0.3278, + 0.9056, + 0.8689, + 0.02, + 0.8622, + 0.8378, + 0.8533, + 0.8289, + 0.8767, + 0.8722, + 0.895, + 0.8856, + 0.8633, + 0.8611, + 0.9017, + 0.8939, + 0.3, + 0.8081 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 14, + "steps": 35, + "score": 0.6988, + "total_reward": 18.1679, + "completion_rate": 0.95, + "detection_rate": 0.0, + "trust_calibration": 0.661, + "adversarial_detections": 0, + "adversarial_poisonings": 2, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.8733, + 0.8889, + 0.8844, + 0.3322, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.8989, + 0.8622, + 0.89, + 0.3408, + 0.8856, + 0.8833, + 0.3011, + 0.8267, + 0.8372, + 0.87, + 0.0708, + 0.8833, + 0.2811, + 0.8839, + 0.8544, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 15, + "steps": 38, + "score": 0.68, + "total_reward": 19.0388, + "completion_rate": 0.85, + "detection_rate": 1.0, + "trust_calibration": 0.774, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3548, + 0.9256, + 0.9233, + 0.8689, + 0.3667, + 0.3644, + 0.8922, + 0.935, + 0.8556, + 0.3533, + 0.3211, + 0.3189, + 0.8944, + 0.89, + 0.3378, + 0.8333, + 0.8789, + 0.8767, + 0.8744, + 0.2922, + 0.333, + 0.8656, + 0.8261, + 0.8567, + 0.3, + 0.8478, + 0.8066 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 16, + "steps": 33, + "score": 0.5966, + "total_reward": 15.511, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.691, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.3044, + 0.02, + 0.935, + 0.02, + 0.3256, + 0.8711, + 0.8967, + 0.3444, + 0.02, + 0.89, + 0.8556, + 0.8833, + 0.3311, + 0.3289, + 0.8744, + 0.8878, + 0.8633, + 0.3141, + 0.8589, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 17, + "steps": 35, + "score": 0.5891, + "total_reward": 16.4939, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.795, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.02, + 0.3433, + 0.3319, + 0.8644, + 0.3622, + 0.91, + 0.3286, + 0.9033, + 0.9011, + 0.8789, + 0.3697, + 0.9194, + 0.84, + 0.8856, + 0.8811, + 0.8789, + 0.3597, + 0.2692, + 0.87, + 0.2878, + 0.8656, + 0.0663, + 0.8239, + 0.8817, + 0.02, + 0.4835 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 18, + "steps": 35, + "score": 0.6548, + "total_reward": 16.3705, + "completion_rate": 0.75, + "detection_rate": 1.0, + "trust_calibration": 0.573, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9233, + 0.02, + 0.9167, + 0.3644, + 0.3622, + 0.91, + 0.8556, + 0.9033, + 0.8689, + 0.8967, + 0.8922, + 0.915, + 0.02, + 0.8533, + 0.8789, + 0.2967, + 0.3422, + 0.3078, + 0.8656, + 0.8611, + 0.2789, + 0.3297, + 0.7281 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 19, + "steps": 38, + "score": 0.6912, + "total_reward": 17.2799, + "completion_rate": 0.9, + "detection_rate": 0.0, + "trust_calibration": 0.834, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3156, + 0.9211, + 0.9189, + 0.9144, + 0.9122, + 0.33, + 0.02, + 0.9133, + 0.9261, + 0.8967, + 0.8422, + 0.89, + 0.8356, + 0.8511, + 0.8789, + 0.8922, + 0.87, + 0.3178, + 0.8811, + 0.8589, + 0.8544, + 0.87, + 0.3108, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 20, + "steps": 44, + "score": 0.6149, + "total_reward": 19.0606, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.859, + "adversarial_detections": 0, + "adversarial_poisonings": 2, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.8711, + 0.8667, + 0.9144, + 0.9122, + 0.337, + 0.9078, + 0.9033, + 0.3281, + 0.3519, + 0.3467, + 0.8922, + 0.89, + 0.3608, + 0.8856, + 0.8833, + 0.8289, + 0.2714, + 0.87, + 0.3508, + 0.8656, + 0.3089, + 0.0597, + 0.8172, + 0.053, + 0.8728, + 0.8083, + 0.3439, + 0.2567, + 0.5146 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 21, + "steps": 30, + "score": 0.7401, + "total_reward": 15.543, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.709, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.8911, + 0.8867, + 0.9144, + 0.3322, + 0.9078, + 0.9056, + 0.8711, + 0.8467, + 0.8922, + 0.89, + 0.02, + 0.8856, + 0.8811, + 0.8589, + 0.8744, + 0.87, + 0.8856, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 22, + "steps": 45, + "score": 0.5529, + "total_reward": 16.5871, + "completion_rate": 0.7, + "detection_rate": 0.0, + "trust_calibration": 0.709, + "adversarial_detections": 0, + "adversarial_poisonings": 2, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3548, + 0.02, + 0.9311, + 0.8844, + 0.3622, + 0.36, + 0.9078, + 0.2933, + 0.8689, + 0.9217, + 0.3444, + 0.32, + 0.8556, + 0.3033, + 0.3011, + 0.8767, + 0.3022, + 0.87, + 0.8678, + 0.8811, + 0.0619, + 0.8544, + 0.3022, + 0.8478, + 0.8083, + 0.3119, + 0.0397, + 0.8322, + 0.4222 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 23, + "steps": 31, + "score": 0.7921, + "total_reward": 18.2193, + "completion_rate": 0.95, + "detection_rate": 0.5, + "trust_calibration": 0.847, + "adversarial_detections": 1, + "adversarial_poisonings": 1, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8756, + 0.9233, + 0.3481, + 0.9439, + 0.9167, + 0.9144, + 0.86, + 0.9078, + 0.9033, + 0.9261, + 0.8967, + 0.8944, + 0.8922, + 0.89, + 0.8878, + 0.02, + 0.9228, + 0.0819, + 0.8744, + 0.87, + 0.8833, + 0.7254 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 24, + "steps": 45, + "score": 0.6293, + "total_reward": 16.3622, + "completion_rate": 0.75, + "detection_rate": 1.0, + "trust_calibration": 0.813, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3156, + 0.9483, + 0.9461, + 0.3197, + 0.9072, + 0.8756, + 0.9033, + 0.3541, + 0.02, + 0.9044, + 0.02, + 0.3608, + 0.8511, + 0.2967, + 0.8722, + 0.8356, + 0.8111, + 0.2867, + 0.8544, + 0.845, + 0.3156, + 0.8061, + 0.8367, + 0.7972, + 0.7658 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 25, + "steps": 34, + "score": 0.6745, + "total_reward": 16.8613, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.809, + "adversarial_detections": 0, + "adversarial_poisonings": 2, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8933, + 0.9461, + 0.9189, + 0.8644, + 0.9122, + 0.91, + 0.8556, + 0.8711, + 0.8989, + 0.3237, + 0.8944, + 0.8922, + 0.8378, + 0.3356, + 0.8833, + 0.8489, + 0.0797, + 0.3244, + 0.3222, + 0.8878, + 0.8906, + 0.9061, + 0.2967, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 26, + "steps": 43, + "score": 0.5813, + "total_reward": 17.4397, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.815, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8933, + 0.9211, + 0.9439, + 0.9167, + 0.9144, + 0.02, + 0.3308, + 0.8711, + 0.8467, + 0.8922, + 0.8878, + 0.3486, + 0.3033, + 0.2759, + 0.8767, + 0.3244, + 0.3452, + 0.29, + 0.8156, + 0.8633, + 0.2889, + 0.0597, + 0.8544, + 0.3372, + 0.8478, + 0.2956, + 0.2811, + 0.2889, + 0.4707 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 27, + "steps": 40, + "score": 0.5674, + "total_reward": 15.3205, + "completion_rate": 0.7, + "detection_rate": 0.25, + "trust_calibration": 0.816, + "adversarial_detections": 1, + "adversarial_poisonings": 3, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9206, + 0.3433, + 0.3541, + 0.9189, + 0.9244, + 0.3622, + 0.9078, + 0.9133, + 0.8689, + 0.8644, + 0.885, + 0.02, + 0.8533, + 0.9061, + 0.8267, + 0.9139, + 0.073, + 0.3356, + 0.3441, + 0.0619, + 0.8994, + 0.2722, + 0.323, + 0.3308, + 0.8433, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 28, + "steps": 32, + "score": 0.6685, + "total_reward": 16.0443, + "completion_rate": 0.8, + "detection_rate": 0.0, + "trust_calibration": 0.793, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8956, + 0.9211, + 0.9189, + 0.8844, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.3763, + 0.02, + 0.3167, + 0.8944, + 0.89, + 0.9128, + 0.8856, + 0.8461, + 0.8589, + 0.3267, + 0.8372, + 0.9128, + 0.2933, + 0.3461, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 29, + "steps": 40, + "score": 0.6868, + "total_reward": 17.8577, + "completion_rate": 0.85, + "detection_rate": 1.0, + "trust_calibration": 0.907, + "adversarial_detections": 1, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.8933, + 0.3411, + 0.9189, + 0.02, + 0.8822, + 0.9028, + 0.02, + 0.8511, 0.8989, - 0.8944, - 0.89, + 0.3467, + 0.3444, + 0.8578, 0.8856, - 0.8811, - 0.3067, - 0.3022, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.871 + 0.8511, + 0.2989, + 0.9094, + 0.87, + 0.8856, + 0.3641, + 0.8567, + 0.3572, + 0.8895, + 0.8083, + 0.8353 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 11, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, - "completion_rate": 1.0, + "seed": 0, + "steps": 43, + "score": 0.7114, + "total_reward": 18.4969, + "completion_rate": 0.85, "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 7, + "trust_calibration": 0.729, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -10749,41 +11171,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.3467, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8901 + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.3344, + 0.91, + 0.9056, + 0.9011, + 0.3267, + 0.8922, + 0.8878, + 0.8833, + 0.8789, + 0.8744, + 0.3, + 0.2956, + 0.8611, + 0.2867, + 0.8522, + 0.8478, + 0.8433, + 0.8389, + 0.7841 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 12, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8524, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, - "adversarial_poisonings": 0, + "seed": 1, + "steps": 29, + "score": 0.7083, + "total_reward": 17.707, + "completion_rate": 0.85, + "detection_rate": 0.5, + "trust_calibration": 0.721, + "adversarial_detections": 4, + "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -10796,40 +11222,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9144, 0.9122, + 0.91, 0.9078, + 0.9056, 0.9033, + 0.9011, 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.9095, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8938 + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.6632 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 13, - "steps": 40, - "score": 0.8958, - "total_reward": 18.8108, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 6, - "adversarial_poisonings": 0, + "seed": 2, + "steps": 29, + "score": 0.6919, + "total_reward": 17.2983, + "completion_rate": 0.85, + "detection_rate": 0.4444, + "trust_calibration": 0.561, + "adversarial_detections": 4, + "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -10842,39 +11272,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, + 0.3778, + 0.3456, + 0.9233, 0.9211, + 0.9189, 0.9167, + 0.9144, 0.9122, + 0.91, 0.9078, + 0.9056, 0.9033, + 0.9011, 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.894 + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.6065 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 14, - "steps": 44, - "score": 0.8405, - "total_reward": 19.3311, + "seed": 3, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8008, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.932, - "adversarial_detections": 6, + "trust_calibration": 0.843, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -10891,39 +11325,38 @@ 0.9256, 0.9211, 0.9167, - 0.3422, + 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, - 0.8811, + 0.3111, 0.8767, 0.8722, 0.8678, - 0.2933, + 0.8633, 0.8589, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8853 + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8675 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 15, - "steps": 46, - "score": 0.8162, - "total_reward": 19.5883, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.93, - "adversarial_detections": 6, - "adversarial_poisonings": 0, + "seed": 4, + "steps": 29, + "score": 0.7165, + "total_reward": 17.9128, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.721, + "adversarial_detections": 4, + "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -10936,42 +11369,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3556, + 0.9278, + 0.9256, + 0.9233, 0.9211, + 0.9189, 0.9167, + 0.3414, 0.9122, - 0.3378, + 0.91, + 0.9078, + 0.9056, 0.9033, + 0.9011, 0.8989, - 0.8944, - 0.89, - 0.3156, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8544, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8739, - 0.8825 + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.664 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 16, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8523, - "completion_rate": 1.0, + "seed": 5, + "steps": 46, + "score": 0.7558, + "total_reward": 18.1385, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, + "trust_calibration": 0.832, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -10988,37 +11422,40 @@ 0.9256, 0.9211, 0.9167, - 0.9122, + 0.3422, 0.9078, - 0.9033, + 0.3333, 0.8989, 0.8944, - 0.89, + 0.32, 0.8856, - 0.8811, + 0.3111, 0.8767, 0.8722, - 0.9095, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8938 + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.2711, + 0.8367, + 0.8322, + 0.8229 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 17, - "steps": 42, - "score": 0.867, - "total_reward": 19.0739, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.935, - "adversarial_detections": 6, - "adversarial_poisonings": 0, + "seed": 6, + "steps": 27, + "score": 0.6991, + "total_reward": 16.778, + "completion_rate": 0.85, + "detection_rate": 0.4, + "trust_calibration": 0.725, + "adversarial_detections": 4, + "adversarial_poisonings": 6, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -11031,40 +11468,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, + 0.9233, 0.9211, - 0.3467, + 0.9189, + 0.9167, + 0.9144, 0.9122, + 0.91, 0.9078, + 0.9056, 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8903 + 0.9011, + 0.1019, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.6387 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 18, - "steps": 40, - "score": 0.8958, - "total_reward": 18.8108, - "completion_rate": 1.0, + "seed": 7, + "steps": 42, + "score": 0.7756, + "total_reward": 19.3902, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 6, + "trust_calibration": 0.835, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -11078,9 +11517,12 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, + 0.3763, 0.9211, - 0.9167, + 0.3919, + 0.3997, 0.9122, 0.9078, 0.9033, @@ -11092,25 +11534,26 @@ 0.8767, 0.8722, 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.894 + 0.2933, + 0.8589, + 0.8544, + 0.28, + 0.8456, + 0.8411, + 0.8478 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 19, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, + "seed": 8, + "steps": 44, + "score": 0.809, + "total_reward": 19.4157, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 7, + "trust_calibration": 0.853, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -11124,7 +11567,8 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, + 0.9278, + 0.3526, 0.9211, 0.9167, 0.9122, @@ -11138,26 +11582,27 @@ 0.8767, 0.8722, 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8901 + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.2756, + 0.8411, + 0.8367, + 0.8654 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 20, - "steps": 46, - "score": 0.7653, - "total_reward": 18.3663, - "completion_rate": 0.9, + "seed": 9, + "steps": 40, + "score": 0.782, + "total_reward": 19.5499, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.909, - "adversarial_detections": 5, + "trust_calibration": 0.837, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -11171,43 +11616,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3556, - 0.9211, - 0.9167, - 0.3422, + 0.9278, + 0.9256, + 0.9233, + 0.3841, + 0.9189, + 0.3997, + 0.3994, + 0.3972, 0.9078, 0.9033, - 0.3289, + 0.8989, 0.8944, 0.89, 0.8856, 0.8811, - 0.3067, + 0.8767, 0.8722, 0.8678, - 0.2933, - 0.8589, + 0.8633, + 0.2889, 0.8544, 0.85, - 0.8873, - 0.8828, - 0.8784, - 0.8739, - 0.8423 + 0.8456, + 0.8528 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 21, - "steps": 40, - "score": 0.8958, - "total_reward": 18.8109, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.932, - "adversarial_detections": 6, - "adversarial_poisonings": 0, + "seed": 10, + "steps": 31, + "score": 0.712, + "total_reward": 17.8008, + "completion_rate": 0.85, + "detection_rate": 0.625, + "trust_calibration": 0.448, + "adversarial_detections": 5, + "adversarial_poisonings": 3, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -11220,39 +11666,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, + 0.9233, 0.9211, + 0.9189, 0.9167, + 0.9144, 0.9122, + 0.91, 0.9078, - 0.9033, + 0.9056, + 0.3303, + 0.3281, 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.894 + 0.0997, + 0.0974, + 0.0952, + 0.9295, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.3356, + 0.6281 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 22, - "steps": 46, - "score": 0.7652, - "total_reward": 18.3659, + "seed": 11, + "steps": 40, + "score": 0.7732, + "total_reward": 18.5566, "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.908, - "adversarial_detections": 5, + "trust_calibration": 0.835, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -11266,42 +11716,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3556, - 0.9211, - 0.9167, - 0.3422, + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, - 0.3111, + 0.8811, 0.8767, 0.8722, 0.2978, - 0.8633, - 0.2889, + 0.2933, + 0.8589, 0.8544, 0.85, - 0.8873, - 0.8828, - 0.8784, - 0.8739, - 0.8419 + 0.8456, + 0.8349 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 23, + "seed": 12, "steps": 42, - "score": 0.8403, - "total_reward": 18.4862, - "completion_rate": 0.95, + "score": 0.8546, + "total_reward": 18.8009, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 6, + "trust_calibration": 0.844, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -11315,8 +11765,8 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3556, - 0.3511, + 0.9256, + 0.9211, 0.9167, 0.9122, 0.9078, @@ -11330,25 +11780,25 @@ 0.8722, 0.8678, 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8727 + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8676 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 24, - "steps": 44, - "score": 0.8405, - "total_reward": 19.3311, - "completion_rate": 1.0, + "seed": 13, + "steps": 39, + "score": 0.833, + "total_reward": 18.3252, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.933, - "adversarial_detections": 6, + "trust_calibration": 0.811, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -11362,42 +11812,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, - 0.3422, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.2933, - 0.8589, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8854 + 0.9278, + 0.3456, + 0.3433, + 0.9189, + 0.9144, + 0.91, + 0.9056, + 0.9011, + 0.8967, + 0.8922, + 0.8878, + 0.8833, + 0.8789, + 0.8744, + 0.87, + 0.8656, + 0.8611, + 0.8567, + 0.8522, + 0.8478, + 0.8485 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 25, - "steps": 44, - "score": 0.8405, - "total_reward": 19.3314, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 6, - "adversarial_poisonings": 0, + "seed": 14, + "steps": 29, + "score": 0.6889, + "total_reward": 17.9127, + "completion_rate": 0.85, + "detection_rate": 0.5, + "trust_calibration": 0.609, + "adversarial_detections": 4, + "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -11410,42 +11859,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, - 0.3511, + 0.3763, + 0.3841, + 0.3689, 0.9167, + 0.9144, 0.9122, + 0.91, 0.9078, + 0.9056, 0.9033, + 0.9011, 0.8989, - 0.3244, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8857 + 0.3237, + 0.8944, + 0.8922, + 0.093, + 0.0908, + 0.0886, + 0.0863, + 0.9206, + 0.9161, + 0.9117, + 0.6353 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 26, - "steps": 42, - "score": 0.8403, - "total_reward": 18.4855, - "completion_rate": 0.95, - "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 6, - "adversarial_poisonings": 0, + "seed": 15, + "steps": 30, + "score": 0.6847, + "total_reward": 18.4869, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.635, + "adversarial_detections": 4, + "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -11458,40 +11910,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.3608, 0.9256, + 0.9233, 0.9211, - 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.3067, - 0.3022, - 0.8678, - 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.872 + 0.3459, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.3348, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.8944, + 0.8922, + 0.093, + 0.0908, + 0.0886, + 0.0863, + 0.0841, + 0.9184, + 0.9139, + 0.9095, + 0.6404 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 27, - "steps": 46, - "score": 0.8179, - "total_reward": 19.6285, + "seed": 16, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8009, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.924, - "adversarial_detections": 7, + "trust_calibration": 0.844, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -11507,7 +11964,7 @@ "rewards": [ 0.9256, 0.9211, - 0.3467, + 0.9167, 0.9122, 0.9078, 0.9033, @@ -11515,32 +11972,30 @@ 0.8944, 0.89, 0.8856, - 0.3111, + 0.8811, 0.8767, 0.8722, - 0.2978, + 0.8678, 0.8633, - 0.8589, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8739, - 0.881 + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8676 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 28, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8524, + "seed": 17, + "steps": 46, + "score": 0.8048, + "total_reward": 19.316, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, + "trust_calibration": 0.842, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -11556,7 +12011,7 @@ "rewards": [ 0.9256, 0.9211, - 0.9167, + 0.3467, 0.9122, 0.9078, 0.9033, @@ -11567,27 +12022,30 @@ 0.8811, 0.8767, 0.8722, - 0.9095, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8938 + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.28, + 0.8456, + 0.8411, + 0.8367, + 0.8322, + 0.8605 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 29, - "steps": 44, - "score": 0.8405, - "total_reward": 19.3311, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.933, - "adversarial_detections": 6, - "adversarial_poisonings": 0, + "seed": 18, + "steps": 26, + "score": 0.6967, + "total_reward": 16.7213, + "completion_rate": 0.85, + "detection_rate": 0.3333, + "trust_calibration": 0.701, + "adversarial_detections": 3, + "adversarial_poisonings": 6, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -11600,43 +12058,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, + 0.9233, 0.9211, + 0.9189, 0.9167, + 0.9144, 0.9122, + 0.91, 0.9078, + 0.9056, 0.9033, + 0.9011, 0.8989, - 0.3244, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.3022, - 0.8678, - 0.8633, - 0.8589, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8854 + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.6149 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 30, - "steps": 44, - "score": 0.8148, - "total_reward": 18.741, - "completion_rate": 0.95, - "detection_rate": 1.0, - "trust_calibration": 0.922, - "adversarial_detections": 6, - "adversarial_poisonings": 0, - "status": "completed", + "seed": 19, + "steps": 20, + "score": 0.6193, + "total_reward": 13.0053, + "completion_rate": 0.65, + "detection_rate": 0.0, + "trust_calibration": 0.576, + "adversarial_detections": 0, + "adversarial_poisonings": 5, + "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, @@ -11648,41 +12107,39 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.3478, 0.9256, + 0.9233, 0.9211, - 0.9167, + 0.9189, + 0.3437, + 0.9144, 0.9122, + 0.91, 0.9078, + 0.9056, 0.9033, + 0.9011, 0.8989, - 0.8944, - 0.32, - 0.8856, - 0.3111, - 0.3067, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8653 + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.01 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 31, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8524, - "completion_rate": 1.0, + "seed": 20, + "steps": 46, + "score": 0.7498, + "total_reward": 19.4938, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, + "trust_calibration": 0.835, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -11696,39 +12153,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, - 0.9211, + 0.3763, + 0.3841, 0.9167, 0.9122, - 0.9078, + 0.3378, 0.9033, 0.8989, 0.8944, 0.89, - 0.8856, + 0.3156, 0.8811, 0.8767, - 0.8722, - 0.9095, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8938 + 0.3022, + 0.8678, + 0.8633, + 0.8589, + 0.2844, + 0.85, + 0.8456, + 0.8411, + 0.8367, + 0.8322, + 0.8412 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 32, - "steps": 44, - "score": 0.8423, - "total_reward": 19.3725, + "seed": 21, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8009, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, + "trust_calibration": 0.844, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -11745,39 +12207,38 @@ 0.9256, 0.9211, 0.9167, - 0.3422, + 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, - 0.3111, + 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8851 + 0.8589, + 0.8544, + 0.85, + 0.2756, + 0.8411, + 0.8676 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 33, - "steps": 46, - "score": 0.8178, - "total_reward": 19.6279, + "seed": 22, + "steps": 36, + "score": 0.7334, + "total_reward": 21.2675, "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.922, - "adversarial_detections": 7, - "adversarial_poisonings": 0, + "detection_rate": 0.8, + "trust_calibration": 0.747, + "adversarial_detections": 4, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -11790,43 +12251,48 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.3778, 0.9256, - 0.9211, + 0.9233, + 0.3481, + 0.9189, 0.9167, + 0.9144, 0.9122, + 0.337, 0.9078, + 0.3326, 0.9033, - 0.3289, - 0.8944, + 0.9011, + 0.3259, + 0.8967, + 0.3214, + 0.8922, 0.89, + 0.3148, 0.8856, - 0.8811, - 0.3067, - 0.8722, - 0.8678, - 0.2933, - 0.8589, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8739, - 0.8804 + 0.0863, + 0.9206, + 0.9161, + 0.9117, + 0.9073, + 0.8789, + 0.8544, + 0.7968 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 34, - "steps": 42, - "score": 0.867, - "total_reward": 19.0739, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.935, - "adversarial_detections": 6, - "adversarial_poisonings": 0, + "seed": 23, + "steps": 28, + "score": 0.6922, + "total_reward": 17.3057, + "completion_rate": 0.85, + "detection_rate": 0.4444, + "trust_calibration": 0.645, + "adversarial_detections": 4, + "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -11839,40 +12305,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, + 0.9278, + 0.3686, + 0.3433, 0.9211, + 0.9189, 0.9167, + 0.9144, 0.9122, + 0.91, 0.9078, + 0.9056, 0.9033, - 0.3289, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8904 + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.6298 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 35, - "steps": 44, - "score": 0.8405, - "total_reward": 19.3313, + "seed": 24, + "steps": 46, + "score": 0.7725, + "total_reward": 20.0838, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.933, - "adversarial_detections": 6, + "trust_calibration": 0.836, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -11886,41 +12355,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, + 0.9278, + 0.3686, + 0.9483, + 0.3711, 0.9167, 0.9122, 0.9078, - 0.3333, + 0.9033, 0.8989, 0.8944, 0.89, - 0.3156, + 0.8856, 0.8811, 0.8767, - 0.8722, + 0.3022, 0.8678, 0.8633, - 0.8589, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8855 + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.2711, + 0.8367, + 0.8322, + 0.8591 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 36, - "steps": 44, - "score": 0.8167, - "total_reward": 18.7838, - "completion_rate": 0.95, + "seed": 25, + "steps": 34, + "score": 0.6755, + "total_reward": 18.9148, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.927, - "adversarial_detections": 7, + "trust_calibration": 0.71, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -11934,42 +12406,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.3511, - 0.3467, - 0.3422, + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9144, + 0.3322, + 0.91, 0.9078, + 0.9056, 0.9033, - 0.8989, + 0.9011, + 0.3189, + 0.3167, 0.8944, - 0.89, + 0.3122, + 0.31, + 0.3078, 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8664 + 0.9011, + 0.8967, + 0.8922, + 0.3078, + 0.3033, + 0.8789, + 0.782 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 37, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8524, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, - "adversarial_poisonings": 0, + "seed": 26, + "steps": 34, + "score": 0.6561, + "total_reward": 19.0282, + "completion_rate": 0.8, + "detection_rate": 0.6667, + "trust_calibration": 0.467, + "adversarial_detections": 2, + "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -11982,39 +12459,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, - 0.9122, - 0.9078, - 0.9033, + 0.9528, + 0.9506, + 0.9483, + 0.3711, 0.8989, + 0.3667, + 0.3644, + 0.9372, + 0.935, + 0.3578, + 0.3556, + 0.3533, + 0.3289, + 0.8967, 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.9095, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8938 + 0.8922, + 0.317, + 0.8878, + 0.3126, + 0.8833, + 0.0841, + 0.9184, + 0.9139, + 0.8878, + 0.8656, + 0.3163, + 0.3419, + 0.6213 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 38, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8524, - "completion_rate": 1.0, + "seed": 27, + "steps": 46, + "score": 0.7256, + "total_reward": 20.3155, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, + "trust_calibration": 0.828, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -12028,40 +12513,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, + 0.3763, 0.9211, - 0.9167, - 0.9122, + 0.9189, + 0.3997, + 0.3994, + 0.3972, 0.9078, 0.9033, - 0.8989, + 0.3289, 0.8944, 0.89, - 0.8856, + 0.3156, 0.8811, 0.8767, 0.8722, - 0.9095, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8938 + 0.8678, + 0.8633, + 0.8589, + 0.2844, + 0.85, + 0.2756, + 0.8411, + 0.8367, + 0.8322, + 0.8395 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 39, - "steps": 40, - "score": 0.8958, - "total_reward": 18.8109, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.932, - "adversarial_detections": 6, - "adversarial_poisonings": 0, + "seed": 28, + "steps": 28, + "score": 0.7355, + "total_reward": 17.6509, + "completion_rate": 0.9, + "detection_rate": 0.5, + "trust_calibration": 0.725, + "adversarial_detections": 5, + "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -12074,40 +12566,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, + 0.9233, 0.9211, + 0.9189, 0.9167, + 0.9144, 0.9122, + 0.91, 0.9078, + 0.9056, 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.894 + 0.9011, + 0.1019, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.9273, + 0.9228, + 0.9184, + 0.9139, + 0.6841 ] }, { - "policy": "oracle_lite", + "policy": "heuristic", "task_type": "task3", - "seed": 40, - "steps": 42, - "score": 0.867, - "total_reward": 19.0739, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.935, - "adversarial_detections": 6, - "adversarial_poisonings": 0, + "seed": 29, + "steps": 28, + "score": 0.7258, + "total_reward": 18.144, + "completion_rate": 0.9, + "detection_rate": 0.5, + "trust_calibration": 0.709, + "adversarial_detections": 4, + "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -12120,33 +12615,36 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, + 0.9233, 0.9211, + 0.9189, 0.9167, - 0.9122, - 0.3378, + 0.9144, + 0.3392, + 0.91, + 0.9078, + 0.9056, 0.9033, + 0.3281, 0.8989, + 0.8967, 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8904 + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.6801 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 41, + "seed": 0, "steps": 42, "score": 0.867, "total_reward": 19.0739, @@ -12170,14 +12668,14 @@ 0.9256, 0.9211, 0.9167, - 0.3422, + 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, - 0.8811, + 0.3111, 0.8767, 0.8722, 0.8678, @@ -12193,53 +12691,7 @@ { "policy": "oracle_lite", "task_type": "task3", - "seed": 42, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8524, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, - "adversarial_poisonings": 0, - "status": "completed", - "difficulty_profile": { - "adaptive": false, - "episodes_seen": 0, - "rolling_detection_rate": 0.0, - "adversarial_threshold": 0.7, - "high_stakes_ratio": 0.35, - "verify_budget_penalty": 0, - "adversary_benign_confidence": 0.88, - "adversary_poison_confidence": 0.92 - }, - "rewards": [ - 0.9256, - 0.9211, - 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.9095, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8938 - ] - }, - { - "policy": "oracle_lite", - "task_type": "task3", - "seed": 43, + "seed": 1, "steps": 40, "score": 0.8977, "total_reward": 18.8524, @@ -12285,13 +12737,13 @@ { "policy": "oracle_lite", "task_type": "task3", - "seed": 44, + "seed": 2, "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, - "completion_rate": 1.0, + "score": 0.8422, + "total_reward": 18.5276, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.934, + "trust_calibration": 0.933, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", @@ -12306,55 +12758,8 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.3111, - 0.8767, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8901 - ] - }, - { - "policy": "oracle_lite", - "task_type": "task3", - "seed": 45, - "steps": 40, - "score": 0.8958, - "total_reward": 18.8109, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.932, - "adversarial_detections": 6, - "adversarial_poisonings": 0, - "status": "completed", - "difficulty_profile": { - "adaptive": false, - "episodes_seen": 0, - "rolling_detection_rate": 0.0, - "adversarial_threshold": 0.7, - "high_stakes_ratio": 0.35, - "verify_budget_penalty": 0, - "adversary_benign_confidence": 0.88, - "adversary_poison_confidence": 0.92 - }, - "rewards": [ - 0.9256, - 0.9211, + 0.3556, + 0.3511, 0.9167, 0.9122, 0.9078, @@ -12372,16 +12777,17 @@ 0.8961, 0.8917, 0.8873, - 0.894 + 0.8828, + 0.8724 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 46, + "seed": 3, "steps": 42, "score": 0.8689, - "total_reward": 19.1154, + "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, @@ -12406,10 +12812,10 @@ 0.9078, 0.9033, 0.8989, - 0.3244, + 0.8944, 0.89, 0.8856, - 0.8811, + 0.3111, 0.8767, 0.8722, 0.8678, @@ -12425,10 +12831,10 @@ { "policy": "oracle_lite", "task_type": "task3", - "seed": 47, + "seed": 4, "steps": 42, "score": 0.8689, - "total_reward": 19.1153, + "total_reward": 19.1154, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, @@ -12452,12 +12858,12 @@ 0.9122, 0.9078, 0.9033, - 0.8989, + 0.3289, 0.8944, 0.89, 0.8856, 0.8811, - 0.3067, + 0.8767, 0.8722, 0.8678, 0.905, @@ -12472,13 +12878,13 @@ { "policy": "oracle_lite", "task_type": "task3", - "seed": 48, - "steps": 44, - "score": 0.8405, - "total_reward": 19.3312, - "completion_rate": 1.0, + "seed": 5, + "steps": 46, + "score": 0.7916, + "total_reward": 18.9976, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.933, + "trust_calibration": 0.917, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", @@ -12496,34 +12902,35 @@ 0.9256, 0.9211, 0.9167, - 0.9122, - 0.3378, - 0.9033, + 0.3422, + 0.9078, + 0.3333, 0.8989, 0.8944, 0.32, 0.8856, - 0.8811, + 0.3111, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, - 0.8961, + 0.8544, 0.8917, 0.8873, 0.8828, 0.8784, - 0.8855 + 0.8739, + 0.8618 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 49, + "seed": 6, "steps": 40, "score": 0.8977, - "total_reward": 18.8524, + "total_reward": 18.8523, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, @@ -12566,14 +12973,14 @@ { "policy": "oracle_lite", "task_type": "task3", - "seed": 50, - "steps": 44, - "score": 0.8423, - "total_reward": 19.3725, + "seed": 7, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8108, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, - "adversarial_detections": 7, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -12592,36 +12999,34 @@ 0.9167, 0.9122, 0.9078, - 0.3333, + 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, - 0.3022, + 0.8722, 0.8678, - 0.8633, + 0.905, 0.9006, 0.8961, 0.8917, 0.8873, - 0.8828, - 0.8784, - 0.885 + 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 51, - "steps": 46, - "score": 0.8178, - "total_reward": 19.6279, + "seed": 8, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3315, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.922, - "adversarial_detections": 7, + "trust_calibration": 0.934, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -12636,41 +13041,40 @@ }, "rewards": [ 0.9256, - 0.9211, + 0.3511, 0.9167, 0.9122, 0.9078, 0.9033, 0.3289, 0.8944, - 0.32, + 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, - 0.2933, + 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, - 0.8739, - 0.8804 + 0.8857 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 52, - "steps": 46, - "score": 0.7899, - "total_reward": 18.9582, - "completion_rate": 0.95, + "seed": 9, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8108, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.926, - "adversarial_detections": 5, + "trust_calibration": 0.931, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -12684,41 +13088,38 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3556, + 0.9256, 0.9211, 0.9167, 0.9122, - 0.3378, + 0.9078, 0.9033, - 0.3289, + 0.8989, 0.8944, - 0.32, + 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, - 0.8633, - 0.8589, - 0.8544, - 0.85, + 0.905, + 0.9006, + 0.8961, + 0.8917, 0.8873, - 0.8828, - 0.8784, - 0.8739, - 0.8641 + 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 53, - "steps": 44, - "score": 0.8423, - "total_reward": 19.3723, - "completion_rate": 1.0, + "seed": 10, + "steps": 42, + "score": 0.8421, + "total_reward": 18.5263, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.93, + "trust_calibration": 0.928, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", @@ -12737,7 +13138,7 @@ 0.9211, 0.9167, 0.9122, - 0.3378, + 0.9078, 0.9033, 0.8989, 0.8944, @@ -12745,29 +13146,28 @@ 0.8856, 0.8811, 0.3067, - 0.8722, + 0.3022, 0.8678, - 0.8633, + 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, - 0.8784, - 0.8848 + 0.871 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 54, - "steps": 46, - "score": 0.7916, - "total_reward": 18.9976, - "completion_rate": 0.95, + "seed": 11, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.918, - "adversarial_detections": 6, + "trust_calibration": 0.934, + "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -12781,38 +13181,36 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3556, + 0.9256, 0.9211, - 0.9167, + 0.3467, 0.9122, - 0.3378, + 0.9078, 0.9033, 0.8989, 0.8944, 0.89, - 0.3156, + 0.8856, 0.8811, 0.8767, 0.8722, - 0.2978, - 0.8633, - 0.8589, - 0.8544, + 0.8678, + 0.905, + 0.9006, + 0.8961, 0.8917, 0.8873, 0.8828, - 0.8784, - 0.8739, - 0.8619 + 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 55, - "steps": 44, - "score": 0.8423, - "total_reward": 19.3725, + "seed": 12, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, @@ -12831,7 +13229,7 @@ }, "rewards": [ 0.9256, - 0.3511, + 0.9211, 0.9167, 0.9122, 0.9078, @@ -12839,32 +13237,30 @@ 0.8989, 0.8944, 0.89, - 0.3156, + 0.8856, 0.8811, 0.8767, 0.8722, - 0.8678, - 0.8633, + 0.9095, + 0.905, 0.9006, 0.8961, 0.8917, 0.8873, - 0.8828, - 0.8784, - 0.8851 + 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 56, - "steps": 46, - "score": 0.7899, - "total_reward": 18.957, - "completion_rate": 0.95, + "seed": 13, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8108, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.922, - "adversarial_detections": 5, + "trust_calibration": 0.931, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -12878,42 +13274,87 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3556, + 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, - 0.3333, + 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, - 0.3067, + 0.8767, 0.8722, 0.8678, - 0.8633, - 0.2889, - 0.8544, - 0.85, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.894 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 14, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3311, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.932, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8961, + 0.8917, 0.8873, 0.8828, 0.8784, - 0.8739, - 0.8629 + 0.8853 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 57, + "seed": 15, "steps": 46, - "score": 0.8179, - "total_reward": 19.6284, + "score": 0.8162, + "total_reward": 19.5883, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.924, - "adversarial_detections": 7, + "trust_calibration": 0.93, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -12927,38 +13368,38 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, + 0.3556, 0.9211, 0.9167, 0.9122, 0.3378, 0.9033, - 0.3289, + 0.8989, 0.8944, 0.89, - 0.8856, + 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, - 0.2933, + 0.8633, 0.8589, - 0.8961, + 0.8544, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, - 0.881 + 0.8825 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 58, - "steps": 44, - "score": 0.8423, - "total_reward": 19.3725, + "seed": 16, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8523, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, @@ -12979,37 +13420,35 @@ 0.9256, 0.9211, 0.9167, - 0.3422, + 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, - 0.3156, + 0.8856, 0.8811, 0.8767, 0.8722, - 0.8678, - 0.8633, + 0.9095, + 0.905, 0.9006, 0.8961, 0.8917, 0.8873, - 0.8828, - 0.8784, - 0.885 + 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 59, + "seed": 17, "steps": 42, - "score": 0.8403, - "total_reward": 18.4861, - "completion_rate": 0.95, + "score": 0.867, + "total_reward": 19.0739, + "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.934, + "trust_calibration": 0.935, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", @@ -13025,7 +13464,7 @@ }, "rewards": [ 0.9256, - 0.3511, + 0.9211, 0.3467, 0.9122, 0.9078, @@ -13044,20 +13483,20 @@ 0.8917, 0.8873, 0.8828, - 0.8726 + 0.8903 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 60, + "seed": 18, "steps": 40, - "score": 0.8977, - "total_reward": 18.8524, + "score": 0.8958, + "total_reward": 18.8108, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, - "adversarial_detections": 7, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -13084,25 +13523,25 @@ 0.8811, 0.8767, 0.8722, - 0.9095, + 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, - 0.8938 + 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 61, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8524, + "seed": 19, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.931, + "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", @@ -13122,7 +13561,7 @@ 0.9167, 0.9122, 0.9078, - 0.9033, + 0.3333, 0.8989, 0.8944, 0.89, @@ -13130,26 +13569,27 @@ 0.8811, 0.8767, 0.8722, - 0.9095, + 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, - 0.8938 + 0.8828, + 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 62, + "seed": 20, "steps": 46, - "score": 0.7933, - "total_reward": 19.0389, - "completion_rate": 0.95, + "score": 0.7653, + "total_reward": 18.3663, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.916, - "adversarial_detections": 7, + "trust_calibration": 0.909, + "adversarial_detections": 5, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -13165,40 +13605,40 @@ "rewards": [ 0.3556, 0.9211, - 0.3467, - 0.9122, + 0.9167, + 0.3422, 0.9078, 0.9033, - 0.8989, + 0.3289, 0.8944, 0.89, - 0.3156, - 0.3111, - 0.8767, + 0.8856, + 0.8811, + 0.3067, 0.8722, 0.8678, - 0.8633, + 0.2933, 0.8589, - 0.8961, - 0.8917, + 0.8544, + 0.85, 0.8873, 0.8828, 0.8784, 0.8739, - 0.8615 + 0.8423 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 63, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, + "seed": 21, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8109, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 7, + "trust_calibration": 0.932, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -13214,7 +13654,7 @@ "rewards": [ 0.9256, 0.9211, - 0.3467, + 0.9167, 0.9122, 0.9078, 0.9033, @@ -13231,21 +13671,20 @@ 0.8961, 0.8917, 0.8873, - 0.8828, - 0.8901 + 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 64, - "steps": 44, - "score": 0.8423, - "total_reward": 19.3725, - "completion_rate": 1.0, + "seed": 22, + "steps": 46, + "score": 0.7652, + "total_reward": 18.3659, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, + "trust_calibration": 0.908, + "adversarial_detections": 5, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -13259,41 +13698,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, + 0.3556, 0.9211, 0.9167, 0.3422, 0.9078, - 0.3333, + 0.9033, 0.8989, 0.8944, 0.89, 0.8856, - 0.8811, + 0.3111, 0.8767, 0.8722, - 0.8678, + 0.2978, 0.8633, - 0.9006, - 0.8961, - 0.8917, + 0.2889, + 0.8544, + 0.85, 0.8873, 0.8828, 0.8784, - 0.8851 + 0.8739, + 0.8419 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 65, + "seed": 23, "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, - "completion_rate": 1.0, + "score": 0.8403, + "total_reward": 18.4862, + "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.934, - "adversarial_detections": 7, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -13307,40 +13747,40 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, + 0.3556, + 0.3511, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, - 0.32, + 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, - 0.905, + 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, - 0.8901 + 0.8727 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 66, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, + "seed": 24, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3311, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 7, + "trust_calibration": 0.933, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -13357,37 +13797,38 @@ 0.9256, 0.9211, 0.9167, - 0.9122, + 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, - 0.32, + 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, - 0.905, - 0.9006, + 0.2933, + 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, - 0.8901 + 0.8784, + 0.8854 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 67, - "steps": 46, - "score": 0.8179, - "total_reward": 19.6293, + "seed": 25, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3314, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.927, - "adversarial_detections": 7, + "trust_calibration": 0.934, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -13402,10 +13843,10 @@ }, "rewards": [ 0.9256, - 0.9211, - 0.3467, + 0.3511, + 0.9167, 0.9122, - 0.3378, + 0.9078, 0.9033, 0.8989, 0.3244, @@ -13422,20 +13863,19 @@ 0.8873, 0.8828, 0.8784, - 0.8739, - 0.8818 + 0.8857 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 68, - "steps": 46, - "score": 0.8162, - "total_reward": 19.5881, - "completion_rate": 1.0, + "seed": 26, + "steps": 42, + "score": 0.8403, + "total_reward": 18.4855, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.929, + "trust_calibration": 0.931, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", @@ -13451,9 +13891,56 @@ }, "rewards": [ 0.9256, - 0.3511, + 0.9211, 0.9167, - 0.3422, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.3067, + 0.3022, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.872 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 27, + "steps": 46, + "score": 0.8179, + "total_reward": 19.6285, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.924, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.3467, + 0.9122, 0.9078, 0.9033, 0.8989, @@ -13463,28 +13950,28 @@ 0.3111, 0.8767, 0.8722, - 0.8678, + 0.2978, 0.8633, 0.8589, - 0.8544, + 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, - 0.8823 + 0.881 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 69, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, + "seed": 28, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.934, + "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", @@ -13504,7 +13991,7 @@ 0.9167, 0.9122, 0.9078, - 0.3333, + 0.9033, 0.8989, 0.8944, 0.89, @@ -13512,27 +13999,26 @@ 0.8811, 0.8767, 0.8722, - 0.8678, + 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, - 0.8828, - 0.8901 + 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", - "seed": 70, + "seed": 29, "steps": 44, - "score": 0.8423, - "total_reward": 19.3728, + "score": 0.8405, + "total_reward": 19.3311, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.933, - "adversarial_detections": 7, + "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -13546,22 +14032,22 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3556, + 0.9256, 0.9211, 0.9167, 0.9122, - 0.3378, + 0.9078, 0.9033, 0.8989, - 0.8944, + 0.3244, 0.89, 0.8856, 0.8811, 0.8767, - 0.8722, + 0.3022, 0.8678, 0.8633, - 0.9006, + 0.8589, 0.8961, 0.8917, 0.8873, @@ -13571,16 +14057,16 @@ ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 71, - "steps": 44, - "score": 0.8405, - "total_reward": 19.3313, - "completion_rate": 1.0, + "seed": 0, + "steps": 46, + "score": 0.7123, + "total_reward": 18.5191, + "completion_rate": 0.85, "detection_rate": 1.0, - "trust_calibration": 0.933, - "adversarial_detections": 6, + "trust_calibration": 0.729, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -13594,42 +14080,95 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, - 0.3422, + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.3344, + 0.91, + 0.9056, + 0.9011, + 0.3267, + 0.8922, + 0.8878, + 0.8833, + 0.8789, + 0.8744, + 0.3, + 0.2956, + 0.8789, + 0.2844, + 0.8678, + 0.8611, + 0.8367, + 0.8322, + 0.7797 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 1, + "steps": 30, + "score": 0.7434, + "total_reward": 18.5852, + "completion_rate": 0.9, + "detection_rate": 0.625, + "trust_calibration": 0.721, + "adversarial_detections": 5, + "adversarial_poisonings": 3, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9144, + 0.9122, + 0.91, 0.9078, - 0.3333, + 0.9056, + 0.9033, + 0.9011, 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8856 + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.9273, + 0.9228, + 0.9184, + 0.9139, + 0.9095, + 0.716 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 72, - "steps": 44, - "score": 0.8423, - "total_reward": 19.3724, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, - "adversarial_poisonings": 0, + "seed": 2, + "steps": 28, + "score": 0.7198, + "total_reward": 17.2756, + "completion_rate": 0.85, + "detection_rate": 0.5556, + "trust_calibration": 0.559, + "adversarial_detections": 5, + "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -13642,41 +14181,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, + 0.3778, + 0.3456, + 0.9233, 0.9211, - 0.3467, + 0.9189, + 0.9167, + 0.9144, 0.9122, + 0.91, 0.9078, + 0.9056, 0.9033, + 0.9011, 0.8989, - 0.8944, - 0.89, - 0.3156, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8849 + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.9273, + 0.9228, + 0.9184, + 0.9139, + 0.6418 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 73, - "steps": 40, - "score": 0.8958, - "total_reward": 18.8109, + "seed": 3, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8008, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.932, - "adversarial_detections": 6, + "trust_calibration": 0.843, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -13700,30 +14240,31 @@ 0.8944, 0.89, 0.8856, - 0.8811, + 0.3111, 0.8767, 0.8722, 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.894 + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8675 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 74, - "steps": 44, - "score": 0.8423, - "total_reward": 19.3724, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, - "adversarial_poisonings": 0, + "seed": 4, + "steps": 28, + "score": 0.7455, + "total_reward": 17.8908, + "completion_rate": 0.9, + "detection_rate": 0.5556, + "trust_calibration": 0.722, + "adversarial_detections": 5, + "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -13736,41 +14277,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, + 0.9233, 0.9211, + 0.9189, 0.9167, + 0.3414, 0.9122, + 0.91, 0.9078, + 0.9056, 0.9033, - 0.3289, - 0.8944, - 0.89, - 0.8856, - 0.3111, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8849 + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.9273, + 0.9228, + 0.9184, + 0.9139, + 0.7 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 75, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8524, - "completion_rate": 1.0, + "seed": 5, + "steps": 46, + "score": 0.7558, + "total_reward": 18.1385, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, + "trust_calibration": 0.832, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -13787,37 +14329,40 @@ 0.9256, 0.9211, 0.9167, - 0.9122, + 0.3422, 0.9078, - 0.9033, + 0.3333, 0.8989, 0.8944, - 0.89, + 0.32, 0.8856, - 0.8811, + 0.3111, 0.8767, 0.8722, - 0.9095, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8938 + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.2711, + 0.8367, + 0.8322, + 0.8229 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 76, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 7, - "adversarial_poisonings": 0, + "seed": 6, + "steps": 27, + "score": 0.7645, + "total_reward": 17.5844, + "completion_rate": 0.9, + "detection_rate": 0.5556, + "trust_calibration": 0.711, + "adversarial_detections": 5, + "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -13830,40 +14375,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, - 0.3511, + 0.9233, + 0.9211, + 0.9189, 0.9167, + 0.9144, 0.9122, + 0.91, 0.9078, + 0.9056, 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8901 + 0.9011, + 0.1019, + 0.0997, + 0.0974, + 0.0952, + 0.9295, + 0.925, + 0.9206, + 0.9161, + 0.6995 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 77, + "seed": 7, "steps": 42, - "score": 0.867, - "total_reward": 19.0739, - "completion_rate": 1.0, + "score": 0.7756, + "total_reward": 19.3902, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.935, - "adversarial_detections": 6, + "trust_calibration": 0.835, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -13877,9 +14423,12 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, + 0.3763, 0.9211, - 0.9167, + 0.3919, + 0.3997, 0.9122, 0.9078, 0.9033, @@ -13890,27 +14439,27 @@ 0.8811, 0.8767, 0.8722, - 0.2978, - 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8904 + 0.8678, + 0.2933, + 0.8589, + 0.8544, + 0.28, + 0.8456, + 0.8411, + 0.8478 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 78, - "steps": 42, - "score": 0.867, - "total_reward": 19.0739, + "seed": 8, + "steps": 44, + "score": 0.809, + "total_reward": 19.4157, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.935, - "adversarial_detections": 6, + "trust_calibration": 0.853, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -13924,12 +14473,13 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, + 0.9278, + 0.3526, 0.9211, - 0.3467, + 0.9167, 0.9122, 0.9078, - 0.9033, + 0.3333, 0.8989, 0.8944, 0.89, @@ -13939,25 +14489,26 @@ 0.8722, 0.8678, 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8903 + 0.8589, + 0.8544, + 0.85, + 0.2756, + 0.8411, + 0.8367, + 0.8654 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 79, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, - "completion_rate": 1.0, + "seed": 9, + "steps": 40, + "score": 0.782, + "total_reward": 19.5499, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 7, + "trust_calibration": 0.837, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -13971,41 +14522,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, - 0.9211, - 0.9167, - 0.9122, + 0.9233, + 0.3841, + 0.9189, + 0.3997, + 0.3994, + 0.3972, 0.9078, 0.9033, 0.8989, 0.8944, - 0.32, + 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8901 + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.8528 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 80, - "steps": 46, - "score": 0.7652, - "total_reward": 18.3653, - "completion_rate": 0.9, - "detection_rate": 1.0, - "trust_calibration": 0.905, + "seed": 10, + "steps": 30, + "score": 0.7394, + "total_reward": 17.7466, + "completion_rate": 0.85, + "detection_rate": 0.7143, + "trust_calibration": 0.434, "adversarial_detections": 5, - "adversarial_poisonings": 0, + "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -14018,42 +14572,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, + 0.9233, 0.9211, + 0.9189, 0.9167, - 0.3422, + 0.9144, + 0.9122, + 0.91, 0.9078, - 0.9033, + 0.9056, + 0.3303, + 0.3281, 0.8989, - 0.8944, - 0.89, - 0.3156, - 0.3111, - 0.3067, - 0.8722, - 0.8678, - 0.2933, - 0.8589, - 0.8544, - 0.85, - 0.8873, - 0.8828, - 0.8784, - 0.8739, - 0.8412 + 0.0997, + 0.0974, + 0.9317, + 0.9273, + 0.9228, + 0.9184, + 0.9139, + 0.3378, + 0.6536 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 81, - "steps": 42, - "score": 0.867, - "total_reward": 19.0739, - "completion_rate": 1.0, + "seed": 11, + "steps": 40, + "score": 0.7732, + "total_reward": 18.5566, + "completion_rate": 0.9, "detection_rate": 1.0, - "trust_calibration": 0.935, - "adversarial_detections": 6, + "trust_calibration": 0.835, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -14067,40 +14621,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, - 0.3156, + 0.8856, 0.8811, 0.8767, 0.8722, - 0.8678, - 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8904 + 0.2978, + 0.2933, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.8349 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 82, - "steps": 44, - "score": 0.8423, - "total_reward": 19.3725, + "seed": 12, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8009, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, + "trust_calibration": 0.844, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -14117,38 +14673,37 @@ 0.9256, 0.9211, 0.9167, - 0.3422, + 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, - 0.32, + 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.885 + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8676 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 83, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8524, - "completion_rate": 1.0, + "seed": 13, + "steps": 39, + "score": 0.833, + "total_reward": 18.3252, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, + "trust_calibration": 0.811, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -14162,40 +14717,41 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.9167, - 0.9122, - 0.9078, - 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.9095, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8938 + 0.9278, + 0.3456, + 0.3433, + 0.9189, + 0.9144, + 0.91, + 0.9056, + 0.9011, + 0.8967, + 0.8922, + 0.8878, + 0.8833, + 0.8789, + 0.8744, + 0.87, + 0.8656, + 0.8611, + 0.8567, + 0.8522, + 0.8478, + 0.8485 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 84, - "steps": 44, - "score": 0.8405, - "total_reward": 19.3315, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 6, - "adversarial_poisonings": 0, + "seed": 14, + "steps": 30, + "score": 0.7228, + "total_reward": 18.7931, + "completion_rate": 0.9, + "detection_rate": 0.625, + "trust_calibration": 0.609, + "adversarial_detections": 5, + "adversarial_poisonings": 3, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -14208,42 +14764,45 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, - 0.3511, + 0.3763, + 0.3841, + 0.3689, 0.9167, + 0.9144, 0.9122, - 0.3378, + 0.91, + 0.9078, + 0.9056, 0.9033, + 0.9011, 0.8989, + 0.3237, 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8857 + 0.8922, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.9095, + 0.6881 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 85, - "steps": 44, - "score": 0.8405, - "total_reward": 19.3313, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.933, - "adversarial_detections": 6, - "adversarial_poisonings": 0, + "seed": 15, + "steps": 30, + "score": 0.7426, + "total_reward": 19.3077, + "completion_rate": 0.95, + "detection_rate": 0.625, + "trust_calibration": 0.622, + "adversarial_detections": 5, + "adversarial_poisonings": 3, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -14256,41 +14815,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.3608, 0.9256, + 0.9233, 0.9211, + 0.3459, 0.9167, - 0.3422, - 0.9078, + 0.9144, + 0.9122, + 0.91, + 0.3348, + 0.9056, 0.9033, + 0.9011, 0.8989, - 0.3244, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8856 + 0.8967, + 0.8944, + 0.8922, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.9095, + 0.7087 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 86, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8523, + "seed": 16, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8009, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, + "trust_calibration": 0.844, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -14317,26 +14879,27 @@ 0.8811, 0.8767, 0.8722, - 0.9095, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8938 + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8676 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 87, - "steps": 40, - "score": 0.8977, - "total_reward": 18.8523, + "seed": 17, + "steps": 46, + "score": 0.8048, + "total_reward": 19.316, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, + "trust_calibration": 0.842, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -14352,7 +14915,7 @@ "rewards": [ 0.9256, 0.9211, - 0.9167, + 0.3467, 0.9122, 0.9078, 0.9033, @@ -14363,27 +14926,30 @@ 0.8811, 0.8767, 0.8722, - 0.9095, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8938 + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.28, + 0.8456, + 0.8411, + 0.8367, + 0.8322, + 0.8605 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 88, - "steps": 44, - "score": 0.8423, - "total_reward": 19.3725, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, - "adversarial_poisonings": 0, + "seed": 18, + "steps": 27, + "score": 0.7333, + "total_reward": 17.5998, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.701, + "adversarial_detections": 4, + "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -14396,42 +14962,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, + 0.9233, 0.9211, - 0.3467, + 0.9189, + 0.9167, + 0.9144, 0.9122, + 0.91, 0.9078, + 0.9056, 0.9033, + 0.9011, 0.8989, - 0.8944, - 0.89, - 0.3156, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.885 + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.6635 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 89, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 7, - "adversarial_poisonings": 0, + "seed": 19, + "steps": 29, + "score": 0.7268, + "total_reward": 18.1697, + "completion_rate": 0.9, + "detection_rate": 0.5556, + "trust_calibration": 0.66, + "adversarial_detections": 5, + "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -14444,40 +15011,43 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.3478, 0.9256, + 0.9233, 0.9211, - 0.3467, + 0.9189, + 0.3437, + 0.9144, 0.9122, + 0.91, 0.9078, + 0.9056, 0.9033, + 0.9011, 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8901 + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.6822 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 90, - "steps": 40, - "score": 0.8958, - "total_reward": 18.8109, - "completion_rate": 1.0, + "seed": 20, + "steps": 46, + "score": 0.7498, + "total_reward": 19.4938, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.932, - "adversarial_detections": 6, + "trust_calibration": 0.835, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -14491,39 +15061,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, - 0.9211, + 0.3763, + 0.3841, 0.9167, 0.9122, - 0.9078, + 0.3378, 0.9033, 0.8989, 0.8944, 0.89, - 0.8856, + 0.3156, 0.8811, 0.8767, - 0.8722, + 0.3022, 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.894 + 0.8633, + 0.8589, + 0.2844, + 0.85, + 0.8456, + 0.8411, + 0.8367, + 0.8322, + 0.8412 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 91, - "steps": 44, - "score": 0.8423, - "total_reward": 19.3721, + "seed": 21, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8009, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.93, - "adversarial_detections": 7, + "trust_calibration": 0.844, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -14541,7 +15116,7 @@ 0.9211, 0.9167, 0.9122, - 0.3378, + 0.9078, 0.9033, 0.8989, 0.8944, @@ -14550,28 +15125,27 @@ 0.8811, 0.8767, 0.8722, - 0.2978, + 0.8678, 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8846 + 0.8589, + 0.8544, + 0.85, + 0.2756, + 0.8411, + 0.8676 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 92, - "steps": 44, - "score": 0.8423, - "total_reward": 19.3725, + "seed": 22, + "steps": 35, + "score": 0.7608, + "total_reward": 21.3031, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.931, - "adversarial_detections": 7, + "trust_calibration": 0.748, + "adversarial_detections": 5, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -14585,42 +15159,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.3778, 0.9256, - 0.9211, + 0.9233, + 0.3481, + 0.9189, 0.9167, + 0.9144, 0.9122, + 0.337, 0.9078, - 0.3333, - 0.8989, - 0.8944, + 0.3326, + 0.9033, + 0.9011, + 0.3259, + 0.8967, + 0.3214, + 0.8922, 0.89, + 0.3148, 0.8856, - 0.8811, - 0.3067, - 0.8722, - 0.8678, - 0.8633, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.885 + 0.9228, + 0.9184, + 0.9139, + 0.9095, + 0.905, + 0.8767, + 0.8592 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 93, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 7, - "adversarial_poisonings": 0, + "seed": 23, + "steps": 27, + "score": 0.7184, + "total_reward": 17.2411, + "completion_rate": 0.85, + "detection_rate": 0.5, + "trust_calibration": 0.629, + "adversarial_detections": 4, + "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -14633,40 +15212,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.3556, + 0.9278, + 0.3686, + 0.3433, 0.9211, + 0.9189, 0.9167, + 0.9144, 0.9122, + 0.91, 0.9078, + 0.9056, 0.9033, + 0.9011, 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8901 + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.6449 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 94, + "seed": 24, "steps": 46, - "score": 0.8161, - "total_reward": 19.5872, + "score": 0.7725, + "total_reward": 20.0838, "completion_rate": 1.0, "detection_rate": 1.0, - "trust_calibration": 0.926, - "adversarial_detections": 6, + "trust_calibration": 0.836, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -14680,42 +15261,44 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, + 0.9278, + 0.3686, + 0.9483, + 0.3711, 0.9167, 0.9122, 0.9078, 0.9033, - 0.3289, + 0.8989, 0.8944, 0.89, - 0.3156, + 0.8856, 0.8811, 0.8767, - 0.8722, - 0.2978, + 0.3022, + 0.8678, 0.8633, - 0.8589, + 0.2889, 0.8544, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8739, - 0.8814 + 0.85, + 0.8456, + 0.2711, + 0.8367, + 0.8322, + 0.8591 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 95, - "steps": 44, - "score": 0.8405, - "total_reward": 19.3315, - "completion_rate": 1.0, + "seed": 25, + "steps": 34, + "score": 0.6755, + "total_reward": 18.9148, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 6, + "trust_calibration": 0.71, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -14729,41 +15312,46 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.9211, - 0.3467, - 0.9122, - 0.3378, + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9144, + 0.3322, + 0.91, + 0.9078, + 0.9056, 0.9033, - 0.8989, + 0.9011, + 0.3189, + 0.3167, 0.8944, - 0.89, + 0.3122, + 0.31, + 0.3078, 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.8633, - 0.8589, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8784, - 0.8857 + 0.9011, + 0.8967, + 0.8922, + 0.3078, + 0.3033, + 0.8789, + 0.782 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 96, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, - "completion_rate": 1.0, + "seed": 26, + "steps": 33, + "score": 0.6816, + "total_reward": 19.0854, + "completion_rate": 0.8, "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 7, + "trust_calibration": 0.492, + "adversarial_detections": 3, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -14777,40 +15365,46 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ - 0.9256, - 0.3511, - 0.9167, - 0.9122, - 0.9078, - 0.9033, + 0.9528, + 0.9506, + 0.9483, + 0.3711, 0.8989, + 0.3667, + 0.3644, + 0.9372, + 0.935, + 0.3578, + 0.3556, + 0.3533, + 0.3289, + 0.8967, 0.8944, - 0.89, + 0.8922, + 0.317, + 0.8878, + 0.3126, + 0.8833, + 0.9206, + 0.9161, + 0.9117, 0.8856, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8901 + 0.8633, + 0.3141, + 0.7297 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 97, - "steps": 40, - "score": 0.8958, - "total_reward": 18.8109, - "completion_rate": 1.0, + "seed": 27, + "steps": 46, + "score": 0.7256, + "total_reward": 20.3155, + "completion_rate": 0.95, "detection_rate": 1.0, - "trust_calibration": 0.932, - "adversarial_detections": 6, + "trust_calibration": 0.828, + "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { @@ -14824,40 +15418,47 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, + 0.3763, 0.9211, - 0.9167, - 0.9122, + 0.9189, + 0.3997, + 0.3994, + 0.3972, 0.9078, 0.9033, - 0.8989, + 0.3289, 0.8944, 0.89, - 0.8856, + 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.894 + 0.8633, + 0.8589, + 0.2844, + 0.85, + 0.2756, + 0.8411, + 0.8367, + 0.8322, + 0.8395 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 98, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1153, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 7, - "adversarial_poisonings": 0, + "seed": 28, + "steps": 27, + "score": 0.7645, + "total_reward": 17.5845, + "completion_rate": 0.9, + "detection_rate": 0.5556, + "trust_calibration": 0.712, + "adversarial_detections": 5, + "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -14870,41 +15471,42 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, + 0.9233, 0.9211, + 0.9189, 0.9167, + 0.9144, 0.9122, + 0.91, 0.9078, + 0.9056, 0.9033, - 0.8989, - 0.8944, - 0.89, - 0.8856, - 0.8811, - 0.3067, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8901 + 0.9011, + 0.1019, + 0.0997, + 0.0974, + 0.0952, + 0.9295, + 0.925, + 0.9206, + 0.9161, + 0.6996 ] }, { - "policy": "oracle_lite", + "policy": "trained", "task_type": "task3", - "seed": 99, - "steps": 42, - "score": 0.8689, - "total_reward": 19.1154, - "completion_rate": 1.0, - "detection_rate": 1.0, - "trust_calibration": 0.934, - "adversarial_detections": 7, - "adversarial_poisonings": 0, + "seed": 29, + "steps": 29, + "score": 0.761, + "total_reward": 19.0244, + "completion_rate": 0.95, + "detection_rate": 0.625, + "trust_calibration": 0.709, + "adversarial_detections": 5, + "adversarial_poisonings": 3, "status": "completed", "difficulty_profile": { "adaptive": false, @@ -14917,29 +15519,31 @@ "adversary_poison_confidence": 0.92 }, "rewards": [ + 0.9278, 0.9256, + 0.9233, 0.9211, + 0.9189, 0.9167, - 0.9122, + 0.9144, + 0.3392, + 0.91, 0.9078, + 0.9056, 0.9033, + 0.3281, 0.8989, + 0.8967, 0.8944, - 0.89, - 0.3156, - 0.8811, - 0.8767, - 0.8722, - 0.8678, - 0.905, - 0.9006, - 0.8961, - 0.8917, - 0.8873, - 0.8828, - 0.8901 + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.7329 ] } - ], - "chart": "outputs/baseline_comparison.png" + ] }