{ "task": "task3", "tasks": [ "task3" ], "episodes_per_policy": 100, "adaptive": false, "difficulty_controller": { "adaptive": true, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "difficulty_controller_by_task_policy": { "task3": { "random": {}, "heuristic": {}, "oracle_lite": {} } }, "summary": { "random": { "episodes": 100, "avg_score": 0.6601, "avg_completion_rate": 0.8165, "avg_detection_rate": 0.375, "avg_trust_calibration": 0.7349, "avg_steps": 36.13 }, "heuristic": { "episodes": 100, "avg_score": 0.7314, "avg_completion_rate": 0.8935, "avg_detection_rate": 0.7621, "avg_trust_calibration": 0.74, "avg_steps": 35.54 }, "oracle_lite": { "episodes": 100, "avg_score": 0.8562, "avg_completion_rate": 0.991, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.9304, "avg_steps": 42.62 } }, "by_task": { "task3": { "random": { "episodes": 100, "avg_score": 0.6601, "avg_completion_rate": 0.8165, "avg_detection_rate": 0.375, "avg_trust_calibration": 0.7349, "avg_steps": 36.13 }, "heuristic": { "episodes": 100, "avg_score": 0.7314, "avg_completion_rate": 0.8935, "avg_detection_rate": 0.7621, "avg_trust_calibration": 0.74, "avg_steps": 35.54 }, "oracle_lite": { "episodes": 100, "avg_score": 0.8562, "avg_completion_rate": 0.991, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.9304, "avg_steps": 42.62 } } }, "episodes": [ { "policy": "random", "task_type": "task3", "seed": 0, "steps": 36, "score": 0.6105, "total_reward": 15.2622, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.884, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8956, 0.9233, 0.9211, 0.02, 0.9144, 0.9122, 0.9078, 0.3556, 0.02, 0.02, 0.8467, 0.8922, 0.8378, 0.8833, 0.9061, 0.3067, 0.02, 0.8722, 0.073, 0.8306, 0.9061, 0.3397, 0.3044, 0.5035 ] }, { "policy": "random", "task_type": "task3", "seed": 1, "steps": 40, "score": 0.7205, "total_reward": 18.0135, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.662, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9506, 0.8889, 0.8822, 0.91, 0.8556, 0.9011, 0.3167, 0.8622, 0.89, 0.8356, 0.8633, 0.3011, 0.8444, 0.8378, 0.8656, 0.8261, 0.8589, 0.8961, 0.02, 0.3, 0.8106, 0.2933, 0.7916 ] }, { "policy": "random", "task_type": "task3", "seed": 2, "steps": 37, "score": 0.7627, "total_reward": 17.5411, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.752, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3778, 0.3203, 0.9211, 0.9189, 0.8822, 0.91, 0.8556, 0.8511, 0.8967, 0.8422, 0.89, 0.8356, 0.8311, 0.8767, 0.2944, 0.8972, 0.87, 0.8656, 0.8789, 0.02, 0.8722, 0.8207 ] }, { "policy": "random", "task_type": "task3", "seed": 3, "steps": 36, "score": 0.6303, "total_reward": 16.3887, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.798, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9233, 0.3411, 0.8667, 0.9144, 0.9372, 0.91, 0.3278, 0.3556, 0.8511, 0.8989, 0.2922, 0.8878, 0.3056, 0.8489, 0.8767, 0.8922, 0.333, 0.2878, 0.8283, 0.8589, 0.3297, 0.3552, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 4, "steps": 41, "score": 0.6894, "total_reward": 18.6138, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.405, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9506, 0.9233, 0.3711, 0.8844, 0.9122, 0.33, 0.3186, 0.9011, 0.8667, 0.86, 0.8878, 0.8833, 0.8789, 0.8767, 0.8994, 0.3352, 0.8678, 0.3033, 0.8239, 0.8744, 0.8678, 0.2656, 0.2933, 0.2911, 0.7076 ] }, { "policy": "random", "task_type": "task3", "seed": 5, "steps": 31, "score": 0.6062, "total_reward": 15.1538, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.816, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8933, 0.9211, 0.3367, 0.3344, 0.3322, 0.8578, 0.9306, 0.9033, 0.3211, 0.8467, 0.9194, 0.32, 0.8878, 0.8856, 0.8811, 0.02, 0.8394, 0.0752, 0.87, 0.8678, 0.02, 0.8883, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 6, "steps": 39, "score": 0.6337, "total_reward": 15.8429, "completion_rate": 0.8, "detection_rate": 0.0, "trust_calibration": 0.872, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8956, 0.8889, 0.9167, 0.9144, 0.9122, 0.91, 0.9006, 0.8511, 0.3197, 0.9194, 0.28, 0.8556, 0.8811, 0.8789, 0.8422, 0.8856, 0.3311, 0.8589, 0.0597, 0.3222, 0.27, 0.8728, 0.02, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 7, "steps": 32, "score": 0.7179, "total_reward": 15.793, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.869, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8756, 0.8911, 0.8867, 0.9144, 0.86, 0.9056, 0.9033, 0.8989, 0.8967, 0.3444, 0.89, 0.8356, 0.9083, 0.02, 0.8789, 0.8744, 0.87, 0.8928, 0.8633, 0.3111, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 8, "steps": 31, "score": 0.7087, "total_reward": 16.3004, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.701, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.3733, 0.9211, 0.8844, 0.9122, 0.91, 0.3578, 0.9056, 0.9283, 0.8667, 0.8944, 0.84, 0.3456, 0.8833, 0.8811, 0.8589, 0.8767, 0.0774, 0.835, 0.8856, 0.8633, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 9, "steps": 37, "score": 0.7151, "total_reward": 17.877, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.558, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9256, 0.9233, 0.8689, 0.3597, 0.8822, 0.935, 0.02, 0.3556, 0.9011, 0.8967, 0.8944, 0.84, 0.8356, 0.8489, 0.8244, 0.835, 0.3178, 0.8656, 0.8261, 0.8217, 0.3044, 0.85, 0.7724 ] }, { "policy": "random", "task_type": "task3", "seed": 10, "steps": 38, "score": 0.6037, "total_reward": 17.5072, "completion_rate": 0.75, "detection_rate": 1.0, "trust_calibration": 0.772, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9233, 0.9461, 0.8844, 0.3622, 0.9078, 0.02, 0.3233, 0.9261, 0.8967, 0.8944, 0.86, 0.3378, 0.02, 0.3263, 0.8811, 0.3289, 0.2967, 0.8994, 0.8722, 0.8678, 0.3386, 0.3463, 0.02, 0.3089, 0.8544, 0.355, 0.7709 ] }, { "policy": "random", "task_type": "task3", "seed": 11, "steps": 32, "score": 0.5881, "total_reward": 14.703, "completion_rate": 0.7, "detection_rate": 0.3333, "trust_calibration": 0.743, "adversarial_detections": 1, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8756, 0.9233, 0.9211, 0.3459, 0.8844, 0.3622, 0.9078, 0.9306, 0.02, 0.8689, 0.8967, 0.8944, 0.8722, 0.8878, 0.8856, 0.9228, 0.0819, 0.2967, 0.3244, 0.8722, 0.02, 0.3356, 0.3011, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 12, "steps": 36, "score": 0.6249, "total_reward": 14.9974, "completion_rate": 0.75, "detection_rate": 0.3333, "trust_calibration": 0.716, "adversarial_detections": 1, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.8933, 0.3411, 0.9189, 0.8644, 0.9122, 0.8756, 0.02, 0.3441, 0.3189, 0.3167, 0.8622, 0.8878, 0.8511, 0.8789, 0.8244, 0.8878, 0.2856, 0.0663, 0.8589, 0.8961, 0.8772, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 13, "steps": 38, "score": 0.7872, "total_reward": 18.1053, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.64, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8911, 0.8667, 0.9122, 0.3278, 0.9056, 0.8689, 0.02, 0.8622, 0.8378, 0.8533, 0.8289, 0.8767, 0.8722, 0.895, 0.8856, 0.8633, 0.8611, 0.9017, 0.8939, 0.3, 0.8081 ] }, { "policy": "random", "task_type": "task3", "seed": 14, "steps": 35, "score": 0.6988, "total_reward": 18.1679, "completion_rate": 0.95, "detection_rate": 0.0, "trust_calibration": 0.661, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8733, 0.8889, 0.8844, 0.3322, 0.91, 0.9078, 0.9056, 0.9033, 0.8989, 0.8622, 0.89, 0.3408, 0.8856, 0.8833, 0.3011, 0.8267, 0.8372, 0.87, 0.0708, 0.8833, 0.2811, 0.8839, 0.8544, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 15, "steps": 38, "score": 0.68, "total_reward": 19.0388, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.774, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3548, 0.9256, 0.9233, 0.8689, 0.3667, 0.3644, 0.8922, 0.935, 0.8556, 0.3533, 0.3211, 0.3189, 0.8944, 0.89, 0.3378, 0.8333, 0.8789, 0.8767, 0.8744, 0.2922, 0.333, 0.8656, 0.8261, 0.8567, 0.3, 0.8478, 0.8066 ] }, { "policy": "random", "task_type": "task3", "seed": 16, "steps": 33, "score": 0.5966, "total_reward": 15.511, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.691, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.3044, 0.02, 0.935, 0.02, 0.3256, 0.8711, 0.8967, 0.3444, 0.02, 0.89, 0.8556, 0.8833, 0.3311, 0.3289, 0.8744, 0.8878, 0.8633, 0.3141, 0.8589, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 17, "steps": 35, "score": 0.5891, "total_reward": 16.4939, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.795, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.02, 0.3433, 0.3319, 0.8644, 0.3622, 0.91, 0.3286, 0.9033, 0.9011, 0.8789, 0.3697, 0.9194, 0.84, 0.8856, 0.8811, 0.8789, 0.3597, 0.2692, 0.87, 0.2878, 0.8656, 0.0663, 0.8239, 0.8817, 0.02, 0.4835 ] }, { "policy": "random", "task_type": "task3", "seed": 18, "steps": 35, "score": 0.6548, "total_reward": 16.3705, "completion_rate": 0.75, "detection_rate": 1.0, "trust_calibration": 0.573, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9233, 0.02, 0.9167, 0.3644, 0.3622, 0.91, 0.8556, 0.9033, 0.8689, 0.8967, 0.8922, 0.915, 0.02, 0.8533, 0.8789, 0.2967, 0.3422, 0.3078, 0.8656, 0.8611, 0.2789, 0.3297, 0.7281 ] }, { "policy": "random", "task_type": "task3", "seed": 19, "steps": 38, "score": 0.6912, "total_reward": 17.2799, "completion_rate": 0.9, "detection_rate": 0.0, "trust_calibration": 0.834, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3156, 0.9211, 0.9189, 0.9144, 0.9122, 0.33, 0.02, 0.9133, 0.9261, 0.8967, 0.8422, 0.89, 0.8356, 0.8511, 0.8789, 0.8922, 0.87, 0.3178, 0.8811, 0.8589, 0.8544, 0.87, 0.3108, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 20, "steps": 44, "score": 0.6149, "total_reward": 19.0606, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.859, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.8711, 0.8667, 0.9144, 0.9122, 0.337, 0.9078, 0.9033, 0.3281, 0.3519, 0.3467, 0.8922, 0.89, 0.3608, 0.8856, 0.8833, 0.8289, 0.2714, 0.87, 0.3508, 0.8656, 0.3089, 0.0597, 0.8172, 0.053, 0.8728, 0.8083, 0.3439, 0.2567, 0.5146 ] }, { "policy": "random", "task_type": "task3", "seed": 21, "steps": 30, "score": 0.7401, "total_reward": 15.543, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.709, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.8911, 0.8867, 0.9144, 0.3322, 0.9078, 0.9056, 0.8711, 0.8467, 0.8922, 0.89, 0.02, 0.8856, 0.8811, 0.8589, 0.8744, 0.87, 0.8856, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 22, "steps": 45, "score": 0.5529, "total_reward": 16.5871, "completion_rate": 0.7, "detection_rate": 0.0, "trust_calibration": 0.709, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3548, 0.02, 0.9311, 0.8844, 0.3622, 0.36, 0.9078, 0.2933, 0.8689, 0.9217, 0.3444, 0.32, 0.8556, 0.3033, 0.3011, 0.8767, 0.3022, 0.87, 0.8678, 0.8811, 0.0619, 0.8544, 0.3022, 0.8478, 0.8083, 0.3119, 0.0397, 0.8322, 0.4222 ] }, { "policy": "random", "task_type": "task3", "seed": 23, "steps": 31, "score": 0.7921, "total_reward": 18.2193, "completion_rate": 0.95, "detection_rate": 0.5, "trust_calibration": 0.847, "adversarial_detections": 1, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8756, 0.9233, 0.3481, 0.9439, 0.9167, 0.9144, 0.86, 0.9078, 0.9033, 0.9261, 0.8967, 0.8944, 0.8922, 0.89, 0.8878, 0.02, 0.9228, 0.0819, 0.8744, 0.87, 0.8833, 0.7254 ] }, { "policy": "random", "task_type": "task3", "seed": 24, "steps": 45, "score": 0.6293, "total_reward": 16.3622, "completion_rate": 0.75, "detection_rate": 1.0, "trust_calibration": 0.813, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3156, 0.9483, 0.9461, 0.3197, 0.9072, 0.8756, 0.9033, 0.3541, 0.02, 0.9044, 0.02, 0.3608, 0.8511, 0.2967, 0.8722, 0.8356, 0.8111, 0.2867, 0.8544, 0.845, 0.3156, 0.8061, 0.8367, 0.7972, 0.7658 ] }, { "policy": "random", "task_type": "task3", "seed": 25, "steps": 34, "score": 0.6745, "total_reward": 16.8613, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.809, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8933, 0.9461, 0.9189, 0.8644, 0.9122, 0.91, 0.8556, 0.8711, 0.8989, 0.3237, 0.8944, 0.8922, 0.8378, 0.3356, 0.8833, 0.8489, 0.0797, 0.3244, 0.3222, 0.8878, 0.8906, 0.9061, 0.2967, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 26, "steps": 43, "score": 0.5813, "total_reward": 17.4397, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.815, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8933, 0.9211, 0.9439, 0.9167, 0.9144, 0.02, 0.3308, 0.8711, 0.8467, 0.8922, 0.8878, 0.3486, 0.3033, 0.2759, 0.8767, 0.3244, 0.3452, 0.29, 0.8156, 0.8633, 0.2889, 0.0597, 0.8544, 0.3372, 0.8478, 0.2956, 0.2811, 0.2889, 0.4707 ] }, { "policy": "random", "task_type": "task3", "seed": 27, "steps": 40, "score": 0.5674, "total_reward": 15.3205, "completion_rate": 0.7, "detection_rate": 0.25, "trust_calibration": 0.816, "adversarial_detections": 1, "adversarial_poisonings": 3, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9206, 0.3433, 0.3541, 0.9189, 0.9244, 0.3622, 0.9078, 0.9133, 0.8689, 0.8644, 0.885, 0.02, 0.8533, 0.9061, 0.8267, 0.9139, 0.073, 0.3356, 0.3441, 0.0619, 0.8994, 0.2722, 0.323, 0.3308, 0.8433, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 28, "steps": 32, "score": 0.6685, "total_reward": 16.0443, "completion_rate": 0.8, "detection_rate": 0.0, "trust_calibration": 0.793, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8956, 0.9211, 0.9189, 0.8844, 0.9122, 0.91, 0.9078, 0.9056, 0.3763, 0.02, 0.3167, 0.8944, 0.89, 0.9128, 0.8856, 0.8461, 0.8589, 0.3267, 0.8372, 0.9128, 0.2933, 0.3461, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 29, "steps": 40, "score": 0.6868, "total_reward": 17.8577, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.907, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8933, 0.3411, 0.9189, 0.02, 0.8822, 0.9028, 0.02, 0.8511, 0.8989, 0.3467, 0.3444, 0.8578, 0.8856, 0.8511, 0.2989, 0.9094, 0.87, 0.8856, 0.3641, 0.8567, 0.3572, 0.8895, 0.8083, 0.8353 ] }, { "policy": "random", "task_type": "task3", "seed": 30, "steps": 28, "score": 0.4767, "total_reward": 10.4876, "completion_rate": 0.5, "detection_rate": 0.0, "trust_calibration": 0.745, "adversarial_detections": 0, "adversarial_poisonings": 3, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8956, 0.9233, 0.02, 0.9189, 0.02, 0.9394, 0.02, 0.9078, 0.8533, 0.9011, 0.02, 0.8967, 0.3144, 0.2878, 0.8533, 0.3441, 0.0819, 0.2967, 0.0774, 0.835, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 31, "steps": 33, "score": 0.7077, "total_reward": 19.8143, "completion_rate": 0.95, "detection_rate": 0.5, "trust_calibration": 0.68, "adversarial_detections": 1, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9256, 0.9483, 0.3711, 0.9189, 0.3344, 0.9122, 0.91, 0.02, 0.3556, 0.9033, 0.3211, 0.8989, 0.8967, 0.8944, 0.89, 0.3456, 0.8833, 0.8989, 0.0797, 0.8372, 0.29, 0.9073, 0.8633, 0.8611, 0.8589, 0.679 ] }, { "policy": "random", "task_type": "task3", "seed": 32, "steps": 40, "score": 0.5915, "total_reward": 18.3369, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.392, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8756, 0.9233, 0.9211, 0.3689, 0.3444, 0.3622, 0.33, 0.9328, 0.9056, 0.3311, 0.8667, 0.3214, 0.8922, 0.8878, 0.8856, 0.3033, 0.9061, 0.8789, 0.8944, 0.2922, 0.323, 0.2956, 0.3133, 0.0641, 0.8839, 0.8744, 0.3152, 0.3178, 0.3133, 0.3715 ] }, { "policy": "random", "task_type": "task3", "seed": 33, "steps": 41, "score": 0.59, "total_reward": 15.3412, "completion_rate": 0.7, "detection_rate": 0.6667, "trust_calibration": 0.72, "adversarial_detections": 2, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8911, 0.9439, 0.8644, 0.02, 0.8578, 0.02, 0.9033, 0.8489, 0.8967, 0.3444, 0.287, 0.8556, 0.8311, 0.3497, 0.8722, 0.073, 0.02, 0.905, 0.9006, 0.8544, 0.3022, 0.3308, 0.3186, 0.6338 ] }, { "policy": "random", "task_type": "task3", "seed": 34, "steps": 35, "score": 0.6414, "total_reward": 16.0344, "completion_rate": 0.8, "detection_rate": 0.0, "trust_calibration": 0.865, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8933, 0.8689, 0.02, 0.3344, 0.3392, 0.353, 0.9056, 0.3303, 0.8667, 0.8622, 0.89, 0.8556, 0.8511, 0.8767, 0.8744, 0.8722, 0.32, 0.8678, 0.8456, 0.8611, 0.8589, 0.2944, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 35, "steps": 39, "score": 0.5508, "total_reward": 15.9746, "completion_rate": 0.7, "detection_rate": 0.5, "trust_calibration": 0.645, "adversarial_detections": 1, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3133, 0.3411, 0.9167, 0.8822, 0.3056, 0.8511, 0.8989, 0.3497, 0.9194, 0.3192, 0.34, 0.02, 0.8856, 0.3033, 0.2989, 0.8767, 0.8744, 0.2922, 0.02, 0.8478, 0.3386, 0.8789, 0.8817, 0.8172, 0.8895, 0.3286, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 36, "steps": 31, "score": 0.5216, "total_reward": 12.5188, "completion_rate": 0.55, "detection_rate": 0.5, "trust_calibration": 0.506, "adversarial_detections": 1, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.02, 0.8711, 0.3519, 0.3367, 0.3422, 0.91, 0.9328, 0.02, 0.8711, 0.8467, 0.8922, 0.34, 0.3078, 0.8856, 0.3333, 0.3311, 0.02, 0.8744, 0.2922, 0.9095, 0.8283, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 37, "steps": 38, "score": 0.5875, "total_reward": 16.449, "completion_rate": 0.7, "detection_rate": 0.5, "trust_calibration": 0.866, "adversarial_detections": 1, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8933, 0.9211, 0.3689, 0.02, 0.9122, 0.8578, 0.9033, 0.3211, 0.8989, 0.9217, 0.86, 0.3508, 0.02, 0.3563, 0.3641, 0.8789, 0.8394, 0.31, 0.0708, 0.3156, 0.3011, 0.8984, 0.3222, 0.335, 0.8278, 0.627 ] }, { "policy": "random", "task_type": "task3", "seed": 38, "steps": 35, "score": 0.7022, "total_reward": 18.2564, "completion_rate": 0.9, "detection_rate": 0.0, "trust_calibration": 0.441, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.8889, 0.9167, 0.9072, 0.8778, 0.02, 0.9033, 0.9011, 0.8989, 0.8967, 0.3574, 0.84, 0.0908, 0.8833, 0.3311, 0.8417, 0.8744, 0.8722, 0.8678, 0.3486, 0.3133, 0.3289, 0.4474 ] }, { "policy": "random", "task_type": "task3", "seed": 39, "steps": 33, "score": 0.7547, "total_reward": 18.1121, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.604, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9233, 0.9139, 0.9167, 0.8822, 0.02, 0.9078, 0.9056, 0.9033, 0.3189, 0.8444, 0.3122, 0.8578, 0.9106, 0.8833, 0.8489, 0.9161, 0.835, 0.8928, 0.9083, 0.3141, 0.3219, 0.7927 ] }, { "policy": "random", "task_type": "task3", "seed": 40, "steps": 37, "score": 0.5943, "total_reward": 15.4518, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.636, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9233, 0.8889, 0.9167, 0.3574, 0.8778, 0.8533, 0.9011, 0.8989, 0.8622, 0.34, 0.8878, 0.02, 0.2711, 0.8267, 0.3244, 0.33, 0.8356, 0.8633, 0.2811, 0.0619, 0.8567, 0.8172, 0.27, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 41, "steps": 38, "score": 0.7027, "total_reward": 18.9728, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.857, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.8733, 0.9461, 0.9167, 0.8622, 0.337, 0.8756, 0.3233, 0.02, 0.8967, 0.8944, 0.32, 0.8878, 0.8511, 0.8789, 0.8422, 0.87, 0.8678, 0.8833, 0.3341, 0.8567, 0.2744, 0.3352, 0.85, 0.3328, 0.8448 ] }, { "policy": "random", "task_type": "task3", "seed": 42, "steps": 34, "score": 0.7826, "total_reward": 18.7831, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.768, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9506, 0.8911, 0.9167, 0.9144, 0.9122, 0.9078, 0.9033, 0.8989, 0.8967, 0.8622, 0.8578, 0.8656, 0.02, 0.3311, 0.8789, 0.3267, 0.9139, 0.8678, 0.3286, 0.9061, 0.8217, 0.849 ] }, { "policy": "random", "task_type": "task3", "seed": 43, "steps": 34, "score": 0.6248, "total_reward": 16.2447, "completion_rate": 0.8, "detection_rate": 0.0, "trust_calibration": 0.572, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.02, 0.8911, 0.9189, 0.3667, 0.9122, 0.91, 0.8556, 0.3533, 0.8489, 0.8967, 0.8944, 0.3422, 0.34, 0.8856, 0.8833, 0.3011, 0.8417, 0.8744, 0.8722, 0.02, 0.8833, 0.2811, 0.8567, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 44, "steps": 41, "score": 0.6806, "total_reward": 19.7386, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.81, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9483, 0.3411, 0.9189, 0.3667, 0.8822, 0.8778, 0.9033, 0.9011, 0.8667, 0.3214, 0.8922, 0.34, 0.8878, 0.3586, 0.8811, 0.8267, 0.8744, 0.31, 0.9106, 0.8789, 0.8567, 0.8522, 0.8895, 0.2656, 0.02, 0.3261, 0.8264 ] }, { "policy": "random", "task_type": "task3", "seed": 45, "steps": 35, "score": 0.8133, "total_reward": 19.519, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.742, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.8867, 0.8622, 0.8578, 0.9033, 0.9011, 0.8989, 0.8967, 0.8422, 0.89, 0.8556, 0.3033, 0.8789, 0.2967, 0.9139, 0.9128, 0.9083, 0.2811, 0.8567, 0.8578 ] }, { "policy": "random", "task_type": "task3", "seed": 46, "steps": 36, "score": 0.7646, "total_reward": 19.8787, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.813, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3478, 0.9233, 0.9461, 0.9439, 0.9167, 0.9144, 0.88, 0.9328, 0.9056, 0.9033, 0.9011, 0.8989, 0.3167, 0.8922, 0.317, 0.8856, 0.3363, 0.3089, 0.8767, 0.8372, 0.32, 0.8856, 0.9061, 0.8567, 0.8559 ] }, { "policy": "random", "task_type": "task3", "seed": 47, "steps": 31, "score": 0.7026, "total_reward": 16.8629, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.89, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9506, 0.9233, 0.9189, 0.8844, 0.02, 0.91, 0.8733, 0.8489, 0.8967, 0.8944, 0.9172, 0.89, 0.3256, 0.8833, 0.3311, 0.3319, 0.0797, 0.0774, 0.87, 0.8678, 0.8283, 0.5511 ] }, { "policy": "random", "task_type": "task3", "seed": 48, "steps": 40, "score": 0.6251, "total_reward": 17.5018, "completion_rate": 0.8, "detection_rate": 0.0, "trust_calibration": 0.739, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8933, 0.8867, 0.9144, 0.3622, 0.91, 0.3278, 0.9056, 0.3763, 0.3211, 0.3489, 0.3544, 0.8922, 0.373, 0.8533, 0.8489, 0.8744, 0.82, 0.8156, 0.8633, 0.8411, 0.9017, 0.3394, 0.85, 0.3328, 0.3133, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 49, "steps": 34, "score": 0.7592, "total_reward": 18.2216, "completion_rate": 0.95, "detection_rate": 0.0, "trust_calibration": 0.658, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9211, 0.8867, 0.9144, 0.9122, 0.9078, 0.9033, 0.9011, 0.8667, 0.8944, 0.9172, 0.31, 0.02, 0.8833, 0.3489, 0.8744, 0.0752, 0.8678, 0.8656, 0.8633, 0.8589, 0.5214 ] }, { "policy": "random", "task_type": "task3", "seed": 50, "steps": 31, "score": 0.5546, "total_reward": 13.3116, "completion_rate": 0.65, "detection_rate": 0.0, "trust_calibration": 0.768, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8756, 0.9233, 0.8867, 0.3344, 0.9122, 0.91, 0.3356, 0.02, 0.9011, 0.02, 0.02, 0.8944, 0.84, 0.8556, 0.8511, 0.9039, 0.2967, 0.3422, 0.87, 0.8928, 0.0686, 0.2833, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 51, "steps": 35, "score": 0.6911, "total_reward": 16.5857, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.634, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9206, 0.9233, 0.9139, 0.8644, 0.9122, 0.8778, 0.9056, 0.3311, 0.8967, 0.3222, 0.8378, 0.8856, 0.8833, 0.9061, 0.8767, 0.9172, 0.073, 0.3178, 0.02, 0.8633, 0.02, 0.8217, 0.4783 ] }, { "policy": "random", "task_type": "task3", "seed": 52, "steps": 39, "score": 0.6835, "total_reward": 19.1391, "completion_rate": 0.95, "detection_rate": 0.5, "trust_calibration": 0.96, "adversarial_detections": 1, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3608, 0.9506, 0.9233, 0.9211, 0.3144, 0.9122, 0.337, 0.9056, 0.3233, 0.9011, 0.8989, 0.8944, 0.84, 0.8356, 0.3333, 0.8811, 0.8789, 0.8644, 0.8722, 0.8878, 0.8811, 0.8567, 0.2744, 0.8772, 0.27, 0.8873, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 53, "steps": 38, "score": 0.636, "total_reward": 16.5363, "completion_rate": 0.8, "detection_rate": 0.0, "trust_calibration": 0.643, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8756, 0.9483, 0.8689, 0.9167, 0.8822, 0.8578, 0.8733, 0.8489, 0.3244, 0.8922, 0.34, 0.3378, 0.8856, 0.02, 0.8789, 0.9017, 0.3174, 0.02, 0.3378, 0.8656, 0.2833, 0.0641, 0.8217, 0.8522, 0.4564 ] }, { "policy": "random", "task_type": "task3", "seed": 54, "steps": 36, "score": 0.6308, "total_reward": 16.4, "completion_rate": 0.8, "detection_rate": 0.0, "trust_calibration": 0.696, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9506, 0.9233, 0.9211, 0.8667, 0.3252, 0.9078, 0.9056, 0.9011, 0.8989, 0.3237, 0.3474, 0.84, 0.8878, 0.8833, 0.3089, 0.8744, 0.87, 0.3308, 0.8811, 0.0619, 0.02, 0.3274, 0.8522, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 55, "steps": 32, "score": 0.5079, "total_reward": 13.2064, "completion_rate": 0.6, "detection_rate": 0.0, "trust_calibration": 0.674, "adversarial_detections": 0, "adversarial_poisonings": 3, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.02, 0.9256, 0.3241, 0.9189, 0.8644, 0.3322, 0.33, 0.9156, 0.02, 0.3511, 0.02, 0.8622, 0.8878, 0.3126, 0.8833, 0.8811, 0.8789, 0.8767, 0.3244, 0.8722, 0.333, 0.0708, 0.0686, 0.8611, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 56, "steps": 32, "score": 0.5675, "total_reward": 14.1867, "completion_rate": 0.65, "detection_rate": 0.0, "trust_calibration": 0.73, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3608, 0.9256, 0.02, 0.8867, 0.9144, 0.9122, 0.02, 0.3578, 0.9056, 0.9033, 0.8667, 0.8744, 0.3422, 0.363, 0.9128, 0.8833, 0.8489, 0.3774, 0.3222, 0.355, 0.8678, 0.0686, 0.8261, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 57, "steps": 37, "score": 0.5298, "total_reward": 13.7744, "completion_rate": 0.6, "detection_rate": 0.0, "trust_calibration": 0.716, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9233, 0.9189, 0.3667, 0.3644, 0.3622, 0.343, 0.9056, 0.02, 0.8667, 0.8922, 0.8878, 0.8856, 0.02, 0.8289, 0.8922, 0.8678, 0.2933, 0.3141, 0.0619, 0.3197, 0.2744, 0.8772, 0.3, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 58, "steps": 37, "score": 0.6112, "total_reward": 17.1148, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.808, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9506, 0.3433, 0.3167, 0.02, 0.9122, 0.91, 0.9078, 0.9056, 0.2911, 0.3259, 0.3467, 0.8422, 0.8878, 0.3163, 0.8811, 0.8789, 0.9194, 0.8522, 0.8856, 0.0663, 0.3341, 0.8567, 0.3044, 0.3022, 0.3, 0.4822 ] }, { "policy": "random", "task_type": "task3", "seed": 59, "steps": 39, "score": 0.6007, "total_reward": 16.2199, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.55, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3533, 0.9139, 0.9167, 0.9122, 0.33, 0.8756, 0.02, 0.02, 0.8467, 0.8944, 0.86, 0.3378, 0.3063, 0.8811, 0.8789, 0.8444, 0.8722, 0.32, 0.2956, 0.0663, 0.2811, 0.8839, 0.8722, 0.2878, 0.4134 ] }, { "policy": "random", "task_type": "task3", "seed": 60, "steps": 39, "score": 0.7149, "total_reward": 17.1579, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.717, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9233, 0.9211, 0.02, 0.3044, 0.92, 0.9056, 0.3233, 0.3211, 0.8967, 0.9194, 0.89, 0.3056, 0.8833, 0.02, 0.8767, 0.8994, 0.87, 0.8306, 0.8789, 0.8194, 0.8678, 0.7902 ] }, { "policy": "random", "task_type": "task3", "seed": 61, "steps": 34, "score": 0.6745, "total_reward": 17.5378, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.937, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8911, 0.9439, 0.9167, 0.3644, 0.3322, 0.9078, 0.8533, 0.9011, 0.9239, 0.8944, 0.8922, 0.8828, 0.3356, 0.3333, 0.8967, 0.0774, 0.8722, 0.8678, 0.8906, 0.0663, 0.02, 0.2789, 0.8567, 0.5562 ] }, { "policy": "random", "task_type": "task3", "seed": 62, "steps": 38, "score": 0.5955, "total_reward": 16.078, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.583, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8733, 0.8689, 0.9144, 0.3622, 0.02, 0.8556, 0.9033, 0.9011, 0.02, 0.3544, 0.8922, 0.89, 0.8878, 0.8333, 0.3089, 0.0797, 0.3022, 0.3378, 0.8633, 0.8789, 0.3297, 0.8794, 0.3022, 0.8478, 0.4237 ] }, { "policy": "random", "task_type": "task3", "seed": 63, "steps": 35, "score": 0.6184, "total_reward": 16.0796, "completion_rate": 0.75, "detection_rate": 0.5, "trust_calibration": 0.677, "adversarial_detections": 1, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3511, 0.9167, 0.8944, 0.3, 0.9078, 0.9306, 0.9033, 0.02, 0.8667, 0.3122, 0.89, 0.8878, 0.9106, 0.8811, 0.8789, 0.8744, 0.3222, 0.3308, 0.2933, 0.2811, 0.3119, 0.8961, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 64, "steps": 43, "score": 0.6578, "total_reward": 19.7347, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.572, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9233, 0.9211, 0.3367, 0.8822, 0.33, 0.9056, 0.3463, 0.8489, 0.3167, 0.02, 0.3422, 0.31, 0.3086, 0.8311, 0.3367, 0.8744, 0.8722, 0.9028, 0.8656, 0.3263, 0.8611, 0.8589, 0.8744, 0.87, 0.8106, 0.8061, 0.2889, 0.7624 ] }, { "policy": "random", "task_type": "task3", "seed": 65, "steps": 34, "score": 0.6769, "total_reward": 16.2452, "completion_rate": 0.8, "detection_rate": 0.0, "trust_calibration": 0.879, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.8933, 0.9461, 0.3689, 0.8844, 0.86, 0.8556, 0.9283, 0.3511, 0.8989, 0.3244, 0.8978, 0.8856, 0.8833, 0.3011, 0.8767, 0.8544, 0.8878, 0.8656, 0.3011, 0.2789, 0.8817, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 66, "steps": 34, "score": 0.5832, "total_reward": 15.7473, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.662, "adversarial_detections": 0, "adversarial_poisonings": 3, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8933, 0.9211, 0.9167, 0.3644, 0.91, 0.8756, 0.9033, 0.3511, 0.8989, 0.02, 0.8944, 0.86, 0.02, 0.3063, 0.8789, 0.8767, 0.0774, 0.02, 0.8878, 0.8906, 0.0663, 0.0641, 0.8589, 0.3067, 0.4526 ] }, { "policy": "random", "task_type": "task3", "seed": 67, "steps": 34, "score": 0.5986, "total_reward": 14.3671, "completion_rate": 0.7, "detection_rate": 0.0, "trust_calibration": 0.891, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.02, 0.9233, 0.3489, 0.9144, 0.3622, 0.36, 0.9328, 0.3003, 0.9011, 0.8667, 0.8922, 0.8378, 0.02, 0.8833, 0.8489, 0.9194, 0.89, 0.8678, 0.8811, 0.0619, 0.3397, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 68, "steps": 33, "score": 0.7306, "total_reward": 18.2654, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.684, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8756, 0.9233, 0.3481, 0.9167, 0.3644, 0.9122, 0.91, 0.9078, 0.9033, 0.3511, 0.8989, 0.3297, 0.8922, 0.8878, 0.02, 0.02, 0.8811, 0.9039, 0.8767, 0.8722, 0.9095, 0.8656, 0.8611, 0.8126 ] }, { "policy": "random", "task_type": "task3", "seed": 69, "steps": 40, "score": 0.7144, "total_reward": 18.574, "completion_rate": 0.95, "detection_rate": 0.0, "trust_calibration": 0.723, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3478, 0.9506, 0.9233, 0.9189, 0.9167, 0.3422, 0.8756, 0.9283, 0.9011, 0.8989, 0.02, 0.3122, 0.8556, 0.8833, 0.8467, 0.8222, 0.8678, 0.8656, 0.3133, 0.8239, 0.0597, 0.8544, 0.87, 0.8106, 0.5244 ] }, { "policy": "random", "task_type": "task3", "seed": 70, "steps": 34, "score": 0.6867, "total_reward": 17.1674, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.786, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9506, 0.02, 0.8889, 0.9144, 0.33, 0.9078, 0.9056, 0.9033, 0.8489, 0.3497, 0.8944, 0.8922, 0.8828, 0.8856, 0.8833, 0.8789, 0.3774, 0.87, 0.3706, 0.0663, 0.8239, 0.8567, 0.5184 ] }, { "policy": "random", "task_type": "task3", "seed": 71, "steps": 29, "score": 0.5734, "total_reward": 14.3348, "completion_rate": 0.65, "detection_rate": 0.0, "trust_calibration": 0.702, "adversarial_detections": 0, "adversarial_poisonings": 3, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.8911, 0.3459, 0.9167, 0.3644, 0.88, 0.9328, 0.8711, 0.8989, 0.8967, 0.3574, 0.3422, 0.89, 0.8856, 0.02, 0.8811, 0.0819, 0.3267, 0.3244, 0.8722, 0.353, 0.0708, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 72, "steps": 30, "score": 0.7073, "total_reward": 16.2683, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.584, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8956, 0.9483, 0.3489, 0.8844, 0.91, 0.9078, 0.9306, 0.9283, 0.9011, 0.3259, 0.8644, 0.84, 0.8878, 0.9106, 0.3033, 0.8811, 0.9039, 0.8394, 0.87, 0.02, 0.8906, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 73, "steps": 34, "score": 0.7782, "total_reward": 18.6761, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.803, "adversarial_detections": 2, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9233, 0.9189, 0.9167, 0.3644, 0.91, 0.9078, 0.9056, 0.3533, 0.02, 0.8467, 0.8944, 0.28, 0.8878, 0.8333, 0.8811, 0.8267, 0.8372, 0.8328, 0.8656, 0.8883, 0.9006, 0.8577 ] }, { "policy": "random", "task_type": "task3", "seed": 74, "steps": 33, "score": 0.7164, "total_reward": 17.1935, "completion_rate": 0.9, "detection_rate": 0.3333, "trust_calibration": 0.957, "adversarial_detections": 1, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9506, 0.9233, 0.9211, 0.8844, 0.86, 0.9056, 0.3233, 0.3189, 0.9217, 0.8944, 0.3352, 0.89, 0.8856, 0.8311, 0.0819, 0.9161, 0.8722, 0.8678, 0.8906, 0.8633, 0.8589, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 75, "steps": 40, "score": 0.6588, "total_reward": 18.4472, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.642, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.8911, 0.8667, 0.3344, 0.86, 0.3278, 0.3256, 0.9033, 0.3511, 0.3259, 0.8444, 0.89, 0.8856, 0.3103, 0.8811, 0.8867, 0.8994, 0.8722, 0.3378, 0.3333, 0.2811, 0.9017, 0.8544, 0.8917, 0.2756, 0.7515 ] }, { "policy": "random", "task_type": "task3", "seed": 76, "steps": 28, "score": 0.7202, "total_reward": 15.8447, "completion_rate": 0.8, "detection_rate": 0.0, "trust_calibration": 0.793, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3526, 0.9233, 0.9189, 0.9167, 0.9144, 0.9122, 0.935, 0.9078, 0.8983, 0.8689, 0.8967, 0.8944, 0.8922, 0.8878, 0.9033, 0.3189, 0.8394, 0.3222, 0.333, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 77, "steps": 34, "score": 0.6731, "total_reward": 16.8286, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.682, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9233, 0.9211, 0.02, 0.9167, 0.9394, 0.91, 0.9078, 0.9056, 0.9033, 0.8989, 0.8894, 0.3422, 0.89, 0.2756, 0.8311, 0.3467, 0.2944, 0.9117, 0.8678, 0.3363, 0.02, 0.8567, 0.775 ] }, { "policy": "random", "task_type": "task3", "seed": 78, "steps": 44, "score": 0.627, "total_reward": 18.1844, "completion_rate": 0.8, "detection_rate": 0.5, "trust_calibration": 0.849, "adversarial_detections": 1, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.8933, 0.3159, 0.8822, 0.8578, 0.9033, 0.9011, 0.8917, 0.8944, 0.8578, 0.3356, 0.8833, 0.2989, 0.3267, 0.3374, 0.2922, 0.32, 0.8156, 0.8711, 0.8589, 0.3022, 0.3, 0.0508, 0.8883, 0.8661, 0.3119, 0.2567, 0.6445 ] }, { "policy": "random", "task_type": "task3", "seed": 79, "steps": 37, "score": 0.6279, "total_reward": 16.9527, "completion_rate": 0.8, "detection_rate": 0.0, "trust_calibration": 0.561, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9233, 0.8867, 0.9144, 0.9122, 0.02, 0.8756, 0.9011, 0.8989, 0.3467, 0.8622, 0.89, 0.8556, 0.8511, 0.9039, 0.3267, 0.0774, 0.02, 0.343, 0.8656, 0.3011, 0.8589, 0.3067, 0.8172, 0.333, 0.4381 ] }, { "policy": "random", "task_type": "task3", "seed": 80, "steps": 40, "score": 0.6766, "total_reward": 18.9455, "completion_rate": 0.9, "detection_rate": 0.0, "trust_calibration": 0.946, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9483, 0.3489, 0.8644, 0.3622, 0.91, 0.9328, 0.9056, 0.8511, 0.3189, 0.3467, 0.3144, 0.32, 0.8933, 0.8811, 0.8267, 0.8994, 0.8722, 0.8678, 0.0686, 0.9061, 0.0619, 0.8194, 0.323, 0.8456, 0.5627 ] }, { "policy": "random", "task_type": "task3", "seed": 81, "steps": 33, "score": 0.7394, "total_reward": 17.745, "completion_rate": 0.9, "detection_rate": 0.5, "trust_calibration": 0.704, "adversarial_detections": 1, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8956, 0.9233, 0.9211, 0.9167, 0.8822, 0.91, 0.9078, 0.3256, 0.3533, 0.9261, 0.8467, 0.02, 0.8922, 0.3078, 0.8856, 0.8833, 0.0841, 0.8967, 0.8744, 0.87, 0.9073, 0.8261, 0.6676 ] }, { "policy": "random", "task_type": "task3", "seed": 82, "steps": 39, "score": 0.6307, "total_reward": 17.661, "completion_rate": 0.75, "detection_rate": 1.0, "trust_calibration": 0.581, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8956, 0.9233, 0.8889, 0.3667, 0.9122, 0.2978, 0.3586, 0.9033, 0.3289, 0.9294, 0.89, 0.8556, 0.3663, 0.3311, 0.3289, 0.8767, 0.8994, 0.3222, 0.32, 0.3528, 0.3683, 0.8411, 0.8567, 0.02, 0.815, 0.8478, 0.7211 ] }, { "policy": "random", "task_type": "task3", "seed": 83, "steps": 33, "score": 0.6648, "total_reward": 15.9564, "completion_rate": 0.8, "detection_rate": 0.25, "trust_calibration": 0.917, "adversarial_detections": 1, "adversarial_poisonings": 3, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8956, 0.3663, 0.9211, 0.9439, 0.9144, 0.3322, 0.9078, 0.9306, 0.8961, 0.8789, 0.8967, 0.8944, 0.86, 0.8356, 0.8811, 0.9184, 0.3474, 0.3552, 0.073, 0.0708, 0.8633, 0.9039, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 84, "steps": 39, "score": 0.7325, "total_reward": 19.0457, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.851, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8756, 0.9233, 0.02, 0.3067, 0.9122, 0.8578, 0.3256, 0.8511, 0.8989, 0.8444, 0.8922, 0.8578, 0.8833, 0.8811, 0.8767, 0.8744, 0.3252, 0.8678, 0.8656, 0.3363, 0.8589, 0.8961, 0.85, 0.2978, 0.8586 ] }, { "policy": "random", "task_type": "task3", "seed": 85, "steps": 32, "score": 0.661, "total_reward": 15.2038, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.807, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8956, 0.9483, 0.9211, 0.9189, 0.9144, 0.9122, 0.33, 0.3356, 0.3533, 0.3211, 0.8967, 0.86, 0.8533, 0.8811, 0.3289, 0.8767, 0.8744, 0.8722, 0.8856, 0.0663, 0.8861, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 86, "steps": 44, "score": 0.6056, "total_reward": 19.3805, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.837, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3478, 0.8933, 0.3489, 0.8844, 0.9372, 0.9078, 0.3556, 0.8689, 0.8967, 0.3222, 0.89, 0.3078, 0.8856, 0.9083, 0.3311, 0.2967, 0.3274, 0.88, 0.8678, 0.2926, 0.3133, 0.8861, 0.0619, 0.8544, 0.3022, 0.8678, 0.8456, 0.8061, 0.3119, 0.02, 0.5089 ] }, { "policy": "random", "task_type": "task3", "seed": 87, "steps": 37, "score": 0.6574, "total_reward": 16.4362, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.807, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8933, 0.9211, 0.9439, 0.8644, 0.86, 0.9078, 0.8733, 0.9011, 0.8667, 0.8922, 0.8556, 0.8833, 0.3311, 0.8789, 0.8767, 0.3422, 0.8878, 0.2856, 0.8261, 0.0619, 0.8994, 0.02, 0.303, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 88, "steps": 36, "score": 0.5911, "total_reward": 15.369, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.855, "adversarial_detections": 0, "adversarial_poisonings": 3, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.02, 0.8733, 0.9461, 0.8667, 0.9144, 0.905, 0.9078, 0.9056, 0.3233, 0.8989, 0.8967, 0.8922, 0.3178, 0.02, 0.02, 0.0841, 0.8967, 0.0774, 0.0752, 0.8678, 0.8633, 0.8789, 0.8194, 0.3022, 0.4961 ] }, { "policy": "random", "task_type": "task3", "seed": 89, "steps": 34, "score": 0.7869, "total_reward": 20.4589, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.809, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8956, 0.9233, 0.3481, 0.9439, 0.3367, 0.9144, 0.9122, 0.337, 0.9056, 0.9033, 0.9011, 0.9239, 0.8644, 0.8922, 0.89, 0.8878, 0.925, 0.3441, 0.8967, 0.3474, 0.87, 0.8928, 0.8283, 0.8789, 0.8766 ] }, { "policy": "random", "task_type": "task3", "seed": 90, "steps": 31, "score": 0.6723, "total_reward": 14.7899, "completion_rate": 0.8, "detection_rate": 0.3333, "trust_calibration": 0.902, "adversarial_detections": 1, "adversarial_poisonings": 2, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9206, 0.8889, 0.3367, 0.8822, 0.8578, 0.9056, 0.9033, 0.9089, 0.02, 0.8922, 0.89, 0.8878, 0.8856, 0.8833, 0.9061, 0.8767, 0.0774, 0.02, 0.9095, 0.8633, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 91, "steps": 44, "score": 0.6543, "total_reward": 18.9747, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.289, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3111, 0.9189, 0.3667, 0.88, 0.3278, 0.9056, 0.8711, 0.8667, 0.3222, 0.34, 0.9128, 0.3133, 0.3311, 0.3367, 0.84, 0.8156, 0.8633, 0.8239, 0.2767, 0.8544, 0.85, 0.8728, 0.8883, 0.8039, 0.2744, 0.672 ] }, { "policy": "random", "task_type": "task3", "seed": 92, "steps": 39, "score": 0.6758, "total_reward": 16.8941, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.804, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9256, 0.9233, 0.9211, 0.3689, 0.3367, 0.9144, 0.3322, 0.8578, 0.9033, 0.8667, 0.3122, 0.2856, 0.8489, 0.2967, 0.8994, 0.8378, 0.8633, 0.8239, 0.8544, 0.8522, 0.85, 0.8106, 0.01 ] }, { "policy": "random", "task_type": "task3", "seed": 93, "steps": 38, "score": 0.7063, "total_reward": 17.6574, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.542, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.8756, 0.8711, 0.3389, 0.9167, 0.9144, 0.9122, 0.9178, 0.9033, 0.8689, 0.8644, 0.3422, 0.8878, 0.8833, 0.8289, 0.8767, 0.8372, 0.02, 0.8306, 0.8789, 0.8567, 0.3274, 0.3352, 0.27, 0.7487 ] }, { "policy": "random", "task_type": "task3", "seed": 94, "steps": 35, "score": 0.6388, "total_reward": 15.969, "completion_rate": 0.75, "detection_rate": 0.5, "trust_calibration": 0.941, "adversarial_detections": 1, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9233, 0.9139, 0.02, 0.9144, 0.02, 0.935, 0.9078, 0.3256, 0.8711, 0.3489, 0.3237, 0.8422, 0.8878, 0.8333, 0.8789, 0.3044, 0.3222, 0.073, 0.2878, 0.8633, 0.9006, 0.8567, 0.6701 ] }, { "policy": "random", "task_type": "task3", "seed": 95, "steps": 35, "score": 0.6469, "total_reward": 17.465, "completion_rate": 0.8, "detection_rate": 0.0, "trust_calibration": 0.834, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8933, 0.8689, 0.3667, 0.3644, 0.307, 0.3578, 0.9056, 0.9283, 0.9011, 0.8967, 0.8944, 0.8922, 0.89, 0.8856, 0.9161, 0.2989, 0.3267, 0.3474, 0.0752, 0.073, 0.8306, 0.8261, 0.3089, 0.8194, 0.5106 ] }, { "policy": "random", "task_type": "task3", "seed": 96, "steps": 36, "score": 0.7393, "total_reward": 17.7436, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.619, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.8733, 0.3167, 0.92, 0.9056, 0.9033, 0.9011, 0.3189, 0.8622, 0.02, 0.8878, 0.8933, 0.8811, 0.9039, 0.8767, 0.8744, 0.8722, 0.87, 0.8306, 0.02, 0.3289, 0.8961, 0.7897 ] }, { "policy": "random", "task_type": "task3", "seed": 97, "steps": 36, "score": 0.7834, "total_reward": 18.8027, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.813, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.8711, 0.9439, 0.8844, 0.86, 0.9078, 0.9306, 0.8711, 0.8989, 0.8967, 0.3214, 0.84, 0.8856, 0.8811, 0.8767, 0.3244, 0.835, 0.2878, 0.3156, 0.8611, 0.8589, 0.8544, 0.8557 ] }, { "policy": "random", "task_type": "task3", "seed": 98, "steps": 37, "score": 0.7572, "total_reward": 18.9302, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.605, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.02, 0.8889, 0.8967, 0.8622, 0.9078, 0.3256, 0.8711, 0.8989, 0.8944, 0.3422, 0.89, 0.3308, 0.8333, 0.3341, 0.8767, 0.9172, 0.8678, 0.8656, 0.8811, 0.8839, 0.8194, 0.8014 ] }, { "policy": "random", "task_type": "task3", "seed": 99, "steps": 37, "score": 0.6531, "total_reward": 18.9394, "completion_rate": 0.9, "detection_rate": 0.0, "trust_calibration": 0.623, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9483, 0.9211, 0.9189, 0.9167, 0.9144, 0.333, 0.9078, 0.3011, 0.9239, 0.3144, 0.89, 0.8556, 0.3033, 0.8811, 0.8789, 0.9017, 0.0774, 0.835, 0.0708, 0.8656, 0.02, 0.9039, 0.3297, 0.8172, 0.3, 0.4885 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 0, "steps": 43, "score": 0.7114, "total_reward": 18.4969, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.729, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.3344, 0.91, 0.9056, 0.9011, 0.3267, 0.8922, 0.8878, 0.8833, 0.8789, 0.8744, 0.3, 0.2956, 0.8611, 0.2867, 0.8522, 0.8478, 0.8433, 0.8389, 0.7841 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 1, "steps": 29, "score": 0.7083, "total_reward": 17.707, "completion_rate": 0.85, "detection_rate": 0.5, "trust_calibration": 0.721, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.6632 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 2, "steps": 29, "score": 0.6919, "total_reward": 17.2983, "completion_rate": 0.85, "detection_rate": 0.4444, "trust_calibration": 0.561, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3778, 0.3456, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.0997, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.6065 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 3, "steps": 42, "score": 0.8546, "total_reward": 18.8008, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.843, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.3111, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.8456, 0.8411, 0.8675 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 4, "steps": 29, "score": 0.7165, "total_reward": 17.9128, "completion_rate": 0.9, "detection_rate": 0.4444, "trust_calibration": 0.721, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.3414, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.0997, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.664 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 5, "steps": 46, "score": 0.7558, "total_reward": 18.1385, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.832, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.3333, 0.8989, 0.8944, 0.32, 0.8856, 0.3111, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.8456, 0.2711, 0.8367, 0.8322, 0.8229 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 6, "steps": 27, "score": 0.6991, "total_reward": 16.778, "completion_rate": 0.85, "detection_rate": 0.4, "trust_calibration": 0.725, "adversarial_detections": 4, "adversarial_poisonings": 6, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.1019, 0.0997, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.6387 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 7, "steps": 42, "score": 0.7756, "total_reward": 19.3902, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.835, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3763, 0.9211, 0.3919, 0.3997, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.2933, 0.8589, 0.8544, 0.28, 0.8456, 0.8411, 0.8478 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 8, "steps": 44, "score": 0.809, "total_reward": 19.4157, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.853, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3526, 0.9211, 0.9167, 0.9122, 0.9078, 0.3333, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.2756, 0.8411, 0.8367, 0.8654 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 9, "steps": 40, "score": 0.782, "total_reward": 19.5499, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.837, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.3841, 0.9189, 0.3997, 0.3994, 0.3972, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.2889, 0.8544, 0.85, 0.8456, 0.8528 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 10, "steps": 31, "score": 0.712, "total_reward": 17.8008, "completion_rate": 0.85, "detection_rate": 0.625, "trust_calibration": 0.448, "adversarial_detections": 5, "adversarial_poisonings": 3, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.3303, 0.3281, 0.8989, 0.0997, 0.0974, 0.0952, 0.9295, 0.925, 0.9206, 0.9161, 0.9117, 0.3356, 0.6281 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 11, "steps": 40, "score": 0.7732, "total_reward": 18.5566, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.835, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.2978, 0.2933, 0.8589, 0.8544, 0.85, 0.8456, 0.8349 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 12, "steps": 42, "score": 0.8546, "total_reward": 18.8009, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.2889, 0.8544, 0.85, 0.8456, 0.8411, 0.8676 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 13, "steps": 39, "score": 0.833, "total_reward": 18.3252, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.811, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3456, 0.3433, 0.9189, 0.9144, 0.91, 0.9056, 0.9011, 0.8967, 0.8922, 0.8878, 0.8833, 0.8789, 0.8744, 0.87, 0.8656, 0.8611, 0.8567, 0.8522, 0.8478, 0.8485 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 14, "steps": 29, "score": 0.6889, "total_reward": 17.9127, "completion_rate": 0.85, "detection_rate": 0.5, "trust_calibration": 0.609, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3763, 0.3841, 0.3689, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.3237, 0.8944, 0.8922, 0.093, 0.0908, 0.0886, 0.0863, 0.9206, 0.9161, 0.9117, 0.6353 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 15, "steps": 30, "score": 0.6847, "total_reward": 18.4869, "completion_rate": 0.9, "detection_rate": 0.4444, "trust_calibration": 0.635, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3608, 0.9256, 0.9233, 0.9211, 0.3459, 0.9167, 0.9144, 0.9122, 0.91, 0.3348, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.8944, 0.8922, 0.093, 0.0908, 0.0886, 0.0863, 0.0841, 0.9184, 0.9139, 0.9095, 0.6404 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 16, "steps": 42, "score": 0.8546, "total_reward": 18.8009, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.2889, 0.8544, 0.85, 0.8456, 0.8411, 0.8676 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 17, "steps": 46, "score": 0.8048, "total_reward": 19.316, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.842, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.2889, 0.8544, 0.28, 0.8456, 0.8411, 0.8367, 0.8322, 0.8605 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 18, "steps": 26, "score": 0.6967, "total_reward": 16.7213, "completion_rate": 0.85, "detection_rate": 0.3333, "trust_calibration": 0.701, "adversarial_detections": 3, "adversarial_poisonings": 6, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.0997, 0.0974, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.6149 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 19, "steps": 20, "score": 0.6193, "total_reward": 13.0053, "completion_rate": 0.65, "detection_rate": 0.0, "trust_calibration": 0.576, "adversarial_detections": 0, "adversarial_poisonings": 5, "status": "failed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3478, 0.9256, 0.9233, 0.9211, 0.9189, 0.3437, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.0974, 0.0952, 0.093, 0.0908, 0.01 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 20, "steps": 46, "score": 0.7498, "total_reward": 19.4938, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.835, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3763, 0.3841, 0.9167, 0.9122, 0.3378, 0.9033, 0.8989, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.3022, 0.8678, 0.8633, 0.8589, 0.2844, 0.85, 0.8456, 0.8411, 0.8367, 0.8322, 0.8412 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 21, "steps": 42, "score": 0.8546, "total_reward": 18.8009, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.2756, 0.8411, 0.8676 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 22, "steps": 36, "score": 0.7334, "total_reward": 21.2675, "completion_rate": 1.0, "detection_rate": 0.8, "trust_calibration": 0.747, "adversarial_detections": 4, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3778, 0.9256, 0.9233, 0.3481, 0.9189, 0.9167, 0.9144, 0.9122, 0.337, 0.9078, 0.3326, 0.9033, 0.9011, 0.3259, 0.8967, 0.3214, 0.8922, 0.89, 0.3148, 0.8856, 0.0863, 0.9206, 0.9161, 0.9117, 0.9073, 0.8789, 0.8544, 0.7968 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 23, "steps": 28, "score": 0.6922, "total_reward": 17.3057, "completion_rate": 0.85, "detection_rate": 0.4444, "trust_calibration": 0.645, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3686, 0.3433, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.0974, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.9139, 0.6298 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 24, "steps": 46, "score": 0.7725, "total_reward": 20.0838, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.836, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3686, 0.9483, 0.3711, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.3022, 0.8678, 0.8633, 0.2889, 0.8544, 0.85, 0.8456, 0.2711, 0.8367, 0.8322, 0.8591 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 25, "steps": 34, "score": 0.6755, "total_reward": 18.9148, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.71, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.9144, 0.3322, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.3189, 0.3167, 0.8944, 0.3122, 0.31, 0.3078, 0.8856, 0.9011, 0.8967, 0.8922, 0.3078, 0.3033, 0.8789, 0.782 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 26, "steps": 34, "score": 0.6561, "total_reward": 19.0282, "completion_rate": 0.8, "detection_rate": 0.6667, "trust_calibration": 0.467, "adversarial_detections": 2, "adversarial_poisonings": 1, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.8989, 0.3667, 0.3644, 0.9372, 0.935, 0.3578, 0.3556, 0.3533, 0.3289, 0.8967, 0.8944, 0.8922, 0.317, 0.8878, 0.3126, 0.8833, 0.0841, 0.9184, 0.9139, 0.8878, 0.8656, 0.3163, 0.3419, 0.6213 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 27, "steps": 46, "score": 0.7256, "total_reward": 20.3155, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.828, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3763, 0.9211, 0.9189, 0.3997, 0.3994, 0.3972, 0.9078, 0.9033, 0.3289, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.2844, 0.85, 0.2756, 0.8411, 0.8367, 0.8322, 0.8395 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 28, "steps": 28, "score": 0.7355, "total_reward": 17.6509, "completion_rate": 0.9, "detection_rate": 0.5, "trust_calibration": 0.725, "adversarial_detections": 5, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.1019, 0.0997, 0.0974, 0.0952, 0.093, 0.9273, 0.9228, 0.9184, 0.9139, 0.6841 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 29, "steps": 28, "score": 0.7258, "total_reward": 18.144, "completion_rate": 0.9, "detection_rate": 0.5, "trust_calibration": 0.709, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.3392, 0.91, 0.9078, 0.9056, 0.9033, 0.3281, 0.8989, 0.8967, 0.8944, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.9139, 0.6801 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 30, "steps": 39, "score": 0.6238, "total_reward": 18.7149, "completion_rate": 0.75, "detection_rate": 1.0, "trust_calibration": 0.712, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.3411, 0.9189, 0.9167, 0.9144, 0.3322, 0.33, 0.9078, 0.3256, 0.3233, 0.9011, 0.8989, 0.8967, 0.3144, 0.8922, 0.31, 0.3078, 0.3233, 0.8989, 0.8944, 0.89, 0.3056, 0.3011, 0.2967, 0.8722, 0.2878, 0.7538 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 31, "steps": 37, "score": 0.6915, "total_reward": 20.0529, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.911, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.9144, 0.3322, 0.91, 0.3278, 0.9056, 0.9033, 0.9011, 0.8989, 0.3697, 0.3774, 0.3772, 0.89, 0.3728, 0.3706, 0.9228, 0.8944, 0.87, 0.8656, 0.2911, 0.8567, 0.8522, 0.843 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 32, "steps": 30, "score": 0.689, "total_reward": 17.9142, "completion_rate": 0.8, "detection_rate": 0.6, "trust_calibration": 0.712, "adversarial_detections": 3, "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.9144, 0.9122, 0.91, 0.9078, 0.3326, 0.3303, 0.9011, 0.8989, 0.8967, 0.8944, 0.0952, 0.093, 0.9273, 0.9228, 0.9184, 0.8922, 0.87, 0.3208, 0.6714 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 33, "steps": 31, "score": 0.7464, "total_reward": 18.6604, "completion_rate": 0.9, "detection_rate": 0.6667, "trust_calibration": 0.845, "adversarial_detections": 4, "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.3437, 0.3414, 0.9122, 0.91, 0.9078, 0.9056, 0.3303, 0.9011, 0.8989, 0.3237, 0.8944, 0.0952, 0.093, 0.9273, 0.9228, 0.9184, 0.9139, 0.8856, 0.7574 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 34, "steps": 40, "score": 0.8105, "total_reward": 18.6408, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.838, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3433, 0.3411, 0.9167, 0.9122, 0.3378, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.8456, 0.853 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 35, "steps": 28, "score": 0.7257, "total_reward": 18.1436, "completion_rate": 0.9, "detection_rate": 0.5, "trust_calibration": 0.708, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.3437, 0.9144, 0.9122, 0.91, 0.3348, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.8944, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.9139, 0.6797 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 36, "steps": 42, "score": 0.7886, "total_reward": 18.9271, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.854, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3686, 0.3433, 0.3481, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.2978, 0.8633, 0.8589, 0.8544, 0.85, 0.8456, 0.8411, 0.8527 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 37, "steps": 28, "score": 0.6822, "total_reward": 17.056, "completion_rate": 0.85, "detection_rate": 0.4, "trust_calibration": 0.659, "adversarial_detections": 4, "adversarial_poisonings": 6, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3478, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.0997, 0.0974, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.9139, 0.62 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 38, "steps": 43, "score": 0.8297, "total_reward": 19.0823, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.774, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3478, 0.9233, 0.9189, 0.9144, 0.91, 0.9056, 0.9011, 0.8967, 0.8922, 0.8878, 0.8833, 0.8789, 0.8744, 0.3, 0.8656, 0.8611, 0.8567, 0.8522, 0.8478, 0.8433, 0.8389, 0.8479 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 39, "steps": 28, "score": 0.7161, "total_reward": 17.9036, "completion_rate": 0.9, "detection_rate": 0.4444, "trust_calibration": 0.71, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.3303, 0.9011, 0.8989, 0.8967, 0.0974, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.9139, 0.6637 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 40, "steps": 37, "score": 0.6368, "total_reward": 19.7412, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.678, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.9144, 0.9122, 0.33, 0.9078, 0.9056, 0.3233, 0.9011, 0.3189, 0.3167, 0.8944, 0.3122, 0.89, 0.3078, 0.8856, 0.3033, 0.8811, 0.8967, 0.3122, 0.3078, 0.8833, 0.2989, 0.2944, 0.87, 0.7673 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 41, "steps": 38, "score": 0.6416, "total_reward": 20.5321, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.732, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.3411, 0.9189, 0.3367, 0.9144, 0.9122, 0.33, 0.9078, 0.3256, 0.3233, 0.3211, 0.8989, 0.8967, 0.8944, 0.3122, 0.89, 0.3078, 0.3056, 0.8833, 0.3189, 0.8944, 0.31, 0.3056, 0.8811, 0.8589, 0.3097, 0.8722, 0.323, 0.7961 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 42, "steps": 31, "score": 0.6759, "total_reward": 17.5723, "completion_rate": 0.85, "detection_rate": 0.4444, "trust_calibration": 0.442, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.3414, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.0997, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.3056, 0.5724 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 43, "steps": 32, "score": 0.6896, "total_reward": 17.9304, "completion_rate": 0.85, "detection_rate": 0.5, "trust_calibration": 0.681, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3456, 0.9233, 0.9211, 0.3719, 0.3797, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.3356, 0.6467 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 44, "steps": 38, "score": 0.6652, "total_reward": 19.2895, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.721, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.8967, 0.9394, 0.9372, 0.36, 0.3578, 0.3556, 0.9283, 0.3511, 0.3489, 0.3167, 0.8944, 0.8922, 0.343, 0.3508, 0.9011, 0.8767, 0.8722, 0.2978, 0.8633, 0.8589, 0.2844, 0.85, 0.7757 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 45, "steps": 40, "score": 0.8827, "total_reward": 18.5376, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.839, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.8456, 0.8709 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 46, "steps": 40, "score": 0.6245, "total_reward": 18.7359, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.429, "adversarial_detections": 3, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3478, 0.9506, 0.9483, 0.9461, 0.3689, 0.3667, 0.3644, 0.9122, 0.91, 0.9078, 0.9056, 0.3763, 0.3841, 0.3839, 0.3817, 0.8944, 0.3192, 0.3178, 0.8856, 0.9228, 0.9184, 0.3422, 0.3728, 0.3683, 0.9006, 0.8994, 0.85, 0.3156, 0.6634 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 47, "steps": 29, "score": 0.7165, "total_reward": 17.9132, "completion_rate": 0.9, "detection_rate": 0.4444, "trust_calibration": 0.722, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.3303, 0.9011, 0.8989, 0.0997, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.6644 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 48, "steps": 28, "score": 0.7257, "total_reward": 18.1437, "completion_rate": 0.9, "detection_rate": 0.5, "trust_calibration": 0.708, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.3459, 0.9167, 0.9144, 0.9122, 0.337, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.8944, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.9139, 0.6798 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 49, "steps": 28, "score": 0.7355, "total_reward": 17.6512, "completion_rate": 0.9, "detection_rate": 0.5, "trust_calibration": 0.726, "adversarial_detections": 5, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.1019, 0.0997, 0.0974, 0.0952, 0.093, 0.9273, 0.9228, 0.9184, 0.9139, 0.6844 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 50, "steps": 42, "score": 0.717, "total_reward": 18.643, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.82, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.3919, 0.3997, 0.3994, 0.3972, 0.9078, 0.9033, 0.8989, 0.8944, 0.32, 0.8856, 0.8811, 0.3067, 0.8722, 0.8678, 0.8633, 0.8589, 0.2844, 0.28, 0.8456, 0.8411, 0.8092 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 51, "steps": 34, "score": 0.7274, "total_reward": 19.6391, "completion_rate": 0.95, "detection_rate": 0.625, "trust_calibration": 0.682, "adversarial_detections": 5, "adversarial_poisonings": 3, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.3414, 0.9122, 0.337, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.3237, 0.8944, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.9073, 0.3311, 0.715 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 52, "steps": 46, "score": 0.5965, "total_reward": 19.0884, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.686, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3548, 0.9256, 0.3433, 0.9211, 0.3619, 0.9144, 0.34, 0.3578, 0.3333, 0.9011, 0.8989, 0.8967, 0.3774, 0.8922, 0.375, 0.8878, 0.3706, 0.3683, 0.3661, 0.3639, 0.2967, 0.3594, 0.3572, 0.355, 0.8656, 0.3089, 0.8722, 0.8656, 0.8589, 0.8522, 0.7165 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 53, "steps": 44, "score": 0.8286, "total_reward": 19.0588, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.3378, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.3067, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.8456, 0.8411, 0.8367, 0.8632 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 54, "steps": 35, "score": 0.6607, "total_reward": 18.4987, "completion_rate": 0.8, "detection_rate": 0.75, "trust_calibration": 0.358, "adversarial_detections": 6, "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.9233, 0.9211, 0.9189, 0.3797, 0.3874, 0.3952, 0.36, 0.9078, 0.3326, 0.3233, 0.9011, 0.8989, 0.3237, 0.8944, 0.8922, 0.89, 0.8878, 0.0886, 0.0863, 0.9206, 0.9161, 0.9117, 0.9073, 0.3661, 0.8984, 0.6167 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 55, "steps": 29, "score": 0.728, "total_reward": 18.1995, "completion_rate": 0.9, "detection_rate": 0.5, "trust_calibration": 0.713, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.3756, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.3348, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.6787 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 56, "steps": 37, "score": 0.6768, "total_reward": 20.9793, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.796, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3548, 0.9506, 0.9483, 0.9461, 0.3689, 0.3667, 0.3644, 0.9122, 0.91, 0.9078, 0.3256, 0.3233, 0.9011, 0.3189, 0.8967, 0.3144, 0.8922, 0.89, 0.8878, 0.8856, 0.3033, 0.8989, 0.2967, 0.8922, 0.8878, 0.2856, 0.8811, 0.2967, 0.8722, 0.8317 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 57, "steps": 38, "score": 0.6683, "total_reward": 20.0487, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.771, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.3389, 0.9167, 0.3344, 0.3322, 0.33, 0.9078, 0.3256, 0.9033, 0.3211, 0.8989, 0.3167, 0.3144, 0.8922, 0.89, 0.8878, 0.3486, 0.3563, 0.3819, 0.9161, 0.34, 0.8833, 0.8589, 0.8544, 0.85, 0.8058 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 58, "steps": 29, "score": 0.7278, "total_reward": 18.1944, "completion_rate": 0.9, "detection_rate": 0.5556, "trust_calibration": 0.731, "adversarial_detections": 5, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.3481, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.3348, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.6999 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 59, "steps": 43, "score": 0.7547, "total_reward": 19.6215, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.735, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3526, 0.9233, 0.9211, 0.3719, 0.3797, 0.3644, 0.91, 0.9056, 0.9011, 0.8967, 0.8922, 0.8878, 0.8833, 0.8789, 0.8744, 0.3, 0.8656, 0.2911, 0.8567, 0.8522, 0.8478, 0.8433, 0.8389, 0.8208 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 60, "steps": 36, "score": 0.7716, "total_reward": 18.5184, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.836, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.3411, 0.9189, 0.9167, 0.3344, 0.3322, 0.33, 0.3278, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.8439 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 61, "steps": 42, "score": 0.7616, "total_reward": 19.8021, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.836, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3763, 0.9211, 0.9189, 0.3997, 0.3994, 0.3972, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.28, 0.8456, 0.2711, 0.8483 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 62, "steps": 43, "score": 0.7892, "total_reward": 18.9402, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.925, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3548, 0.9506, 0.3733, 0.9189, 0.9144, 0.91, 0.9056, 0.9011, 0.8967, 0.3222, 0.3178, 0.8833, 0.8789, 0.8744, 0.87, 0.8656, 0.8611, 0.8567, 0.8522, 0.8478, 0.8433, 0.8389, 0.8682 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 63, "steps": 44, "score": 0.8031, "total_reward": 18.4705, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.841, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.2756, 0.2711, 0.8367, 0.8449 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 64, "steps": 37, "score": 0.7634, "total_reward": 19.0848, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.76, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.3841, 0.9189, 0.3997, 0.3994, 0.3972, 0.33, 0.9328, 0.3556, 0.9011, 0.8967, 0.8922, 0.8878, 0.8833, 0.8789, 0.8744, 0.87, 0.8656, 0.8611, 0.8567, 0.8522, 0.8227 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 65, "steps": 40, "score": 0.797, "total_reward": 19.1287, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.838, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3763, 0.9211, 0.3919, 0.3997, 0.9122, 0.9078, 0.3333, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.8456, 0.853 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 66, "steps": 29, "score": 0.7165, "total_reward": 17.9135, "completion_rate": 0.9, "detection_rate": 0.4444, "trust_calibration": 0.723, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.337, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.0997, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.6647 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 67, "steps": 32, "score": 0.6707, "total_reward": 18.1095, "completion_rate": 0.85, "detection_rate": 0.5, "trust_calibration": 0.44, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3503, 0.9211, 0.3459, 0.9167, 0.9144, 0.3392, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.8944, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.9139, 0.9095, 0.3333, 0.5864 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 68, "steps": 42, "score": 0.7575, "total_reward": 19.6962, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.641, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3478, 0.9256, 0.9233, 0.3741, 0.9189, 0.3897, 0.3974, 0.3622, 0.9078, 0.9033, 0.3289, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.8456, 0.8411, 0.7994 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 69, "steps": 30, "score": 0.6917, "total_reward": 17.9836, "completion_rate": 0.85, "detection_rate": 0.5, "trust_calibration": 0.658, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3478, 0.9506, 0.9483, 0.9461, 0.3689, 0.3667, 0.3644, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.8944, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.9139, 0.9095, 0.6454 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 70, "steps": 27, "score": 0.7, "total_reward": 19.5996, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.716, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.3919, 0.9167, 0.3994, 0.3972, 0.395, 0.3928, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.3144, 0.8922, 0.89, 0.8878, 0.8856, 0.3033, 0.8811, 0.8789, 0.2967, 0.8744, 0.2922, 0.8166 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 71, "steps": 29, "score": 0.6808, "total_reward": 17.7021, "completion_rate": 0.85, "detection_rate": 0.375, "trust_calibration": 0.687, "adversarial_detections": 3, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.3841, 0.3919, 0.3997, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.8944, 0.0952, 0.093, 0.0908, 0.0886, 0.0863, 0.9206, 0.9161, 0.9117, 0.6174 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 72, "steps": 34, "score": 0.6937, "total_reward": 19.4234, "completion_rate": 0.9, "detection_rate": 0.5714, "trust_calibration": 0.723, "adversarial_detections": 4, "adversarial_poisonings": 3, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3763, 0.9211, 0.3919, 0.3997, 0.3644, 0.9122, 0.91, 0.3348, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.8944, 0.8922, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.9139, 0.9095, 0.8833, 0.2811, 0.6916 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 73, "steps": 26, "score": 0.6967, "total_reward": 16.7204, "completion_rate": 0.85, "detection_rate": 0.3333, "trust_calibration": 0.697, "adversarial_detections": 3, "adversarial_poisonings": 6, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.0997, 0.0974, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.614 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 74, "steps": 28, "score": 0.675, "total_reward": 18.8991, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.806, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.3414, 0.9122, 0.91, 0.9078, 0.3256, 0.9033, 0.9011, 0.8989, 0.8967, 0.8944, 0.8922, 0.31, 0.8878, 0.3056, 0.8833, 0.8811, 0.2989, 0.2967, 0.8744, 0.2922, 0.8193 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 75, "steps": 40, "score": 0.7977, "total_reward": 19.1445, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.837, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.2756, 0.8529 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 76, "steps": 42, "score": 0.8335, "total_reward": 19.1712, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.842, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3686, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.2844, 0.85, 0.8456, 0.8411, 0.8671 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 77, "steps": 29, "score": 0.6992, "total_reward": 18.1794, "completion_rate": 0.9, "detection_rate": 0.4444, "trust_calibration": 0.645, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3478, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.3259, 0.8967, 0.8944, 0.0952, 0.093, 0.0908, 0.0886, 0.0863, 0.9206, 0.9161, 0.9117, 0.6451 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 78, "steps": 28, "score": 0.7161, "total_reward": 17.9032, "completion_rate": 0.9, "detection_rate": 0.4444, "trust_calibration": 0.709, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3503, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.0974, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.9139, 0.6633 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 79, "steps": 44, "score": 0.7563, "total_reward": 19.6626, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.829, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.9122, 0.9078, 0.3333, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.2933, 0.8589, 0.2844, 0.85, 0.8456, 0.8411, 0.8367, 0.8421 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 80, "steps": 46, "score": 0.7065, "total_reward": 16.9564, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.803, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.3156, 0.3111, 0.3067, 0.8722, 0.8678, 0.2933, 0.8589, 0.8544, 0.85, 0.2756, 0.8411, 0.8367, 0.8322, 0.7808 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 81, "steps": 44, "score": 0.8031, "total_reward": 18.4703, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.2756, 0.2711, 0.8367, 0.8447 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 82, "steps": 44, "score": 0.8286, "total_reward": 19.0588, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.32, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.8456, 0.8411, 0.8367, 0.8632 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 83, "steps": 33, "score": 0.7082, "total_reward": 18.4133, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.826, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3456, 0.9483, 0.9461, 0.8989, 0.3667, 0.3644, 0.9372, 0.935, 0.8878, 0.9306, 0.3533, 0.3511, 0.3489, 0.3467, 0.9194, 0.3422, 0.34, 0.9033, 0.8789, 0.8744, 0.87, 0.8656, 0.8611, 0.8133 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 84, "steps": 29, "score": 0.71, "total_reward": 18.4609, "completion_rate": 0.9, "detection_rate": 0.5, "trust_calibration": 0.716, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.3686, 0.9233, 0.9211, 0.3459, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.3214, 0.8922, 0.093, 0.0908, 0.0886, 0.0863, 0.9206, 0.9161, 0.9117, 0.6795 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 85, "steps": 29, "score": 0.6923, "total_reward": 17.999, "completion_rate": 0.85, "detection_rate": 0.5, "trust_calibration": 0.718, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.9144, 0.3392, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.8944, 0.8922, 0.093, 0.0908, 0.0886, 0.0863, 0.9206, 0.9161, 0.9117, 0.6627 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 86, "steps": 33, "score": 0.6856, "total_reward": 18.5122, "completion_rate": 0.9, "detection_rate": 0.4444, "trust_calibration": 0.605, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3478, 0.9256, 0.3663, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.0974, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.9139, 0.9095, 0.3333, 0.6263 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 87, "steps": 27, "score": 0.6991, "total_reward": 16.7786, "completion_rate": 0.85, "detection_rate": 0.4, "trust_calibration": 0.727, "adversarial_detections": 4, "adversarial_poisonings": 6, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.1019, 0.0997, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.6393 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 88, "steps": 42, "score": 0.7762, "total_reward": 19.4062, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.835, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.9122, 0.9078, 0.9033, 0.3289, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.28, 0.8456, 0.8411, 0.8479 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 89, "steps": 29, "score": 0.7259, "total_reward": 18.1473, "completion_rate": 0.9, "detection_rate": 0.5, "trust_calibration": 0.696, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.3503, 0.9211, 0.9189, 0.9167, 0.9144, 0.3392, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.6745 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 90, "steps": 28, "score": 0.7161, "total_reward": 17.9035, "completion_rate": 0.9, "detection_rate": 0.4444, "trust_calibration": 0.71, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.3392, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.0974, 0.0952, 0.093, 0.0908, 0.0886, 0.9228, 0.9184, 0.9139, 0.6635 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 91, "steps": 29, "score": 0.7262, "total_reward": 18.1541, "completion_rate": 0.9, "detection_rate": 0.5, "trust_calibration": 0.723, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.3459, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.3259, 0.8967, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.6813 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 92, "steps": 38, "score": 0.7692, "total_reward": 18.4611, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.842, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.3389, 0.3367, 0.3344, 0.3322, 0.9078, 0.9033, 0.8989, 0.3244, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.8411 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 93, "steps": 45, "score": 0.6795, "total_reward": 19.7049, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.749, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3478, 0.9256, 0.9233, 0.9211, 0.9189, 0.3897, 0.9144, 0.3972, 0.395, 0.9078, 0.3906, 0.3883, 0.3861, 0.8967, 0.3222, 0.8878, 0.8833, 0.8789, 0.8744, 0.3, 0.2956, 0.8789, 0.8722, 0.8478, 0.8433, 0.8389, 0.2644, 0.7847 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 94, "steps": 32, "score": 0.6725, "total_reward": 18.1568, "completion_rate": 0.8, "detection_rate": 0.3333, "trust_calibration": 0.85, "adversarial_detections": 1, "adversarial_poisonings": 2, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.8989, 0.3667, 0.3644, 0.9372, 0.935, 0.3578, 0.8856, 0.9283, 0.3511, 0.3489, 0.3467, 0.3444, 0.8922, 0.89, 0.8878, 0.0886, 0.0863, 0.9206, 0.8922, 0.8678, 0.8633, 0.6213 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 95, "steps": 40, "score": 0.7977, "total_reward": 19.1447, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.838, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.3733, 0.9461, 0.3689, 0.3667, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.2756, 0.853 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 96, "steps": 31, "score": 0.6978, "total_reward": 17.4444, "completion_rate": 0.85, "detection_rate": 0.5, "trust_calibration": 0.726, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.3511, 0.9189, 0.9167, 0.3344, 0.3322, 0.91, 0.9078, 0.9056, 0.9033, 0.9011, 0.8989, 0.8967, 0.8944, 0.8922, 0.093, 0.0908, 0.0886, 0.0863, 0.9206, 0.9161, 0.9117, 0.9073, 0.66 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 97, "steps": 38, "score": 0.8207, "total_reward": 18.8758, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.829, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9528, 0.9506, 0.9483, 0.3711, 0.3689, 0.3667, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.8552 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 98, "steps": 29, "score": 0.7165, "total_reward": 17.9132, "completion_rate": 0.9, "detection_rate": 0.4444, "trust_calibration": 0.722, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.9189, 0.9167, 0.9144, 0.9122, 0.91, 0.9078, 0.9056, 0.3303, 0.9011, 0.8989, 0.0997, 0.0974, 0.0952, 0.093, 0.0908, 0.925, 0.9206, 0.9161, 0.9117, 0.6644 ] }, { "policy": "heuristic", "task_type": "task3", "seed": 99, "steps": 38, "score": 0.7037, "total_reward": 21.1121, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.723, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9278, 0.9256, 0.9233, 0.9211, 0.3389, 0.9167, 0.9144, 0.3322, 0.91, 0.3278, 0.9056, 0.9033, 0.3211, 0.8989, 0.3167, 0.8944, 0.8922, 0.31, 0.8878, 0.3056, 0.3211, 0.8967, 0.3122, 0.8878, 0.3033, 0.8789, 0.8744, 0.87, 0.8287 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 0, "steps": 42, "score": 0.867, "total_reward": 19.0739, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.935, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.3111, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8904 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 1, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 2, "steps": 42, "score": 0.8422, "total_reward": 18.5276, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.933, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.3511, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8724 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 3, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.3111, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 4, "steps": 42, "score": 0.8689, "total_reward": 19.1154, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.3289, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 5, "steps": 46, "score": 0.7916, "total_reward": 18.9976, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.917, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.3333, 0.8989, 0.8944, 0.32, 0.8856, 0.3111, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, 0.8618 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 6, "steps": 40, "score": 0.8977, "total_reward": 18.8523, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 7, "steps": 40, "score": 0.8958, "total_reward": 18.8108, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 8, "steps": 44, "score": 0.8405, "total_reward": 19.3315, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.3511, 0.9167, 0.9122, 0.9078, 0.9033, 0.3289, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8857 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 9, "steps": 40, "score": 0.8958, "total_reward": 18.8108, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 10, "steps": 42, "score": 0.8421, "total_reward": 18.5263, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.928, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.3067, 0.3022, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.871 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 11, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 12, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 13, "steps": 40, "score": 0.8958, "total_reward": 18.8108, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 14, "steps": 44, "score": 0.8405, "total_reward": 19.3311, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.932, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.2933, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8853 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 15, "steps": 46, "score": 0.8162, "total_reward": 19.5883, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.93, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.9211, 0.9167, 0.9122, 0.3378, 0.9033, 0.8989, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, 0.8825 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 16, "steps": 40, "score": 0.8977, "total_reward": 18.8523, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 17, "steps": 42, "score": 0.867, "total_reward": 19.0739, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.935, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8903 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 18, "steps": 40, "score": 0.8958, "total_reward": 18.8108, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 19, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.3333, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 20, "steps": 46, "score": 0.7653, "total_reward": 18.3663, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.909, "adversarial_detections": 5, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.3289, 0.8944, 0.89, 0.8856, 0.8811, 0.3067, 0.8722, 0.8678, 0.2933, 0.8589, 0.8544, 0.85, 0.8873, 0.8828, 0.8784, 0.8739, 0.8423 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 21, "steps": 40, "score": 0.8958, "total_reward": 18.8109, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.932, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 22, "steps": 46, "score": 0.7652, "total_reward": 18.3659, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.908, "adversarial_detections": 5, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.3111, 0.8767, 0.8722, 0.2978, 0.8633, 0.2889, 0.8544, 0.85, 0.8873, 0.8828, 0.8784, 0.8739, 0.8419 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 23, "steps": 42, "score": 0.8403, "total_reward": 18.4862, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.3511, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8727 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 24, "steps": 44, "score": 0.8405, "total_reward": 19.3311, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.933, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.2933, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8854 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 25, "steps": 44, "score": 0.8405, "total_reward": 19.3314, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.3511, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.3244, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8857 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 26, "steps": 42, "score": 0.8403, "total_reward": 18.4855, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.3067, 0.3022, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.872 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 27, "steps": 46, "score": 0.8179, "total_reward": 19.6285, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.924, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.3111, 0.8767, 0.8722, 0.2978, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, 0.881 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 28, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 29, "steps": 44, "score": 0.8405, "total_reward": 19.3311, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.933, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.3244, 0.89, 0.8856, 0.8811, 0.8767, 0.3022, 0.8678, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8854 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 30, "steps": 44, "score": 0.8148, "total_reward": 18.741, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.922, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.32, 0.8856, 0.3111, 0.3067, 0.8722, 0.8678, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8653 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 31, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 32, "steps": 44, "score": 0.8423, "total_reward": 19.3725, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.3111, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8851 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 33, "steps": 46, "score": 0.8178, "total_reward": 19.6279, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.922, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.3289, 0.8944, 0.89, 0.8856, 0.8811, 0.3067, 0.8722, 0.8678, 0.2933, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, 0.8804 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 34, "steps": 42, "score": 0.867, "total_reward": 19.0739, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.935, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.3289, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8904 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 35, "steps": 44, "score": 0.8405, "total_reward": 19.3313, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.933, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.3333, 0.8989, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8855 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 36, "steps": 44, "score": 0.8167, "total_reward": 18.7838, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.927, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.3511, 0.3467, 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8664 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 37, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 38, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 39, "steps": 40, "score": 0.8958, "total_reward": 18.8109, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.932, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 40, "steps": 42, "score": 0.867, "total_reward": 19.0739, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.935, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.3378, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8904 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 41, "steps": 42, "score": 0.867, "total_reward": 19.0739, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.935, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8904 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 42, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 43, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 44, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.3111, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 45, "steps": 40, "score": 0.8958, "total_reward": 18.8109, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.932, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 46, "steps": 42, "score": 0.8689, "total_reward": 19.1154, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.3244, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 47, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.3067, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 48, "steps": 44, "score": 0.8405, "total_reward": 19.3312, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.933, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.3378, 0.9033, 0.8989, 0.8944, 0.32, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8855 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 49, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 50, "steps": 44, "score": 0.8423, "total_reward": 19.3725, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.3333, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.3022, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.885 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 51, "steps": 46, "score": 0.8178, "total_reward": 19.6279, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.922, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.3289, 0.8944, 0.32, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.2933, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, 0.8804 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 52, "steps": 46, "score": 0.7899, "total_reward": 18.9582, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.926, "adversarial_detections": 5, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.9211, 0.9167, 0.9122, 0.3378, 0.9033, 0.3289, 0.8944, 0.32, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.85, 0.8873, 0.8828, 0.8784, 0.8739, 0.8641 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 53, "steps": 44, "score": 0.8423, "total_reward": 19.3723, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.93, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.3378, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.3067, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8848 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 54, "steps": 46, "score": 0.7916, "total_reward": 18.9976, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.918, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.9211, 0.9167, 0.9122, 0.3378, 0.9033, 0.8989, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.8722, 0.2978, 0.8633, 0.8589, 0.8544, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, 0.8619 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 55, "steps": 44, "score": 0.8423, "total_reward": 19.3725, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.3511, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8851 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 56, "steps": 46, "score": 0.7899, "total_reward": 18.957, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.922, "adversarial_detections": 5, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.9211, 0.9167, 0.9122, 0.9078, 0.3333, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.3067, 0.8722, 0.8678, 0.8633, 0.2889, 0.8544, 0.85, 0.8873, 0.8828, 0.8784, 0.8739, 0.8629 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 57, "steps": 46, "score": 0.8179, "total_reward": 19.6284, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.924, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.3378, 0.9033, 0.3289, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.2933, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, 0.881 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 58, "steps": 44, "score": 0.8423, "total_reward": 19.3725, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.885 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 59, "steps": 42, "score": 0.8403, "total_reward": 18.4861, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.3511, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8726 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 60, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 61, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 62, "steps": 46, "score": 0.7933, "total_reward": 19.0389, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.916, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.9211, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.3156, 0.3111, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, 0.8615 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 63, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 64, "steps": 44, "score": 0.8423, "total_reward": 19.3725, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.3333, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8851 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 65, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.32, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 66, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.32, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 67, "steps": 46, "score": 0.8179, "total_reward": 19.6293, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.927, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.3378, 0.9033, 0.8989, 0.3244, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, 0.8818 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 68, "steps": 46, "score": 0.8162, "total_reward": 19.5881, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.929, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.3511, 0.9167, 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.3111, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8544, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, 0.8823 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 69, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.3333, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 70, "steps": 44, "score": 0.8423, "total_reward": 19.3728, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.933, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.9211, 0.9167, 0.9122, 0.3378, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8854 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 71, "steps": 44, "score": 0.8405, "total_reward": 19.3313, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.933, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.3333, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8856 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 72, "steps": 44, "score": 0.8423, "total_reward": 19.3724, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8849 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 73, "steps": 40, "score": 0.8958, "total_reward": 18.8109, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.932, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 74, "steps": 44, "score": 0.8423, "total_reward": 19.3724, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.3289, 0.8944, 0.89, 0.8856, 0.3111, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8849 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 75, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 76, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.3511, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 77, "steps": 42, "score": 0.867, "total_reward": 19.0739, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.935, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.2978, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8904 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 78, "steps": 42, "score": 0.867, "total_reward": 19.0739, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.935, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8903 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 79, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.32, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 80, "steps": 46, "score": 0.7652, "total_reward": 18.3653, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.905, "adversarial_detections": 5, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.3156, 0.3111, 0.3067, 0.8722, 0.8678, 0.2933, 0.8589, 0.8544, 0.85, 0.8873, 0.8828, 0.8784, 0.8739, 0.8412 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 81, "steps": 42, "score": 0.867, "total_reward": 19.0739, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.935, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8904 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 82, "steps": 44, "score": 0.8423, "total_reward": 19.3725, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.8989, 0.8944, 0.32, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.885 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 83, "steps": 40, "score": 0.8977, "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 84, "steps": 44, "score": 0.8405, "total_reward": 19.3315, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.3511, 0.9167, 0.9122, 0.3378, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8857 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 85, "steps": 44, "score": 0.8405, "total_reward": 19.3313, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.933, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.3422, 0.9078, 0.9033, 0.8989, 0.3244, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8856 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 86, "steps": 40, "score": 0.8977, "total_reward": 18.8523, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 87, "steps": 40, "score": 0.8977, "total_reward": 18.8523, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.9095, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8938 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 88, "steps": 44, "score": 0.8423, "total_reward": 19.3725, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.885 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 89, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 90, "steps": 40, "score": 0.8958, "total_reward": 18.8109, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.932, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 91, "steps": 44, "score": 0.8423, "total_reward": 19.3721, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.93, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.3378, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.2978, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8846 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 92, "steps": 44, "score": 0.8423, "total_reward": 19.3725, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.3333, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.3067, 0.8722, 0.8678, 0.8633, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.885 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 93, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.3556, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 94, "steps": 46, "score": 0.8161, "total_reward": 19.5872, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.926, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.3289, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.8722, 0.2978, 0.8633, 0.8589, 0.8544, 0.8917, 0.8873, 0.8828, 0.8784, 0.8739, 0.8814 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 95, "steps": 44, "score": 0.8405, "total_reward": 19.3315, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.3467, 0.9122, 0.3378, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.8633, 0.8589, 0.8961, 0.8917, 0.8873, 0.8828, 0.8784, 0.8857 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 96, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.3511, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 97, "steps": 40, "score": 0.8958, "total_reward": 18.8109, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.932, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.894 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 98, "steps": 42, "score": 0.8689, "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.8856, 0.8811, 0.3067, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] }, { "policy": "oracle_lite", "task_type": "task3", "seed": 99, "steps": 42, "score": 0.8689, "total_reward": 19.1154, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", "difficulty_profile": { "adaptive": false, "episodes_seen": 0, "rolling_detection_rate": 0.0, "adversarial_threshold": 0.7, "high_stakes_ratio": 0.35, "verify_budget_penalty": 0, "adversary_benign_confidence": 0.88, "adversary_poison_confidence": 0.92 }, "rewards": [ 0.9256, 0.9211, 0.9167, 0.9122, 0.9078, 0.9033, 0.8989, 0.8944, 0.89, 0.3156, 0.8811, 0.8767, 0.8722, 0.8678, 0.905, 0.9006, 0.8961, 0.8917, 0.8873, 0.8828, 0.8901 ] } ], "chart": "outputs/baseline_comparison.png" }