diff --git "a/outputs/evaluation_results.json" "b/outputs/evaluation_results.json" --- "a/outputs/evaluation_results.json" +++ "b/outputs/evaluation_results.json" @@ -6,10 +6,38 @@ "task3" ], "episodes_per_policy": 20, + "adaptive": false, + "difficulty_controller": { + "adaptive": true, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "difficulty_controller_by_task_policy": { + "task1": { + "random": {}, + "heuristic": {}, + "oracle_lite": {} + }, + "task2": { + "random": {}, + "heuristic": {}, + "oracle_lite": {} + }, + "task3": { + "random": {}, + "heuristic": {}, + "oracle_lite": {} + } + }, "summary": { "random": { "episodes": 60, - "avg_score": 0.7144, + "avg_score": 0.6954, "avg_completion_rate": 0.8222, "avg_detection_rate": 0.8111, "avg_trust_calibration": 0.4227, @@ -17,7 +45,7 @@ }, "heuristic": { "episodes": 60, - "avg_score": 0.8162, + "avg_score": 0.796, "avg_completion_rate": 0.8958, "avg_detection_rate": 0.9115, "avg_trust_calibration": 0.4381, @@ -25,7 +53,7 @@ }, "oracle_lite": { "episodes": 60, - "avg_score": 0.8718, + "avg_score": 0.8553, "avg_completion_rate": 0.8858, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.5905, @@ -36,7 +64,7 @@ "task1": { "random": { "episodes": 20, - "avg_score": 0.7948, + "avg_score": 0.7702, "avg_completion_rate": 0.77, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.0, @@ -44,7 +72,7 @@ }, "heuristic": { "episodes": 20, - "avg_score": 0.8911, + "avg_score": 0.869, "avg_completion_rate": 0.845, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.0, @@ -52,7 +80,7 @@ }, "oracle_lite": { "episodes": 20, - "avg_score": 0.9445, + "avg_score": 0.918, "avg_completion_rate": 0.735, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.0, @@ -62,7 +90,7 @@ "task2": { "random": { "episodes": 20, - "avg_score": 0.6493, + "avg_score": 0.6505, "avg_completion_rate": 0.8767, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.5424, @@ -70,7 +98,7 @@ }, "heuristic": { "episodes": 20, - "avg_score": 0.7736, + "avg_score": 0.7677, "avg_completion_rate": 0.9399, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.5741, @@ -78,7 +106,7 @@ }, "oracle_lite": { "episodes": 20, - "avg_score": 0.776, + "avg_score": 0.7801, "avg_completion_rate": 0.93, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.84, @@ -88,7 +116,7 @@ "task3": { "random": { "episodes": 20, - "avg_score": 0.699, + "avg_score": 0.6655, "avg_completion_rate": 0.82, "avg_detection_rate": 0.4333, "avg_trust_calibration": 0.7258, @@ -96,7 +124,7 @@ }, "heuristic": { "episodes": 20, - "avg_score": 0.7838, + "avg_score": 0.7513, "avg_completion_rate": 0.9025, "avg_detection_rate": 0.7346, "avg_trust_calibration": 0.7401, @@ -104,7 +132,7 @@ }, "oracle_lite": { "episodes": 20, - "avg_score": 0.895, + "avg_score": 0.8678, "avg_completion_rate": 0.9925, "avg_detection_rate": 1.0, "avg_trust_calibration": 0.9314, @@ -118,26 +146,36 @@ "task_type": "task1", "seed": 0, "steps": 15, - "score": 0.6812, - "total_reward": 8.175, + "score": 0.6569, + "total_reward": 7.8825, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.955, - 0.99, - 0.99, + 0.914, + 0.962, + 0.962, 0.02, - 0.99, - 0.99, - 0.99, - 0.36, + 0.962, + 0.962, + 0.962, + 0.3645, 0.02, 0.02, - 0.925 + 0.867 ] }, { @@ -145,23 +183,33 @@ "task_type": "task1", "seed": 1, "steps": 15, - "score": 0.825, - "total_reward": 7.425, + "score": 0.7996, + "total_reward": 7.196, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.955, - 0.955, - 0.99, - 0.925, - 0.99, - 0.315 + 0.962, + 0.982, + 0.914, + 0.914, + 0.962, + 0.867, + 0.962, + 0.3165 ] }, { @@ -169,24 +217,34 @@ "task_type": "task1", "seed": 2, "steps": 15, - "score": 0.843, - "total_reward": 8.43, + "score": 0.8129, + "total_reward": 8.1294, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.36, - 0.315, - 0.99, - 0.99, - 0.955, - 0.99, - 0.925, - 0.925, - 0.99 + 0.369, + 0.3024, + 0.962, + 0.962, + 0.914, + 0.962, + 0.867, + 0.867, + 0.962 ] }, { @@ -194,27 +252,37 @@ "task_type": "task1", "seed": 3, "steps": 14, - "score": 0.8346, - "total_reward": 10.85, + "score": 0.8084, + "total_reward": 10.5095, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.36, - 0.925, - 0.99, - 0.99, - 0.99, - 0.36, - 0.36, - 0.925, - 0.99 + 0.982, + 0.982, + 0.962, + 0.3455, + 0.867, + 0.962, + 0.946, + 0.962, + 0.3455, + 0.3645, + 0.867, + 0.962 ] }, { @@ -222,25 +290,35 @@ "task_type": "task1", "seed": 4, "steps": 15, - "score": 0.8045, - "total_reward": 8.85, + "score": 0.7814, + "total_reward": 8.5956, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.36, - 0.955, - 0.99, - 0.36, - 0.315, - 0.99, - 0.955 + 0.962, + 0.982, + 0.962, + 0.3645, + 0.914, + 0.962, + 0.3455, + 0.3136, + 0.962, + 0.914 ] }, { @@ -248,26 +326,36 @@ "task_type": "task1", "seed": 5, "steps": 15, - "score": 0.7571, - "total_reward": 9.085, + "score": 0.725, + "total_reward": 8.7, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.955, - 0.99, - 0.315, - 0.36, - 0.36, - 0.925, - 0.99, - 0.99, - 0.36, - 0.925 + 0.962, + 0.914, + 0.962, + 0.3165, + 0.3455, + 0.3455, + 0.867, + 0.946, + 0.962, + 0.3455, + 0.867 ] }, { @@ -275,24 +363,34 @@ "task_type": "task1", "seed": 6, "steps": 15, - "score": 0.838, - "total_reward": 8.38, + "score": 0.8118, + "total_reward": 8.1182, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.955, - 0.955, - 0.99, - 0.99, - 0.99, - 0.99, - 0.955, - 0.925, - 0.315 + 0.914, + 0.914, + 0.962, + 0.962, + 0.962, + 0.962, + 0.934, + 0.867, + 0.3206 ] }, { @@ -300,24 +398,34 @@ "task_type": "task1", "seed": 7, "steps": 15, - "score": 0.97, - "total_reward": 9.7, + "score": 0.9334, + "total_reward": 9.334, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.925, - 0.955, - 0.955, - 0.99, - 0.925, - 0.99, - 0.99, - 0.99, - 0.99 + 0.867, + 0.914, + 0.914, + 0.962, + 0.867, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -325,25 +433,35 @@ "task_type": "task1", "seed": 8, "steps": 15, - "score": 0.8659, - "total_reward": 9.525, + "score": 0.8425, + "total_reward": 9.2675, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.36, - 0.99, - 0.955, - 0.99, - 0.99, - 0.36, - 0.99, - 0.99, - 0.955 + 0.962, + 0.369, + 0.962, + 0.914, + 0.962, + 0.962, + 0.3645, + 0.962, + 0.982, + 0.914 ] }, { @@ -351,26 +469,36 @@ "task_type": "task1", "seed": 9, "steps": 15, - "score": 0.7958, - "total_reward": 9.55, + "score": 0.7751, + "total_reward": 9.3011, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.925, - 0.36, - 0.955, - 0.36, + 0.982, + 0.962, + 0.962, + 0.867, + 0.3616, + 0.914, + 0.3645, 0.02, - 0.99, - 0.99, - 0.99 + 0.982, + 0.962, + 0.962 ] }, { @@ -378,25 +506,35 @@ "task_type": "task1", "seed": 10, "steps": 15, - "score": 0.7841, - "total_reward": 8.625, + "score": 0.7653, + "total_reward": 8.418, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.955, - 0.99, - 0.99, + 0.962, + 0.962, + 0.982, + 0.914, + 0.982, + 0.962, 0.02, - 0.36, - 0.36, - 0.99 + 0.3455, + 0.3645, + 0.962 ] }, { @@ -404,26 +542,36 @@ "task_type": "task1", "seed": 11, "steps": 15, - "score": 0.8454, - "total_reward": 10.145, + "score": 0.8199, + "total_reward": 9.8394, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.925, - 0.99, - 0.99, - 0.36, - 0.955, - 0.99, - 0.99, - 0.99, + 0.867, + 0.962, + 0.962, + 0.3504, + 0.914, + 0.982, + 0.962, + 0.982, 0.02, - 0.955, - 0.99 + 0.914, + 0.962 ] }, { @@ -431,26 +579,36 @@ "task_type": "task1", "seed": 12, "steps": 15, - "score": 0.6379, - "total_reward": 7.655, + "score": 0.6163, + "total_reward": 7.3956, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.955, - 0.36, - 0.99, - 0.955, - 0.99, - 0.955, + 0.982, + 0.914, + 0.3455, + 0.962, + 0.898, + 0.962, + 0.914, 0.02, - 0.36, - 0.36, - 0.36 + 0.3616, + 0.3455, + 0.3455 ] }, { @@ -458,22 +616,32 @@ "task_type": "task1", "seed": 13, "steps": 15, - "score": 0.7556, - "total_reward": 6.8, + "score": 0.7283, + "total_reward": 6.555, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.955, - 0.925, - 0.99, - 0.955, - 0.99, - 0.955, + 0.962, + 0.914, + 0.867, + 0.962, + 0.934, + 0.962, + 0.914, 0.02 ] }, @@ -482,26 +650,36 @@ "task_type": "task1", "seed": 14, "steps": 17, - "score": 0.9204, - "total_reward": 11.045, + "score": 0.8867, + "total_reward": 10.6405, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.925, - 0.955, - 0.955, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.955 + 0.962, + 0.867, + 0.914, + 0.914, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.914 ] }, { @@ -509,28 +687,38 @@ "task_type": "task1", "seed": 15, "steps": 16, - "score": 0.7107, - "total_reward": 9.95, + "score": 0.6915, + "total_reward": 9.6809, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.36, - 0.99, - 0.99, - 0.925, - 0.36, - 0.36, - 0.99, - 0.36, - 0.925, - 0.99, - 0.36, - 0.36, - 0.99 + 0.3504, + 0.962, + 0.962, + 0.867, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.867, + 0.982, + 0.3455, + 0.3455, + 0.962 ] }, { @@ -538,27 +726,37 @@ "task_type": "task1", "seed": 16, "steps": 15, - "score": 0.7377, - "total_reward": 9.59, + "score": 0.7164, + "total_reward": 9.313, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.315, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.2975, 0.02, - 0.99, + 0.982, 0.02, - 0.36, - 0.955, - 0.99 + 0.3455, + 0.914, + 0.962 ] }, { @@ -566,27 +764,37 @@ "task_type": "task1", "seed": 17, "steps": 15, - "score": 0.6612, - "total_reward": 8.595, + "score": 0.6495, + "total_reward": 8.4439, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, + 0.962, 0.02, - 0.36, - 0.315, - 0.925, - 0.99, - 0.99, - 0.315, - 0.99, - 0.99, - 0.99, - 0.36 + 0.3455, + 0.3136, + 0.867, + 0.982, + 0.962, + 0.3206, + 0.962, + 0.962, + 0.982, + 0.3826 ] }, { @@ -594,26 +802,36 @@ "task_type": "task1", "seed": 18, "steps": 15, - "score": 0.8483, - "total_reward": 10.18, + "score": 0.8235, + "total_reward": 9.8815, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, + 0.962, + 0.962, 0.02, - 0.99, - 0.99, - 0.36, - 0.99, - 0.925, - 0.99, - 0.955, - 0.99 + 0.962, + 0.982, + 0.3645, + 0.962, + 0.867, + 0.962, + 0.914, + 0.962 ] }, { @@ -621,25 +839,35 @@ "task_type": "task1", "seed": 19, "steps": 15, - "score": 0.78, - "total_reward": 8.58, + "score": 0.7588, + "total_reward": 8.347, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.315, - 0.99, - 0.99, - 0.99, - 0.99, - 0.36, + 0.2975, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3455, 0.02, - 0.955, - 0.99, - 0.99 + 0.93, + 0.982, + 0.962 ] }, { @@ -647,28 +875,38 @@ "task_type": "task1", "seed": 0, "steps": 13, - "score": 0.765, - "total_reward": 10.71, + "score": 0.753, + "total_reward": 10.5415, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.36, - 0.36, - 0.99, - 0.36, - 0.36, - 0.99, - 0.99, - 0.36, - 0.99, - 0.99 + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645, + 0.962, + 0.962, + 0.3455, + 0.962, + 0.962 ] }, { @@ -676,27 +914,37 @@ "task_type": "task1", "seed": 1, "steps": 12, - "score": 0.7962, - "total_reward": 10.35, + "score": 0.7843, + "total_reward": 10.196, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.36, - 0.36, - 0.99, - 0.36, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99 + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -704,26 +952,36 @@ "task_type": "task1", "seed": 2, "steps": 11, - "score": 0.885, - "total_reward": 10.62, + "score": 0.8612, + "total_reward": 10.3345, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.36, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.369, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -731,23 +989,33 @@ "task_type": "task1", "seed": 3, "steps": 16, - "score": 0.99, - "total_reward": 8.91, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -755,26 +1023,36 @@ "task_type": "task1", "seed": 4, "steps": 11, - "score": 0.9375, - "total_reward": 11.25, + "score": 0.911, + "total_reward": 10.9324, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -782,23 +1060,33 @@ "task_type": "task1", "seed": 5, "steps": 16, - "score": 0.85, - "total_reward": 7.65, + "score": 0.8266, + "total_reward": 7.439, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.36, - 0.99, - 0.36, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.3525, + 0.962, + 0.962 ] }, { @@ -806,25 +1094,35 @@ "task_type": "task1", "seed": 6, "steps": 10, - "score": 0.99, - "total_reward": 10.89, + "score": 0.962, + "total_reward": 10.582, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -832,26 +1130,36 @@ "task_type": "task1", "seed": 7, "steps": 16, - "score": 0.8325, - "total_reward": 9.99, + "score": 0.8166, + "total_reward": 9.7988, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.36, - 0.99, - 0.36, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.3686, + 0.962, + 0.3826, + 0.3896, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -859,24 +1167,34 @@ "task_type": "task1", "seed": 8, "steps": 16, - "score": 0.864, - "total_reward": 8.64, + "score": 0.8399, + "total_reward": 8.3989, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99, - 0.36, - 0.99, - 0.99 + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962 ] }, { @@ -884,27 +1202,37 @@ "task_type": "task1", "seed": 9, "steps": 16, - "score": 0.7962, - "total_reward": 10.35, + "score": 0.785, + "total_reward": 10.2052, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.36, - 0.99, - 0.36, - 0.36, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.3756, + 0.962, + 0.3896, + 0.391, + 0.391, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -912,25 +1240,35 @@ "task_type": "task1", "seed": 10, "steps": 10, - "score": 0.99, - "total_reward": 10.89, + "score": 0.962, + "total_reward": 10.582, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -938,27 +1276,37 @@ "task_type": "task1", "seed": 11, "steps": 16, - "score": 0.7962, - "total_reward": 10.35, + "score": 0.7843, + "total_reward": 10.196, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.36, - 0.36, - 0.99, - 0.36, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99 + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -966,23 +1314,33 @@ "task_type": "task1", "seed": 12, "steps": 16, - "score": 0.99, - "total_reward": 8.91, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -990,24 +1348,34 @@ "task_type": "task1", "seed": 13, "steps": 16, - "score": 0.927, - "total_reward": 9.27, + "score": 0.9003, + "total_reward": 9.0035, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1015,28 +1383,38 @@ "task_type": "task1", "seed": 14, "steps": 13, - "score": 0.765, - "total_reward": 10.71, + "score": 0.7534, + "total_reward": 10.5473, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.36, - 0.36, - 0.99, - 0.36, - 0.36, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.3686, + 0.3756, + 0.962, + 0.3896, + 0.391, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1044,28 +1422,38 @@ "task_type": "task1", "seed": 15, "steps": 13, - "score": 0.855, - "total_reward": 11.97, + "score": 0.8312, + "total_reward": 11.6374, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.36, - 0.99, - 0.99, - 0.99, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99, - 0.36, - 0.99, - 0.99, - 0.99 + 0.3546, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962 ] }, { @@ -1073,23 +1461,33 @@ "task_type": "task1", "seed": 16, "steps": 16, - "score": 0.99, - "total_reward": 8.91, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1097,23 +1495,33 @@ "task_type": "task1", "seed": 17, "steps": 16, - "score": 0.92, - "total_reward": 8.28, + "score": 0.8943, + "total_reward": 8.0485, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1121,25 +1529,35 @@ "task_type": "task1", "seed": 18, "steps": 10, - "score": 0.99, - "total_reward": 10.89, + "score": 0.962, + "total_reward": 10.582, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1147,27 +1565,37 @@ "task_type": "task1", "seed": 19, "steps": 12, - "score": 0.8931, - "total_reward": 11.61, + "score": 0.8675, + "total_reward": 11.2779, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.36, - 0.99, - 0.99, - 0.99, - 0.99, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1175,23 +1603,33 @@ "task_type": "task1", "seed": 0, "steps": 16, - "score": 0.99, - "total_reward": 8.91, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1199,23 +1637,33 @@ "task_type": "task1", "seed": 1, "steps": 16, - "score": 0.99, - "total_reward": 8.91, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1223,23 +1671,33 @@ "task_type": "task1", "seed": 2, "steps": 16, - "score": 0.85, - "total_reward": 7.65, + "score": 0.8266, + "total_reward": 7.439, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.36, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.3525, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1247,23 +1705,33 @@ "task_type": "task1", "seed": 3, "steps": 16, - "score": 0.99, - "total_reward": 8.91, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1271,23 +1739,33 @@ "task_type": "task1", "seed": 4, "steps": 16, - "score": 0.92, - "total_reward": 8.28, + "score": 0.8943, + "total_reward": 8.0485, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.36, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962 ] }, { @@ -1295,23 +1773,33 @@ "task_type": "task1", "seed": 5, "steps": 16, - "score": 0.85, - "total_reward": 7.65, + "score": 0.8266, + "total_reward": 7.439, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.36, - 0.99, - 0.36, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.3525, + 0.962, + 0.962 ] }, { @@ -1319,23 +1807,33 @@ "task_type": "task1", "seed": 6, "steps": 16, - "score": 0.99, - "total_reward": 8.91, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1343,23 +1841,33 @@ "task_type": "task1", "seed": 7, "steps": 16, - "score": 0.99, - "total_reward": 8.91, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1367,23 +1875,33 @@ "task_type": "task1", "seed": 8, "steps": 16, - "score": 0.85, - "total_reward": 7.65, + "score": 0.8266, + "total_reward": 7.439, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99, - 0.36, - 0.99 + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962 ] }, { @@ -1391,23 +1909,33 @@ "task_type": "task1", "seed": 9, "steps": 16, - "score": 0.99, - "total_reward": 8.91, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1415,23 +1943,33 @@ "task_type": "task1", "seed": 10, "steps": 16, - "score": 0.99, - "total_reward": 8.91, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1439,23 +1977,33 @@ "task_type": "task1", "seed": 11, "steps": 16, - "score": 0.92, - "total_reward": 8.28, + "score": 0.8943, + "total_reward": 8.0485, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1463,23 +2011,33 @@ "task_type": "task1", "seed": 12, "steps": 16, - "score": 0.99, - "total_reward": 8.91, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1487,23 +2045,33 @@ "task_type": "task1", "seed": 13, "steps": 16, - "score": 0.99, - "total_reward": 8.91, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1511,23 +2079,33 @@ "task_type": "task1", "seed": 14, "steps": 16, - "score": 0.92, - "total_reward": 8.28, + "score": 0.8943, + "total_reward": 8.0485, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1535,23 +2113,33 @@ "task_type": "task1", "seed": 15, "steps": 16, - "score": 0.85, - "total_reward": 7.65, + "score": 0.8266, + "total_reward": 7.439, "completion_rate": 0.6, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.36, - 0.99, - 0.99, - 0.99, - 0.36, - 0.99, - 0.99, - 0.99 + 0.3525, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962 ] }, { @@ -1559,23 +2147,33 @@ "task_type": "task1", "seed": 16, "steps": 16, - "score": 0.99, - "total_reward": 8.91, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1583,23 +2181,33 @@ "task_type": "task1", "seed": 17, "steps": 16, - "score": 0.92, - "total_reward": 8.28, + "score": 0.8943, + "total_reward": 8.0485, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.36, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1607,23 +2215,33 @@ "task_type": "task1", "seed": 18, "steps": 16, - "score": 0.99, - "total_reward": 8.91, + "score": 0.962, + "total_reward": 8.658, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 ] }, { @@ -1631,23 +2249,33 @@ "task_type": "task1", "seed": 19, "steps": 16, - "score": 0.92, - "total_reward": 8.28, + "score": 0.8943, + "total_reward": 8.0485, "completion_rate": 0.7, "detection_rate": 1.0, "trust_calibration": 0.0, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.99, - 0.99, - 0.36, - 0.99, - 0.99 + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962 ] }, { @@ -1655,31 +2283,41 @@ "task_type": "task2", "seed": 0, "steps": 25, - "score": 0.536, - "total_reward": 9.6482, + "score": 0.5298, + "total_reward": 9.5365, "completion_rate": 0.667, "detection_rate": 1.0, "trust_calibration": 0.461, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9417, - 0.965, - 0.9533, + 0.9083, + 0.925, + 0.9167, 0.02, - 0.9183, - 0.9067, + 0.8917, 0.8833, - 0.2217, + 0.8667, + 0.2233, 0.02, 0.02, - 0.755, - 0.8017, - 0.7083, - 0.755, - 0.0933, - 0.07, + 0.725, + 0.8083, + 0.6917, + 0.775, + 0.1317, + 0.115, 0.5949 ] }, @@ -1688,31 +2326,41 @@ "task_type": "task2", "seed": 1, "steps": 31, - "score": 0.723, - "total_reward": 13.014, + "score": 0.7252, + "total_reward": 13.054, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.569, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9767, - 0.9067, - 0.8717, - 0.895, - 0.8017, - 0.8483, - 0.14, - 0.7667, - 0.79, - 0.6967, - 0.755, - 0.0933, - 0.6733, - 0.65, - 0.6617, + 0.9417, + 0.9833, + 0.8833, + 0.8583, + 0.875, + 0.7583, + 0.8417, + 0.165, + 0.7833, + 0.8, + 0.6833, + 0.735, + 0.1217, + 0.7167, + 0.7, + 0.7083, 0.849 ] }, @@ -1721,31 +2369,41 @@ "task_type": "task2", "seed": 2, "steps": 27, - "score": 0.6585, - "total_reward": 11.8527, + "score": 0.6551, + "total_reward": 11.7913, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.579, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.3383, - 0.28, - 0.9533, - 0.9417, - 0.8717, - 0.895, - 0.8017, - 0.7783, + 0.3517, + 0.262, + 0.9167, + 0.9083, + 0.8583, + 0.875, + 0.7583, + 0.7417, 0.825, - 0.7317, - 0.79, - 0.6967, - 0.6733, - 0.72, - 0.0583, - 0.0467, + 0.7083, + 0.8, + 0.6833, + 0.6667, + 0.75, + 0.0967, + 0.0983, 0.766 ] }, @@ -1754,34 +2412,44 @@ "task_type": "task2", "seed": 3, "steps": 28, - "score": 0.6095, - "total_reward": 12.8003, + "score": 0.6154, + "total_reward": 12.9233, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.586, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9767, - 0.965, - 0.3033, - 0.86, - 0.9183, - 0.9067, - 0.895, - 0.2333, + 0.99, + 0.9833, + 0.925, + 0.2717, + 0.8, + 0.8917, + 0.9333, + 0.875, 0.2217, - 0.7783, - 0.8367, - 0.1167, - 0.7783, - 0.1167, - 0.6967, - 0.72, - 0.6617, - 0.035, + 0.2233, + 0.7417, + 0.8333, + 0.1483, + 0.7917, + 0.1383, + 0.7333, + 0.75, + 0.7083, + 0.123, 0.7686 ] }, @@ -1790,32 +2458,42 @@ "task_type": "task2", "seed": 4, "steps": 28, - "score": 0.6492, - "total_reward": 12.3352, + "score": 0.6575, + "total_reward": 12.4928, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.458, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9767, - 0.965, - 0.3033, + 0.9417, + 0.9833, + 0.925, + 0.2817, + 0.8667, 0.8833, - 0.9067, - 0.245, - 0.1867, - 0.8483, - 0.79, - 0.755, - 0.7783, - 0.755, - 0.7317, - 0.72, - 0.0583, - 0.0467, + 0.23, + 0.2113, + 0.8417, + 0.8, + 0.775, + 0.7917, + 0.775, + 0.7583, + 0.75, + 0.1067, + 0.1313, 0.7235 ] }, @@ -1824,31 +2502,41 @@ "task_type": "task2", "seed": 5, "steps": 26, - "score": 0.6227, - "total_reward": 12.4544, + "score": 0.6206, + "total_reward": 12.4128, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.408, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.93, - 0.9533, - 0.245, - 0.2683, - 0.2567, - 0.8133, - 0.8717, - 0.86, - 0.1983, - 0.755, - 0.8133, - 0.14, - 0.7783, + 0.9417, + 0.9, + 0.9167, + 0.24, + 0.2467, + 0.2383, + 0.7667, + 0.9083, + 0.85, + 0.1967, + 0.725, + 0.8667, + 0.165, + 0.7917, + 0.7833, 0.7667, - 0.7433, 0.02, 0.65, 0.7061 @@ -1859,31 +2547,41 @@ "task_type": "task2", "seed": 6, "steps": 31, - "score": 0.6949, - "total_reward": 12.5083, + "score": 0.7065, + "total_reward": 12.7163, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.576, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9417, - 0.9067, - 0.93, - 0.9183, - 0.9067, - 0.895, - 0.8367, - 0.7783, - 0.14, - 0.8133, - 0.105, - 0.7317, - 0.7433, - 0.7317, - 0.6617, - 0.65, + 0.9083, + 0.8833, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8833, + 0.7417, + 0.188, + 0.8667, + 0.13, + 0.7583, + 0.7667, + 0.7583, + 0.7083, + 0.7, 0.8083 ] }, @@ -1892,30 +2590,40 @@ "task_type": "task2", "seed": 7, "steps": 25, - "score": 0.7412, - "total_reward": 12.6001, + "score": 0.7366, + "total_reward": 12.5218, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.776, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9067, - 0.9183, - 0.895, - 0.9183, + 0.8333, + 0.8917, + 0.875, + 0.8917, + 0.775, + 0.8583, + 0.85, + 0.8333, 0.825, - 0.8717, - 0.86, - 0.8367, + 0.1817, + 0.8, + 0.6833, 0.825, - 0.1633, - 0.79, - 0.6967, - 0.755, 0.02, - 0.7317, + 0.7583, 0.8784 ] }, @@ -1924,32 +2632,42 @@ "task_type": "task2", "seed": 8, "steps": 25, - "score": 0.7241, - "total_reward": 13.757, + "score": 0.7329, + "total_reward": 13.9253, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.425, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.315, - 0.9533, + 0.9333, + 0.335, + 0.9167, + 0.8667, 0.8833, - 0.9067, - 0.895, - 0.2333, - 0.8717, - 0.86, - 0.79, - 0.8133, - 0.72, - 0.0817, - 0.755, - 0.7433, - 0.7317, - 0.72, + 0.875, + 0.2767, + 0.8583, + 0.9, + 0.8, + 0.8167, + 0.7, + 0.1233, + 0.775, + 0.7667, + 0.8083, + 0.75, 0.7987 ] }, @@ -1958,30 +2676,40 @@ "task_type": "task2", "seed": 9, "steps": 27, - "score": 0.7134, - "total_reward": 12.8407, + "score": 0.7062, + "total_reward": 12.712, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.597, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9767, - 0.965, - 0.8717, - 0.28, - 0.8717, - 0.895, + 0.99, + 0.9333, + 0.925, + 0.8083, + 0.278, + 0.8583, + 0.925, 0.02, - 0.2217, - 0.8483, + 0.2233, + 0.8417, 0.825, - 0.8133, - 0.755, - 0.6967, - 0.6967, + 0.8167, + 0.735, + 0.6833, + 0.7333, 0.65, 0.8157 ] @@ -1991,34 +2719,44 @@ "task_type": "task2", "seed": 10, "steps": 26, - "score": 0.5607, - "total_reward": 11.7744, + "score": 0.5723, + "total_reward": 12.0174, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.855, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.965, - 0.9533, - 0.8833, - 0.2567, - 0.8833, + 0.9417, + 0.925, + 0.9667, + 0.8667, + 0.2483, + 0.8667, 0.02, - 0.21, - 0.8483, + 0.205, + 0.8917, 0.825, - 0.8133, - 0.755, - 0.1283, + 0.8167, + 0.775, + 0.1567, 0.02, + 0.153, + 0.7667, + 0.8083, 0.105, - 0.7433, - 0.7317, - 0.07, - 0.0583, + 0.1067, 0.8194 ] }, @@ -2027,31 +2765,41 @@ "task_type": "task2", "seed": 11, "steps": 23, - "score": 0.7258, - "total_reward": 13.0647, + "score": 0.7197, + "total_reward": 12.955, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.57, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9067, - 0.965, - 0.9533, - 0.2917, - 0.8833, - 0.2567, - 0.8833, - 0.8717, + 0.8333, + 0.925, + 0.9167, + 0.2703, + 0.8667, + 0.2483, + 0.8667, + 0.9083, 0.02, - 0.8017, + 0.8083, 0.825, - 0.8133, - 0.8017, - 0.7783, - 0.7667, - 0.7083, + 0.8167, + 0.7683, + 0.7917, + 0.7833, + 0.7417, 0.8063 ] }, @@ -2060,33 +2808,43 @@ "task_type": "task2", "seed": 12, "steps": 30, - "score": 0.6, - "total_reward": 12.0005, + "score": 0.6047, + "total_reward": 12.0935, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.344, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.93, - 0.3033, - 0.9417, + 0.99, + 0.9, + 0.2717, + 0.9083, + 0.8267, 0.8833, - 0.9067, - 0.8367, + 0.8333, 0.02, - 0.1983, - 0.1867, - 0.175, - 0.7667, - 0.7783, - 0.7083, - 0.7317, - 0.65, + 0.2197, + 0.1883, + 0.18, + 0.7833, + 0.7917, + 0.7417, + 0.7583, 0.65, - 0.0117, + 0.7, + 0.0633, 0.6839 ] }, @@ -2095,31 +2853,41 @@ "task_type": "task2", "seed": 13, "steps": 30, - "score": 0.6597, - "total_reward": 11.8747, + "score": 0.6649, + "total_reward": 11.9681, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.271, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9183, - 0.86, - 0.9067, - 0.1983, - 0.8717, - 0.8017, + 0.9417, + 0.8917, + 0.8, + 0.8833, + 0.2067, + 0.8583, + 0.8083, 0.02, - 0.7667, - 0.7083, - 0.72, - 0.6617, - 0.72, - 0.6967, - 0.035, - 0.65, + 0.7833, + 0.6917, + 0.75, + 0.6583, + 0.75, + 0.7333, + 0.09, + 0.7, 0.7014 ] }, @@ -2128,31 +2896,41 @@ "task_type": "task2", "seed": 14, "steps": 26, - "score": 0.7196, - "total_reward": 13.6724, + "score": 0.7146, + "total_reward": 13.5771, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.416, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.895, - 0.9067, - 0.8833, - 0.2567, - 0.895, + 0.9417, + 0.825, 0.8833, - 0.8717, - 0.86, - 0.8367, - 0.7667, - 0.79, - 0.1283, - 0.7667, - 0.755, - 0.0933, + 0.8667, + 0.2383, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8333, + 0.7833, + 0.8, + 0.1797, + 0.7833, + 0.775, + 0.1217, 0.65, 0.7957 ] @@ -2162,35 +2940,45 @@ "task_type": "task2", "seed": 15, "steps": 27, - "score": 0.5586, - "total_reward": 12.2887, + "score": 0.5573, + "total_reward": 12.2603, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.606, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.3383, - 0.9767, - 0.965, - 0.8717, - 0.28, - 0.2683, - 0.9067, - 0.895, - 0.8017, - 0.21, - 0.1983, - 0.1867, - 0.8133, - 0.79, - 0.1283, - 0.685, - 0.7317, - 0.72, - 0.7083, - 0.0467, + 0.3037, + 0.9333, + 0.925, + 0.8083, + 0.265, + 0.2567, + 0.8433, + 0.925, + 0.7583, + 0.215, + 0.1967, + 0.1883, + 0.8167, + 0.8, + 0.1567, + 0.675, + 0.7583, + 0.75, + 0.7417, + 0.0883, 0.732 ] }, @@ -2199,33 +2987,43 @@ "task_type": "task2", "seed": 16, "steps": 23, - "score": 0.5559, - "total_reward": 11.1181, + "score": 0.5543, + "total_reward": 11.0864, "completion_rate": 0.733, "detection_rate": 1.0, "trust_calibration": 0.437, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9767, - 0.965, - 0.9533, 0.9417, - 0.2333, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.2217, 0.02, - 0.895, + 0.925, 0.02, - 0.2217, - 0.8133, + 0.2133, + 0.8167, 0.825, - 0.1633, + 0.1817, 0.02, - 0.79, - 0.7317, - 0.755, - 0.0933, + 0.8, + 0.7583, + 0.775, + 0.1317, 0.6298 ] }, @@ -2234,34 +3032,44 @@ "task_type": "task2", "seed": 17, "steps": 27, - "score": 0.5614, - "total_reward": 11.7892, + "score": 0.5694, + "total_reward": 11.9565, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.688, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, + 0.9417, 0.02, - 0.315, - 0.2567, - 0.8483, - 0.2567, - 0.895, - 0.1867, - 0.86, - 0.8483, - 0.8367, - 0.175, - 0.8133, - 0.72, + 0.28, + 0.2613, + 0.7917, + 0.2483, + 0.875, + 0.2213, + 0.85, + 0.8417, + 0.7933, + 0.233, + 0.8667, + 0.7, + 0.7833, 0.7667, - 0.7433, - 0.7317, - 0.07, - 0.0117, + 0.7583, + 0.168, + 0.0703, 0.7609 ] }, @@ -2270,30 +3078,40 @@ "task_type": "task2", "seed": 18, "steps": 23, - "score": 0.6709, - "total_reward": 11.4056, + "score": 0.6662, + "total_reward": 11.3256, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.602, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.965, + 0.9417, + 0.925, 0.02, - 0.93, - 0.2683, + 0.9, 0.2567, - 0.895, - 0.8017, - 0.86, - 0.8017, + 0.2483, + 0.875, + 0.7583, + 0.85, + 0.8083, 0.825, - 0.8017, - 0.79, + 0.8083, + 0.85, 0.02, - 0.72, + 0.75, 0.7306 ] }, @@ -2310,23 +3128,33 @@ "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.2917, - 0.9533, - 0.9417, - 0.9183, - 0.9067, - 0.245, + 0.2633, + 0.9167, + 0.9083, + 0.8917, + 0.8833, + 0.23, 0.02, 0.825, - 0.8483, + 0.8917, 0.825, - 0.7317, - 0.79, - 0.6967, 0.7083, - 0.7317, - 0.6617, + 0.8, + 0.6833, + 0.7417, + 0.7583, + 0.7083, 0.8255 ] }, @@ -2335,33 +3163,43 @@ "task_type": "task2", "seed": 0, "steps": 31, - "score": 0.6053, - "total_reward": 12.1069, + "score": 0.6145, + "total_reward": 12.2902, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.72, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9767, - 0.965, - 0.3033, - 0.2917, - 0.28, - 0.2683, - 0.895, - 0.8717, - 0.8483, - 0.175, - 0.8017, - 0.7783, - 0.755, - 0.7317, - 0.7083, - 0.035, - 0.0117, + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.2467, + 0.875, + 0.8583, + 0.8417, + 0.19, + 0.8083, + 0.7917, + 0.775, + 0.7583, + 0.7417, + 0.09, + 0.0733, 0.7719 ] }, @@ -2370,31 +3208,41 @@ "task_type": "task2", "seed": 1, "steps": 17, - "score": 0.7761, - "total_reward": 13.9703, + "score": 0.768, + "total_reward": 13.8236, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.282, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9767, - 0.965, - 0.3033, - 0.2917, - 0.28, - 0.9183, - 0.9067, - 0.895, + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.8917, 0.8833, - 0.8717, - 0.86, - 0.8483, - 0.8367, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, 0.825, - 0.8133, + 0.8167, 0.7053 ] }, @@ -2403,31 +3251,41 @@ "task_type": "task2", "seed": 2, "steps": 17, - "score": 0.7376, - "total_reward": 13.2776, + "score": 0.7237, + "total_reward": 13.0266, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.284, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.3383, - 0.3267, - 0.965, - 0.9533, - 0.9417, - 0.93, - 0.9183, - 0.9067, - 0.895, + 0.3517, + 0.2883, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, 0.8833, - 0.8717, - 0.86, - 0.8483, - 0.8367, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, 0.825, - 0.1633, + 0.1787, 0.6626 ] }, @@ -2436,29 +3294,39 @@ "task_type": "task2", "seed": 3, "steps": 30, - "score": 0.7783, - "total_reward": 12.4521, + "score": 0.7823, + "total_reward": 12.5171, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, - 0.7667, - 0.0933, - 0.72, - 0.6967, - 0.6733, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, 0.9021 ] }, @@ -2467,31 +3335,41 @@ "task_type": "task2", "seed": 4, "steps": 17, - "score": 0.8174, - "total_reward": 14.7141, + "score": 0.7999, + "total_reward": 14.3981, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.426, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9767, - 0.965, - 0.9533, 0.9417, - 0.93, - 0.2683, - 0.9067, - 0.895, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.2537, 0.8833, - 0.8717, - 0.86, - 0.8483, - 0.8367, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, 0.825, - 0.1633, + 0.1787, 0.7991 ] }, @@ -2500,29 +3378,39 @@ "task_type": "task2", "seed": 5, "steps": 30, - "score": 0.6476, - "total_reward": 10.3623, + "score": 0.6545, + "total_reward": 10.4723, "completion_rate": 0.733, "detection_rate": 1.0, "trust_calibration": 0.816, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.2567, - 0.8833, - 0.21, - 0.8367, - 0.8133, - 0.14, - 0.7667, - 0.0933, - 0.72, - 0.6967, - 0.6733, + 0.9333, + 0.9167, + 0.9, + 0.2483, + 0.8667, + 0.215, + 0.8333, + 0.8167, + 0.165, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, 0.7623 ] }, @@ -2531,29 +3419,39 @@ "task_type": "task2", "seed": 6, "steps": 15, - "score": 0.8968, - "total_reward": 14.3481, + "score": 0.8749, + "total_reward": 13.9981, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.28, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9767, - 0.965, - 0.9533, 0.9417, - 0.93, - 0.9183, - 0.9067, - 0.895, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, 0.8833, - 0.8717, - 0.86, - 0.8483, - 0.8367, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, 0.7481 ] }, @@ -2562,31 +3460,41 @@ "task_type": "task2", "seed": 7, "steps": 28, - "score": 0.7441, - "total_reward": 13.3933, + "score": 0.7465, + "total_reward": 13.4373, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.833, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9767, - 0.315, - 0.9533, - 0.2917, - 0.28, - 0.9067, + 0.9417, + 0.9333, + 0.313, + 0.9167, + 0.3163, + 0.318, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, + 0.75, + 0.7333, 0.8983 ] }, @@ -2595,31 +3503,41 @@ "task_type": "task2", "seed": 8, "steps": 17, - "score": 0.8168, - "total_reward": 14.7016, + "score": 0.7992, + "total_reward": 14.3856, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.39, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.3267, - 0.965, - 0.9533, 0.9417, - 0.93, - 0.2683, - 0.9067, - 0.895, + 0.2953, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.2537, 0.8833, - 0.8717, - 0.86, - 0.8483, - 0.8367, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, 0.825, - 0.8133, + 0.8167, 0.7866 ] }, @@ -2628,32 +3546,42 @@ "task_type": "task2", "seed": 9, "steps": 28, - "score": 0.719, - "total_reward": 13.6619, + "score": 0.7248, + "total_reward": 13.7712, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.834, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9767, - 0.965, - 0.3033, 0.9417, - 0.28, - 0.2683, - 0.2567, - 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.9333, + 0.925, + 0.3147, + 0.9083, + 0.318, + 0.3117, + 0.3033, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, + 0.75, + 0.7333, 0.8986 ] }, @@ -2662,29 +3590,39 @@ "task_type": "task2", "seed": 10, "steps": 16, - "score": 0.8159, - "total_reward": 13.8711, + "score": 0.797, + "total_reward": 13.5485, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.432, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9767, - 0.965, - 0.9533, 0.9417, - 0.93, - 0.9183, - 0.9067, - 0.895, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, 0.8833, - 0.8717, - 0.21, - 0.1983, - 0.8367, + 0.875, + 0.8667, + 0.8583, + 0.212, + 0.2037, + 0.8333, 0.825, 0.7578 ] @@ -2694,32 +3632,42 @@ "task_type": "task2", "seed": 11, "steps": 30, - "score": 0.6688, - "total_reward": 12.7072, + "score": 0.6763, + "total_reward": 12.8505, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.825, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9767, - 0.965, - 0.3033, - 0.2917, - 0.28, - 0.9067, + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.0233, + 0.75, + 0.7333, + 0.0817, 0.8522 ] }, @@ -2728,29 +3676,39 @@ "task_type": "task2", "seed": 12, "steps": 30, - "score": 0.8215, - "total_reward": 13.1441, + "score": 0.8246, + "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.9441 ] }, @@ -2759,30 +3717,40 @@ "task_type": "task2", "seed": 13, "steps": 30, - "score": 0.7931, - "total_reward": 13.4819, + "score": 0.7935, + "total_reward": 13.4903, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.839, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.3267, - 0.9533, - 0.93, - 0.9067, + 0.9417, + 0.2883, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.9436 ] }, @@ -2791,32 +3759,42 @@ "task_type": "task2", "seed": 14, "steps": 18, - "score": 0.7423, - "total_reward": 14.1039, + "score": 0.7309, + "total_reward": 13.8869, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.264, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9767, - 0.315, - 0.3033, - 0.2917, - 0.93, - 0.9183, - 0.9067, - 0.895, + 0.9417, + 0.9333, + 0.313, + 0.3147, + 0.2733, + 0.9, + 0.8917, 0.8833, - 0.8717, - 0.86, - 0.8483, - 0.8367, - 0.175, - 0.8133, - 0.8017, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.187, + 0.8167, + 0.8083, 0.6989 ] }, @@ -2825,32 +3803,42 @@ "task_type": "task2", "seed": 15, "steps": 18, - "score": 0.78, - "total_reward": 14.8206, + "score": 0.7649, + "total_reward": 14.5326, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.33, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.3383, - 0.9767, - 0.965, - 0.9533, - 0.2917, - 0.93, - 0.9183, - 0.9067, - 0.895, - 0.2333, - 0.8717, - 0.86, - 0.8483, - 0.8367, + 0.3097, + 0.9333, + 0.925, + 0.9167, + 0.2703, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.2287, + 0.8583, + 0.85, + 0.8417, + 0.8333, 0.825, - 0.8133, - 0.8017, + 0.8167, + 0.8083, 0.7656 ] }, @@ -2859,29 +3847,39 @@ "task_type": "task2", "seed": 16, "steps": 30, - "score": 0.8215, - "total_reward": 13.1441, + "score": 0.8246, + "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.9441 ] }, @@ -2890,29 +3888,39 @@ "task_type": "task2", "seed": 17, "steps": 30, - "score": 0.7783, - "total_reward": 12.452, + "score": 0.7823, + "total_reward": 12.517, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.28, - 0.9067, + 0.9333, + 0.9167, + 0.265, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.902 ] }, @@ -2921,29 +3929,39 @@ "task_type": "task2", "seed": 18, "steps": 15, - "score": 0.8968, - "total_reward": 14.348, + "score": 0.8749, + "total_reward": 13.998, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.28, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9883, - 0.9767, - 0.965, - 0.9533, 0.9417, - 0.93, - 0.9183, - 0.9067, - 0.895, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, 0.8833, - 0.8717, - 0.86, - 0.8483, - 0.8367, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, 0.748 ] }, @@ -2952,31 +3970,41 @@ "task_type": "task2", "seed": 19, "steps": 17, - "score": 0.8146, - "total_reward": 14.6625, + "score": 0.7966, + "total_reward": 14.3395, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.279, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.3383, - 0.9767, - 0.965, - 0.9533, - 0.9417, - 0.28, - 0.9183, - 0.9067, - 0.895, + 0.2967, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.262, + 0.8917, 0.8833, - 0.8717, - 0.86, - 0.8483, - 0.8367, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, 0.825, - 0.8133, + 0.8167, 0.7475 ] }, @@ -2985,29 +4013,39 @@ "task_type": "task2", "seed": 0, "steps": 30, - "score": 0.7783, - "total_reward": 12.4521, + "score": 0.7823, + "total_reward": 12.5171, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, - 0.7667, - 0.0933, - 0.72, - 0.6967, - 0.6733, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, 0.9021 ] }, @@ -3016,29 +4054,39 @@ "task_type": "task2", "seed": 1, "steps": 30, - "score": 0.8215, - "total_reward": 13.1441, + "score": 0.8246, + "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.9441 ] }, @@ -3047,29 +4095,39 @@ "task_type": "task2", "seed": 2, "steps": 30, - "score": 0.7349, - "total_reward": 11.7585, + "score": 0.7399, + "total_reward": 11.8385, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.843, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.3267, - 0.3033, - 0.93, - 0.9067, + 0.2983, + 0.2817, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.8585 ] }, @@ -3078,29 +4136,39 @@ "task_type": "task2", "seed": 3, "steps": 30, - "score": 0.7783, - "total_reward": 12.4521, + "score": 0.7823, + "total_reward": 12.5171, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, - 0.7667, - 0.0933, - 0.72, - 0.6967, - 0.6733, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, 0.9021 ] }, @@ -3109,29 +4177,39 @@ "task_type": "task2", "seed": 4, "steps": 30, - "score": 0.7783, - "total_reward": 12.4521, + "score": 0.7823, + "total_reward": 12.5171, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.1867, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.1983, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.9021 ] }, @@ -3140,29 +4218,39 @@ "task_type": "task2", "seed": 5, "steps": 30, - "score": 0.6476, - "total_reward": 10.3623, + "score": 0.6545, + "total_reward": 10.4723, "completion_rate": 0.733, "detection_rate": 1.0, "trust_calibration": 0.816, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.2567, - 0.8833, - 0.21, - 0.8367, - 0.8133, - 0.14, - 0.7667, - 0.0933, - 0.72, - 0.6967, - 0.6733, + 0.9333, + 0.9167, + 0.9, + 0.2483, + 0.8667, + 0.215, + 0.8333, + 0.8167, + 0.165, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, 0.7623 ] }, @@ -3171,29 +4259,39 @@ "task_type": "task2", "seed": 6, "steps": 30, - "score": 0.8215, - "total_reward": 13.1441, + "score": 0.8246, + "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.9441 ] }, @@ -3202,29 +4300,39 @@ "task_type": "task2", "seed": 7, "steps": 30, - "score": 0.8215, - "total_reward": 13.1442, + "score": 0.8246, + "total_reward": 13.1942, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.9442 ] }, @@ -3233,29 +4341,39 @@ "task_type": "task2", "seed": 8, "steps": 30, - "score": 0.7349, - "total_reward": 11.7583, + "score": 0.7399, + "total_reward": 11.8383, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.843, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.3033, - 0.93, - 0.9067, + 0.9333, + 0.2817, + 0.9, 0.8833, - 0.86, - 0.1867, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.1983, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.8583 ] }, @@ -3264,29 +4382,39 @@ "task_type": "task2", "seed": 9, "steps": 30, - "score": 0.8215, - "total_reward": 13.1441, + "score": 0.8246, + "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.9441 ] }, @@ -3295,29 +4423,39 @@ "task_type": "task2", "seed": 10, "steps": 30, - "score": 0.7348, - "total_reward": 11.7573, + "score": 0.7398, + "total_reward": 11.8373, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.07, - 0.0467, - 0.6733, + 0.115, + 0.0983, + 0.7167, 0.8573 ] }, @@ -3326,29 +4464,39 @@ "task_type": "task2", "seed": 11, "steps": 30, - "score": 0.7783, - "total_reward": 12.452, + "score": 0.7823, + "total_reward": 12.517, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.28, - 0.9067, + 0.9333, + 0.9167, + 0.265, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.902 ] }, @@ -3357,29 +4505,39 @@ "task_type": "task2", "seed": 12, "steps": 30, - "score": 0.8215, - "total_reward": 13.1441, + "score": 0.8246, + "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.9441 ] }, @@ -3388,29 +4546,39 @@ "task_type": "task2", "seed": 13, "steps": 30, - "score": 0.8215, - "total_reward": 13.1441, + "score": 0.8246, + "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.9441 ] }, @@ -3419,29 +4587,39 @@ "task_type": "task2", "seed": 14, "steps": 30, - "score": 0.7355, - "total_reward": 11.7676, + "score": 0.7398, + "total_reward": 11.8376, "completion_rate": 0.867, "detection_rate": 1.0, "trust_calibration": 0.841, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.2567, - 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.9333, + 0.9167, + 0.9, + 0.2483, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.8576 ] }, @@ -3450,29 +4628,39 @@ "task_type": "task2", "seed": 15, "steps": 30, - "score": 0.6914, - "total_reward": 11.0619, + "score": 0.6973, + "total_reward": 11.1569, "completion_rate": 0.8, "detection_rate": 1.0, "trust_calibration": 0.834, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.3267, - 0.9533, - 0.93, - 0.9067, - 0.2333, - 0.86, - 0.8367, - 0.8133, - 0.79, - 0.1167, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.2983, + 0.9167, + 0.9, + 0.8833, + 0.2317, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.1483, + 0.7667, + 0.75, + 0.7333, + 0.7167, 0.8119 ] }, @@ -3481,29 +4669,39 @@ "task_type": "task2", "seed": 16, "steps": 30, - "score": 0.8215, - "total_reward": 13.1441, + "score": 0.8246, + "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.9441 ] }, @@ -3512,29 +4710,39 @@ "task_type": "task2", "seed": 17, "steps": 30, - "score": 0.7783, - "total_reward": 12.452, + "score": 0.7823, + "total_reward": 12.517, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.28, - 0.9067, + 0.9333, + 0.9167, + 0.265, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.902 ] }, @@ -3543,29 +4751,39 @@ "task_type": "task2", "seed": 18, "steps": 30, - "score": 0.8215, - "total_reward": 13.1441, + "score": 0.8246, + "total_reward": 13.1941, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.84, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.86, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.9441 ] }, @@ -3574,29 +4792,39 @@ "task_type": "task2", "seed": 19, "steps": 30, - "score": 0.7783, - "total_reward": 12.452, + "score": 0.7823, + "total_reward": 12.517, "completion_rate": 0.933, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.9767, - 0.9533, - 0.93, - 0.9067, + 0.9333, + 0.9167, + 0.9, 0.8833, - 0.21, - 0.8367, - 0.8133, - 0.79, + 0.8667, + 0.215, + 0.8333, + 0.8167, + 0.8, + 0.7833, 0.7667, - 0.7433, - 0.72, - 0.6967, - 0.6733, + 0.75, + 0.7333, + 0.7167, 0.902 ] }, @@ -3605,38 +4833,48 @@ "task_type": "task3", "seed": 0, "steps": 36, - "score": 0.6387, - "total_reward": 15.9668, + "score": 0.6105, + "total_reward": 15.2622, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.884, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.9867, + 0.8956, + 0.9233, + 0.9211, 0.02, - 0.9767, - 0.9733, - 0.9667, - 0.3383, + 0.9144, + 0.9122, + 0.9078, + 0.3556, 0.02, 0.02, - 0.95, - 0.9433, - 0.9367, - 0.93, - 0.9267, - 0.295, + 0.8467, + 0.8922, + 0.8378, + 0.8833, + 0.9061, + 0.3067, 0.02, - 0.9133, - 0.06, - 0.9033, - 0.8967, - 0.265, - 0.2617, + 0.8722, + 0.073, + 0.8306, + 0.9061, + 0.3397, + 0.3044, 0.5035 ] }, @@ -3645,38 +4883,48 @@ "task_type": "task3", "seed": 1, "steps": 40, - "score": 0.761, - "total_reward": 19.0254, + "score": 0.7205, + "total_reward": 18.0135, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.662, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.9833, - 0.9733, - 0.97, - 0.9633, - 0.9567, - 0.325, - 0.9433, - 0.94, - 0.9333, - 0.93, - 0.3017, - 0.9167, - 0.9067, - 0.9033, - 0.8967, - 0.8933, + 0.9278, + 0.9506, + 0.8889, 0.8822, + 0.91, + 0.8556, + 0.9011, + 0.3167, + 0.8622, + 0.89, + 0.8356, + 0.8633, + 0.3011, + 0.8444, + 0.8378, + 0.8656, + 0.8261, + 0.8589, + 0.8961, 0.02, - 0.255, - 0.8733, - 0.245, + 0.3, + 0.8106, + 0.2933, 0.7916 ] }, @@ -3685,36 +4933,46 @@ "task_type": "task3", "seed": 2, "steps": 37, - "score": 0.8129, - "total_reward": 18.6957, + "score": 0.7627, + "total_reward": 17.5411, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.752, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.3717, - 0.365, - 0.9867, - 0.9833, - 0.9733, - 0.97, - 0.9633, - 0.9567, - 0.95, - 0.9433, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.2917, - 0.9133, + 0.3778, + 0.3203, + 0.9211, + 0.9189, + 0.8822, 0.91, - 0.9033, - 0.8933, + 0.8556, + 0.8511, + 0.8967, + 0.8422, + 0.89, + 0.8356, + 0.8311, + 0.8767, + 0.2944, + 0.8972, + 0.87, + 0.8656, + 0.8789, 0.02, - 0.8833, + 0.8722, 0.8207 ] }, @@ -3723,39 +4981,49 @@ "task_type": "task3", "seed": 3, "steps": 36, - "score": 0.6563, - "total_reward": 17.065, + "score": 0.6303, + "total_reward": 16.3887, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.798, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.3617, - 0.98, - 0.9767, - 0.9733, - 0.97, - 0.3417, - 0.3383, - 0.9567, - 0.9533, - 0.3183, - 0.9367, - 0.3083, + 0.9528, + 0.9506, 0.9233, - 0.92, - 0.9133, - 0.285, - 0.2817, - 0.9, - 0.8933, - 0.265, - 0.2583, + 0.3411, + 0.8667, + 0.9144, + 0.9372, + 0.91, + 0.3278, + 0.3556, + 0.8511, + 0.8989, + 0.2922, + 0.8878, + 0.3056, + 0.8489, + 0.8767, + 0.8922, + 0.333, + 0.2878, + 0.8283, + 0.8589, + 0.3297, + 0.3552, 0.01 ] }, @@ -3764,40 +5032,50 @@ "task_type": "task3", "seed": 4, "steps": 41, - "score": 0.7128, - "total_reward": 19.2459, + "score": 0.6894, + "total_reward": 18.6138, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.405, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.3617, - 0.9767, - 0.9733, - 0.345, - 0.3383, - 0.9567, - 0.95, - 0.94, - 0.9367, - 0.93, + 0.9278, + 0.9506, 0.9233, - 0.92, - 0.9167, - 0.2883, - 0.9067, - 0.275, - 0.8933, - 0.8867, + 0.3711, + 0.8844, + 0.9122, + 0.33, + 0.3186, + 0.9011, + 0.8667, + 0.86, + 0.8878, + 0.8833, + 0.8789, 0.8767, - 0.2483, - 0.245, - 0.2417, + 0.8994, + 0.3352, + 0.8678, + 0.3033, + 0.8239, + 0.8744, + 0.8678, + 0.2656, + 0.2933, + 0.2911, 0.7076 ] }, @@ -3806,38 +5084,48 @@ "task_type": "task3", "seed": 5, "steps": 31, - "score": 0.6418, - "total_reward": 16.045, + "score": 0.6062, + "total_reward": 15.1538, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.816, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.9867, - 0.355, - 0.3517, - 0.3483, - 0.9667, - 0.9633, - 0.96, - 0.3317, - 0.95, - 0.9467, - 0.315, - 0.9367, - 0.9333, - 0.9267, + 0.9278, + 0.8933, + 0.9211, + 0.3367, + 0.3344, + 0.3322, + 0.8578, + 0.9306, + 0.9033, + 0.3211, + 0.8467, + 0.9194, + 0.32, + 0.8878, + 0.8856, + 0.8811, 0.02, - 0.9167, - 0.0633, - 0.91, - 0.9067, + 0.8394, + 0.0752, + 0.87, + 0.8678, 0.02, - 0.9, + 0.8883, 0.01 ] }, @@ -3846,37 +5134,47 @@ "task_type": "task3", "seed": 6, "steps": 39, - "score": 0.665, - "total_reward": 16.625, + "score": 0.6337, + "total_reward": 15.8429, "completion_rate": 0.8, "detection_rate": 0.0, "trust_calibration": 0.872, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9833, - 0.98, - 0.9767, - 0.9733, - 0.97, - 0.9633, - 0.9567, - 0.325, - 0.9467, - 0.315, - 0.9333, - 0.9267, - 0.9233, - 0.9133, - 0.9033, - 0.2717, - 0.8933, - 0.04, - 0.2583, - 0.255, - 0.8767, + 0.8956, + 0.8889, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9006, + 0.8511, + 0.3197, + 0.9194, + 0.28, + 0.8556, + 0.8811, + 0.8789, + 0.8422, + 0.8856, + 0.3311, + 0.8589, + 0.0597, + 0.3222, + 0.27, + 0.8728, 0.02, 0.01 ] @@ -3886,35 +5184,45 @@ "task_type": "task3", "seed": 7, "steps": 32, - "score": 0.7617, - "total_reward": 16.7567, + "score": 0.7179, + "total_reward": 15.793, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.869, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9767, - 0.97, - 0.9633, - 0.96, - 0.9533, - 0.95, - 0.3217, - 0.94, - 0.9333, - 0.93, + 0.8756, + 0.8911, + 0.8867, + 0.9144, + 0.86, + 0.9056, + 0.9033, + 0.8989, + 0.8967, + 0.3444, + 0.89, + 0.8356, + 0.9083, 0.02, - 0.9233, - 0.9167, - 0.91, - 0.9067, - 0.9, - 0.2717, + 0.8789, + 0.8744, + 0.87, + 0.8928, + 0.8633, + 0.3111, 0.01 ] }, @@ -3923,36 +5231,46 @@ "task_type": "task3", "seed": 8, "steps": 31, - "score": 0.7482, - "total_reward": 17.2083, + "score": 0.7087, + "total_reward": 16.3004, "completion_rate": 0.85, "detection_rate": 0.0, "trust_calibration": 0.701, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.365, - 0.9867, - 0.9767, - 0.9733, - 0.97, - 0.3417, - 0.9633, - 0.96, - 0.95, - 0.9467, - 0.94, - 0.3083, - 0.93, - 0.9267, - 0.9233, - 0.92, - 0.0667, + 0.9256, + 0.3733, + 0.9211, + 0.8844, + 0.9122, 0.91, - 0.9033, - 0.9, + 0.3578, + 0.9056, + 0.9283, + 0.8667, + 0.8944, + 0.84, + 0.3456, + 0.8833, + 0.8811, + 0.8589, + 0.8767, + 0.0774, + 0.835, + 0.8856, + 0.8633, 0.01 ] }, @@ -3961,38 +5279,48 @@ "task_type": "task3", "seed": 9, "steps": 37, - "score": 0.7597, - "total_reward": 18.9924, + "score": 0.7151, + "total_reward": 17.877, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.558, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.9833, - 0.355, - 0.9733, - 0.97, - 0.02, - 0.3383, - 0.9567, - 0.95, - 0.9467, - 0.94, - 0.9333, + 0.9528, + 0.9256, 0.9233, - 0.9167, - 0.91, - 0.2817, - 0.9033, + 0.8689, + 0.3597, + 0.8822, + 0.935, + 0.02, + 0.3556, + 0.9011, 0.8967, - 0.89, - 0.2617, - 0.88, + 0.8944, + 0.84, + 0.8356, + 0.8489, + 0.8244, + 0.835, + 0.3178, + 0.8656, + 0.8261, + 0.8217, + 0.3044, + 0.85, 0.7724 ] }, @@ -4001,42 +5329,52 @@ "task_type": "task3", "seed": 10, "steps": 38, - "score": 0.6182, - "total_reward": 17.9276, + "score": 0.6037, + "total_reward": 17.5072, "completion_rate": 0.75, "detection_rate": 1.0, "trust_calibration": 0.772, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.9867, - 0.9767, - 0.3483, - 0.9667, + 0.9278, + 0.9233, + 0.9461, + 0.8844, + 0.3622, + 0.9078, 0.02, - 0.335, - 0.9567, - 0.95, - 0.9467, - 0.94, - 0.3117, + 0.3233, + 0.9261, + 0.8967, + 0.8944, + 0.86, + 0.3378, 0.02, - 0.305, - 0.9267, - 0.2983, - 0.295, - 0.9167, - 0.9133, - 0.9067, - 0.2783, - 0.275, + 0.3263, + 0.8811, + 0.3289, + 0.2967, + 0.8994, + 0.8722, + 0.8678, + 0.3386, + 0.3463, 0.02, - 0.2683, - 0.8867, - 0.255, + 0.3089, + 0.8544, + 0.355, 0.7709 ] }, @@ -4045,38 +5383,48 @@ "task_type": "task3", "seed": 11, "steps": 32, - "score": 0.6153, - "total_reward": 15.3822, + "score": 0.5881, + "total_reward": 14.703, "completion_rate": 0.7, "detection_rate": 0.3333, "trust_calibration": 0.743, "adversarial_detections": 1, "adversarial_poisonings": 2, "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.9867, - 0.3583, - 0.9767, - 0.3483, - 0.9667, - 0.9633, + 0.8756, + 0.9233, + 0.9211, + 0.3459, + 0.8844, + 0.3622, + 0.9078, + 0.9306, 0.02, - 0.9533, - 0.95, - 0.9467, - 0.9433, - 0.9367, - 0.9333, - 0.9222, - 0.0733, - 0.295, - 0.2917, - 0.9133, + 0.8689, + 0.8967, + 0.8944, + 0.8722, + 0.8878, + 0.8856, + 0.9228, + 0.0819, + 0.2967, + 0.3244, + 0.8722, 0.02, - 0.2783, - 0.2717, + 0.3356, + 0.3011, 0.01 ] }, @@ -4085,37 +5433,47 @@ "task_type": "task3", "seed": 12, "steps": 36, - "score": 0.6593, - "total_reward": 15.8238, + "score": 0.6249, + "total_reward": 14.9974, "completion_rate": 0.75, "detection_rate": 0.3333, "trust_calibration": 0.716, "adversarial_detections": 1, "adversarial_poisonings": 2, "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.3617, - 0.9833, - 0.9767, - 0.9733, - 0.9633, - 0.02, - 0.3317, - 0.3283, - 0.325, - 0.9433, - 0.9367, - 0.9267, - 0.9233, - 0.9167, - 0.9067, - 0.2783, - 0.05, + 0.9528, 0.8933, - 0.8822, - 0.8833, + 0.3411, + 0.9189, + 0.8644, + 0.9122, + 0.8756, + 0.02, + 0.3441, + 0.3189, + 0.3167, + 0.8622, + 0.8878, + 0.8511, + 0.8789, + 0.8244, + 0.8878, + 0.2856, + 0.0663, + 0.8589, + 0.8961, + 0.8772, 0.01 ] }, @@ -4124,36 +5482,46 @@ "task_type": "task3", "seed": 13, "steps": 38, - "score": 0.83, - "total_reward": 19.0902, + "score": 0.7872, + "total_reward": 18.1053, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.64, "adversarial_detections": 1, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.3417, - 0.9633, - 0.9533, + 0.9278, + 0.8911, + 0.8667, + 0.9122, + 0.3278, + 0.9056, + 0.8689, 0.02, - 0.9433, - 0.9367, - 0.93, - 0.9233, - 0.92, - 0.9133, - 0.91, - 0.9033, - 0.9, - 0.8967, - 0.89, - 0.8788, - 0.255, + 0.8622, + 0.8378, + 0.8533, + 0.8289, + 0.8767, + 0.8722, + 0.895, + 0.8856, + 0.8633, + 0.8611, + 0.9017, + 0.8939, + 0.3, 0.8081 ] }, @@ -4162,39 +5530,49 @@ "task_type": "task3", "seed": 14, "steps": 35, - "score": 0.7406, - "total_reward": 19.2567, + "score": 0.6988, + "total_reward": 18.1679, "completion_rate": 0.95, "detection_rate": 0.0, "trust_calibration": 0.661, "adversarial_detections": 0, "adversarial_poisonings": 2, "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.9833, - 0.9767, - 0.3483, - 0.97, - 0.9667, - 0.9633, - 0.96, - 0.9533, - 0.9433, - 0.94, - 0.3117, - 0.9333, - 0.93, - 0.3017, - 0.92, - 0.9133, + 0.9278, + 0.8733, + 0.8889, + 0.8844, + 0.3322, 0.91, - 0.0567, - 0.9, - 0.2717, - 0.8933, - 0.8867, + 0.9078, + 0.9056, + 0.9033, + 0.8989, + 0.8622, + 0.89, + 0.3408, + 0.8856, + 0.8833, + 0.3011, + 0.8267, + 0.8372, + 0.87, + 0.0708, + 0.8833, + 0.2811, + 0.8839, + 0.8544, 0.01 ] }, @@ -4203,41 +5581,51 @@ "task_type": "task3", "seed": 15, "steps": 38, - "score": 0.711, - "total_reward": 19.9066, + "score": 0.68, + "total_reward": 19.0388, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.774, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.3717, - 0.99, - 0.99, - 0.9833, - 0.355, - 0.3517, - 0.9733, - 0.97, - 0.9633, - 0.335, - 0.3317, - 0.3283, - 0.9467, - 0.94, - 0.3117, - 0.93, + 0.3548, + 0.9256, 0.9233, - 0.92, - 0.9167, - 0.2883, - 0.285, - 0.9033, - 0.8967, + 0.8689, + 0.3667, + 0.3644, + 0.8922, + 0.935, + 0.8556, + 0.3533, + 0.3211, + 0.3189, + 0.8944, 0.89, - 0.255, + 0.3378, + 0.8333, + 0.8789, 0.8767, + 0.8744, + 0.2922, + 0.333, + 0.8656, + 0.8261, + 0.8567, + 0.3, + 0.8478, 0.8066 ] }, @@ -4246,39 +5634,49 @@ "task_type": "task3", "seed": 16, "steps": 33, - "score": 0.6242, - "total_reward": 16.23, + "score": 0.5966, + "total_reward": 15.511, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.691, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.9867, - 0.9833, - 0.3517, + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.3044, 0.02, - 0.97, + 0.935, 0.02, - 0.3383, - 0.9567, - 0.95, - 0.3217, + 0.3256, + 0.8711, + 0.8967, + 0.3444, 0.02, - 0.94, - 0.9333, - 0.93, - 0.3017, - 0.2983, - 0.9167, - 0.9067, - 0.9, - 0.2717, - 0.8933, + 0.89, + 0.8556, + 0.8833, + 0.3311, + 0.3289, + 0.8744, + 0.8878, + 0.8633, + 0.3141, + 0.8589, 0.01 ] }, @@ -4287,40 +5685,50 @@ "task_type": "task3", "seed": 17, "steps": 35, - "score": 0.6167, - "total_reward": 17.2669, + "score": 0.5891, + "total_reward": 16.4939, "completion_rate": 0.75, "detection_rate": 0.0, "trust_calibration": 0.795, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, + 0.9278, 0.02, - 0.365, - 0.3583, - 0.9767, - 0.3483, - 0.97, - 0.3383, - 0.96, - 0.9567, - 0.9533, - 0.325, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.9233, - 0.295, - 0.2883, + 0.3433, + 0.3319, + 0.8644, + 0.3622, 0.91, - 0.2817, + 0.3286, 0.9033, - 0.05, - 0.8933, - 0.89, + 0.9011, + 0.8789, + 0.3697, + 0.9194, + 0.84, + 0.8856, + 0.8811, + 0.8789, + 0.3597, + 0.2692, + 0.87, + 0.2878, + 0.8656, + 0.0663, + 0.8239, + 0.8817, 0.02, 0.4835 ] @@ -4330,39 +5738,49 @@ "task_type": "task3", "seed": 18, "steps": 35, - "score": 0.6817, - "total_reward": 17.0431, + "score": 0.6548, + "total_reward": 16.3705, "completion_rate": 0.75, "detection_rate": 1.0, "trust_calibration": 0.573, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.02, - 0.98, - 0.3517, - 0.3483, - 0.97, - 0.9633, - 0.96, - 0.9533, - 0.95, - 0.9433, - 0.94, - 0.02, - 0.93, + 0.9278, 0.9233, - 0.295, - 0.2883, - 0.2817, + 0.02, + 0.9167, + 0.3644, + 0.3622, + 0.91, + 0.8556, 0.9033, + 0.8689, 0.8967, - 0.2683, - 0.265, - 0.7281 + 0.8922, + 0.915, + 0.02, + 0.8533, + 0.8789, + 0.2967, + 0.3422, + 0.3078, + 0.8656, + 0.8611, + 0.2789, + 0.3297, + 0.7281 ] }, { @@ -4370,38 +5788,48 @@ "task_type": "task3", "seed": 19, "steps": 38, - "score": 0.7253, - "total_reward": 18.1333, + "score": 0.6912, + "total_reward": 17.2799, "completion_rate": 0.9, "detection_rate": 0.0, "trust_calibration": 0.834, "adversarial_detections": 0, "adversarial_poisonings": 1, "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.3683, - 0.9867, - 0.9833, - 0.9767, - 0.9733, - 0.345, + 0.3156, + 0.9211, + 0.9189, + 0.9144, + 0.9122, + 0.33, 0.02, - 0.96, - 0.9567, - 0.95, - 0.9433, - 0.94, - 0.9333, - 0.9267, - 0.9233, 0.9133, - 0.91, - 0.2817, + 0.9261, 0.8967, - 0.8933, - 0.8867, - 0.88, - 0.2517, + 0.8422, + 0.89, + 0.8356, + 0.8511, + 0.8789, + 0.8922, + 0.87, + 0.3178, + 0.8811, + 0.8589, + 0.8544, + 0.87, + 0.3108, 0.01 ] }, @@ -4410,39 +5838,49 @@ "task_type": "task3", "seed": 0, "steps": 43, - "score": 0.7354, - "total_reward": 19.1208, + "score": 0.7114, + "total_reward": 18.4969, "completion_rate": 0.85, "detection_rate": 1.0, "trust_calibration": 0.729, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.3617, - 0.3583, - 0.355, - 0.3517, - 0.97, - 0.9633, - 0.9567, - 0.325, - 0.9433, - 0.9367, - 0.93, - 0.9233, - 0.9167, - 0.285, - 0.2783, - 0.8967, - 0.265, + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.3344, + 0.91, + 0.9056, + 0.9011, + 0.3267, + 0.8922, + 0.8878, 0.8833, - 0.8767, - 0.87, - 0.8633, + 0.8789, + 0.8744, + 0.3, + 0.2956, + 0.8611, + 0.2867, + 0.8522, + 0.8478, + 0.8433, + 0.8389, 0.7841 ] }, @@ -4451,38 +5889,48 @@ "task_type": "task3", "seed": 1, "steps": 29, - "score": 0.7324, - "total_reward": 18.3102, + "score": 0.7083, + "total_reward": 17.707, "completion_rate": 0.85, "detection_rate": 0.5, "trust_calibration": 0.721, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.3617, - 0.3583, - 0.355, - 0.9767, - 0.9733, - 0.97, - 0.9667, - 0.9633, - 0.96, - 0.9567, - 0.9533, - 0.95, - 0.0967, - 0.0933, - 0.09, - 0.0867, - 0.9255, - 0.9188, + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9144, 0.9122, - 0.9055, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, 0.6632 ] }, @@ -4491,38 +5939,48 @@ "task_type": "task3", "seed": 2, "steps": 29, - "score": 0.7215, - "total_reward": 18.0385, + "score": 0.6919, + "total_reward": 17.2983, "completion_rate": 0.85, "detection_rate": 0.4444, "trust_calibration": 0.561, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.3717, - 0.3683, - 0.99, - 0.9867, - 0.9833, - 0.98, - 0.9767, - 0.9733, - 0.97, - 0.9667, - 0.9633, - 0.96, - 0.9567, - 0.9533, - 0.1, - 0.0967, - 0.0933, - 0.09, - 0.0867, - 0.9255, - 0.9188, + 0.3778, + 0.3456, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, 0.9122, - 0.9055, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, 0.6065 ] }, @@ -4531,35 +5989,45 @@ "task_type": "task3", "seed": 3, "steps": 42, - "score": 0.8954, - "total_reward": 19.6992, + "score": 0.8546, + "total_reward": 18.8008, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.843, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.3017, - 0.92, - 0.9133, - 0.9067, - 0.9, - 0.8933, - 0.8867, - 0.88, - 0.8733, - 0.8667, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.8411, 0.8675 ] }, @@ -4568,38 +6036,48 @@ "task_type": "task3", "seed": 4, "steps": 29, - "score": 0.7484, - "total_reward": 18.711, + "score": 0.7165, + "total_reward": 17.9128, "completion_rate": 0.9, "detection_rate": 0.4444, "trust_calibration": 0.721, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.9867, - 0.9833, - 0.98, - 0.3517, - 0.9733, - 0.97, - 0.9667, - 0.9633, - 0.96, - 0.9567, - 0.9533, - 0.1, - 0.0967, - 0.0933, - 0.09, - 0.0867, - 0.9255, - 0.9188, + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.3414, 0.9122, - 0.9055, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, 0.664 ] }, @@ -4608,37 +6086,47 @@ "task_type": "task3", "seed": 5, "steps": 46, - "score": 0.7857, - "total_reward": 18.8579, + "score": 0.7558, + "total_reward": 18.1385, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.832, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.3483, - 0.9667, - 0.335, - 0.9533, - 0.9467, - 0.315, - 0.9333, - 0.3017, - 0.92, - 0.9133, - 0.9067, - 0.9, - 0.8933, - 0.8867, - 0.88, - 0.8733, - 0.2417, - 0.86, - 0.8533, + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.32, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.2711, + 0.8367, + 0.8322, 0.8229 ] }, @@ -4647,37 +6135,47 @@ "task_type": "task3", "seed": 6, "steps": 27, - "score": 0.7316, - "total_reward": 17.5574, + "score": 0.6991, + "total_reward": 16.778, "completion_rate": 0.85, "detection_rate": 0.4, "trust_calibration": 0.725, "adversarial_detections": 4, "adversarial_poisonings": 6, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.9867, - 0.9833, - 0.98, - 0.9767, - 0.9733, - 0.97, - 0.9667, - 0.9633, - 0.96, - 0.9567, - 0.1033, - 0.1, - 0.0967, - 0.0933, - 0.09, - 0.0867, - 0.9255, - 0.9188, + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.1019, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, 0.6387 ] }, @@ -4686,38 +6184,48 @@ "task_type": "task3", "seed": 7, "steps": 42, - "score": 0.8057, - "total_reward": 20.1428, + "score": 0.7756, + "total_reward": 19.3902, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.835, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.365, - 0.9867, - 0.3583, - 0.355, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.275, - 0.8933, - 0.8867, - 0.255, - 0.8733, - 0.8667, + 0.9278, + 0.9256, + 0.3763, + 0.9211, + 0.3919, + 0.3997, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8544, + 0.28, + 0.8456, + 0.8411, 0.8478 ] }, @@ -4726,37 +6234,47 @@ "task_type": "task3", "seed": 8, "steps": 44, - "score": 0.8456, - "total_reward": 20.2937, + "score": 0.809, + "total_reward": 19.4157, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.853, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.3683, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.335, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.9, - 0.8933, - 0.8867, - 0.88, - 0.2483, - 0.8667, - 0.86, + 0.9278, + 0.3526, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.2756, + 0.8411, + 0.8367, 0.8654 ] }, @@ -4765,38 +6283,48 @@ "task_type": "task3", "seed": 9, "steps": 40, - "score": 0.8106, - "total_reward": 20.2645, + "score": 0.782, + "total_reward": 19.5499, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.837, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.3617, - 0.9833, - 0.355, - 0.3517, - 0.3483, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.9, - 0.2683, - 0.8867, - 0.88, - 0.8733, + 0.9278, + 0.9256, + 0.9233, + 0.3841, + 0.9189, + 0.3997, + 0.3994, + 0.3972, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8456, 0.8528 ] }, @@ -4805,38 +6333,48 @@ "task_type": "task3", "seed": 10, "steps": 31, - "score": 0.7381, - "total_reward": 18.4523, + "score": 0.712, + "total_reward": 17.8008, "completion_rate": 0.85, "detection_rate": 0.625, "trust_calibration": 0.448, "adversarial_detections": 5, "adversarial_poisonings": 3, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.9867, - 0.9833, - 0.98, - 0.9767, - 0.9733, - 0.97, - 0.9667, - 0.9633, - 0.335, - 0.3317, - 0.9533, - 0.1, - 0.0967, - 0.0933, - 0.9322, - 0.9255, - 0.9188, + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, 0.9122, - 0.9055, - 0.2783, + 0.91, + 0.9078, + 0.9056, + 0.3303, + 0.3281, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.9295, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.3356, 0.6281 ] }, @@ -4845,37 +6383,47 @@ "task_type": "task3", "seed": 11, "steps": 40, - "score": 0.8029, - "total_reward": 19.2699, + "score": 0.7732, + "total_reward": 18.5566, "completion_rate": 0.9, "detection_rate": 1.0, "trust_calibration": 0.835, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.3617, - 0.3583, - 0.355, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.2817, - 0.275, - 0.8933, - 0.8867, - 0.88, - 0.8733, + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.2978, + 0.2933, + 0.8589, + 0.8544, + 0.85, + 0.8456, 0.8349 ] }, @@ -4884,35 +6432,45 @@ "task_type": "task3", "seed": 12, "steps": 42, - "score": 0.8954, - "total_reward": 19.6992, + "score": 0.8546, + "total_reward": 18.8009, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.9, - 0.2683, - 0.8867, - 0.88, - 0.8733, - 0.8667, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.8411, 0.8676 ] }, @@ -4921,35 +6479,45 @@ "task_type": "task3", "seed": 13, "steps": 39, - "score": 0.8751, - "total_reward": 19.2519, + "score": 0.833, + "total_reward": 18.3252, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.811, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.3683, - 0.365, - 0.9833, - 0.9767, - 0.97, - 0.9633, - 0.9567, - 0.95, - 0.9433, - 0.9367, - 0.93, - 0.9233, - 0.9167, + 0.9278, + 0.3456, + 0.3433, + 0.9189, + 0.9144, 0.91, - 0.9033, + 0.9056, + 0.9011, 0.8967, - 0.89, + 0.8922, + 0.8878, 0.8833, - 0.8767, + 0.8789, + 0.8744, + 0.87, + 0.8656, + 0.8611, + 0.8567, + 0.8522, + 0.8478, 0.8485 ] }, @@ -4958,39 +6526,49 @@ "task_type": "task3", "seed": 14, "steps": 29, - "score": 0.715, - "total_reward": 18.5907, + "score": 0.6889, + "total_reward": 17.9127, "completion_rate": 0.85, "detection_rate": 0.5, "trust_calibration": 0.609, "adversarial_detections": 4, "adversarial_poisonings": 4, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.365, - 0.3617, - 0.3583, - 0.98, - 0.9767, - 0.9733, - 0.97, - 0.9667, - 0.9633, - 0.96, - 0.9567, - 0.9533, - 0.325, - 0.9467, - 0.9433, - 0.09, - 0.0867, - 0.0833, - 0.08, - 0.9188, + 0.9278, + 0.9256, + 0.3763, + 0.3841, + 0.3689, + 0.9167, + 0.9144, 0.9122, - 0.9055, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.3237, + 0.8944, + 0.8922, + 0.093, + 0.0908, + 0.0886, + 0.0863, + 0.9206, + 0.9161, + 0.9117, 0.6353 ] }, @@ -4999,40 +6577,50 @@ "task_type": "task3", "seed": 15, "steps": 30, - "score": 0.7145, - "total_reward": 19.2907, + "score": 0.6847, + "total_reward": 18.4869, "completion_rate": 0.9, "detection_rate": 0.4444, "trust_calibration": 0.635, "adversarial_detections": 4, "adversarial_poisonings": 5, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.3717, - 0.99, - 0.99, - 0.9867, - 0.3583, - 0.98, - 0.9767, - 0.9733, - 0.97, - 0.3417, - 0.9633, - 0.96, - 0.9567, - 0.9533, - 0.95, - 0.9467, - 0.9433, - 0.09, - 0.0867, - 0.0833, - 0.08, - 0.0767, - 0.9155, - 0.9088, - 0.9022, + 0.3608, + 0.9256, + 0.9233, + 0.9211, + 0.3459, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.3348, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.8944, + 0.8922, + 0.093, + 0.0908, + 0.0886, + 0.0863, + 0.0841, + 0.9184, + 0.9139, + 0.9095, 0.6404 ] }, @@ -5041,35 +6629,45 @@ "task_type": "task3", "seed": 16, "steps": 42, - "score": 0.8954, - "total_reward": 19.6992, + "score": 0.8546, + "total_reward": 18.8009, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.844, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.9, - 0.2683, - 0.8867, - 0.88, - 0.8733, - 0.8667, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.8411, 0.8676 ] }, @@ -5078,37 +6676,47 @@ "task_type": "task3", "seed": 17, "steps": 46, - "score": 0.8394, - "total_reward": 20.1455, + "score": 0.8048, + "total_reward": 19.316, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.842, "adversarial_detections": 0, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.355, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.9, - 0.2683, - 0.8867, - 0.255, - 0.8733, - 0.8667, - 0.86, - 0.8533, + 0.9256, + 0.9211, + 0.3467, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.28, + 0.8456, + 0.8411, + 0.8367, + 0.8322, 0.8605 ] }, @@ -5117,37 +6725,47 @@ "task_type": "task3", "seed": 18, "steps": 26, - "score": 0.7313, - "total_reward": 17.5514, + "score": 0.6967, + "total_reward": 16.7213, "completion_rate": 0.85, "detection_rate": 0.3333, "trust_calibration": 0.701, "adversarial_detections": 3, "adversarial_poisonings": 6, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.99, - 0.99, - 0.9867, - 0.9833, - 0.98, - 0.9767, - 0.9733, - 0.97, - 0.9667, - 0.9633, - 0.96, - 0.9567, - 0.9533, - 0.1, - 0.0967, - 0.0933, - 0.09, - 0.0867, - 0.0833, - 0.9222, - 0.9155, + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, 0.6149 ] }, @@ -5156,34 +6774,44 @@ "task_type": "task3", "seed": 19, "steps": 20, - "score": 0.6575, - "total_reward": 13.8067, + "score": 0.6193, + "total_reward": 13.0053, "completion_rate": 0.65, "detection_rate": 0.0, "trust_calibration": 0.576, "adversarial_detections": 0, "adversarial_poisonings": 5, "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.3717, - 0.99, - 0.99, - 0.9867, - 0.9833, - 0.355, - 0.9767, - 0.9733, - 0.97, - 0.9667, - 0.9633, - 0.96, - 0.9567, - 0.9533, - 0.95, - 0.0967, - 0.0933, - 0.09, - 0.0867, + 0.3478, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.3437, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, 0.01 ] }, @@ -5192,35 +6820,45 @@ "task_type": "task3", "seed": 0, "steps": 42, - "score": 0.8952, - "total_reward": 19.6951, + "score": 0.867, + "total_reward": 19.0739, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.935, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.3017, - 0.92, - 0.9133, - 0.9067, - 0.9, - 0.8888, - 0.8822, - 0.8755, - 0.8688, - 0.8622, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, 0.8904 ] }, @@ -5229,34 +6867,44 @@ "task_type": "task3", "seed": 1, "steps": 40, - "score": 0.9266, - "total_reward": 19.459, + "score": 0.8977, + "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9022, - 0.8955, - 0.8888, - 0.8822, - 0.8755, - 0.8688, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, 0.8938 ] }, @@ -5265,35 +6913,45 @@ "task_type": "task3", "seed": 2, "steps": 42, - "score": 0.8659, - "total_reward": 19.0509, + "score": 0.8422, + "total_reward": 18.5276, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.933, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.3683, - 0.3617, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.8955, - 0.8888, - 0.8822, - 0.8755, - 0.8688, - 0.8622, + 0.3556, + 0.3511, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, 0.8724 ] }, @@ -5302,35 +6960,45 @@ "task_type": "task3", "seed": 3, "steps": 42, - "score": 0.895, - "total_reward": 19.6902, + "score": 0.8689, + "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.3017, - 0.92, - 0.9133, - 0.9067, - 0.8955, - 0.8888, - 0.8822, - 0.8755, - 0.8688, - 0.8622, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, 0.8901 ] }, @@ -5339,35 +7007,45 @@ "task_type": "task3", "seed": 4, "steps": 42, - "score": 0.895, - "total_reward": 19.6903, + "score": 0.8689, + "total_reward": 19.1154, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.3283, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.8955, - 0.8888, - 0.8822, - 0.8755, - 0.8688, - 0.8622, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.3289, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, 0.8901 ] }, @@ -5376,37 +7054,47 @@ "task_type": "task3", "seed": 5, "steps": 46, - "score": 0.8123, - "total_reward": 19.4948, + "score": 0.7916, + "total_reward": 18.9976, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.917, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.3483, - 0.9667, - 0.335, - 0.9533, - 0.9467, - 0.315, - 0.9333, - 0.3017, - 0.92, - 0.9133, - 0.9067, - 0.9, - 0.8933, - 0.8867, - 0.8755, - 0.8688, - 0.8622, - 0.8555, - 0.8488, + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.32, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8739, 0.8618 ] }, @@ -5415,34 +7103,44 @@ "task_type": "task3", "seed": 6, "steps": 40, - "score": 0.9266, - "total_reward": 19.4589, + "score": 0.8977, + "total_reward": 18.8523, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9022, - 0.8955, - 0.8888, - 0.8822, - 0.8755, - 0.8688, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, 0.8938 ] }, @@ -5451,34 +7149,44 @@ "task_type": "task3", "seed": 7, "steps": 40, - "score": 0.9268, - "total_reward": 19.4636, + "score": 0.8958, + "total_reward": 18.8108, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.8955, - 0.8888, - 0.8822, - 0.8755, - 0.8688, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, 0.894 ] }, @@ -5487,36 +7195,46 @@ "task_type": "task3", "seed": 8, "steps": 44, - "score": 0.866, - "total_reward": 19.9187, + "score": 0.8405, + "total_reward": 19.3315, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.3617, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.3283, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.9, - 0.8933, - 0.8822, - 0.8755, - 0.8688, - 0.8622, - 0.8555, + 0.9256, + 0.3511, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.3289, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, 0.8857 ] }, @@ -5525,34 +7243,44 @@ "task_type": "task3", "seed": 9, "steps": 40, - "score": 0.9268, - "total_reward": 19.4636, + "score": 0.8958, + "total_reward": 18.8108, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.8955, - 0.8888, - 0.8822, - 0.8755, - 0.8688, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, 0.894 ] }, @@ -5561,35 +7289,45 @@ "task_type": "task3", "seed": 10, "steps": 42, - "score": 0.8657, - "total_reward": 19.0462, + "score": 0.8421, + "total_reward": 18.5263, "completion_rate": 0.95, "detection_rate": 1.0, "trust_calibration": 0.928, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.295, - 0.2883, - 0.9067, - 0.8955, - 0.8888, - 0.8822, - 0.8755, - 0.8688, - 0.8622, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.3067, + 0.3022, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, 0.871 ] }, @@ -5598,35 +7336,45 @@ "task_type": "task3", "seed": 11, "steps": 42, - "score": 0.895, - "total_reward": 19.6903, + "score": 0.8689, + "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.355, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.8955, - 0.8888, - 0.8822, - 0.8755, - 0.8688, - 0.8622, + 0.9256, + 0.9211, + 0.3467, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, 0.8901 ] }, @@ -5635,34 +7383,44 @@ "task_type": "task3", "seed": 12, "steps": 40, - "score": 0.9266, - "total_reward": 19.459, + "score": 0.8977, + "total_reward": 18.8524, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9022, - 0.8955, - 0.8888, - 0.8822, - 0.8755, - 0.8688, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, 0.8938 ] }, @@ -5671,34 +7429,44 @@ "task_type": "task3", "seed": 13, "steps": 40, - "score": 0.9268, - "total_reward": 19.4636, + "score": 0.8958, + "total_reward": 18.8108, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.8955, - 0.8888, - 0.8822, - 0.8755, - 0.8688, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, 0.894 ] }, @@ -5707,36 +7475,46 @@ "task_type": "task3", "seed": 14, "steps": 44, - "score": 0.866, - "total_reward": 19.9183, + "score": 0.8405, + "total_reward": 19.3311, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.932, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.3483, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.275, - 0.8933, - 0.8822, - 0.8755, - 0.8688, - 0.8622, - 0.8555, + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, 0.8853 ] }, @@ -5745,37 +7523,47 @@ "task_type": "task3", "seed": 15, "steps": 46, - "score": 0.8393, - "total_reward": 20.1438, + "score": 0.8162, + "total_reward": 19.5883, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.93, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.3683, - 0.9867, - 0.98, - 0.9733, - 0.3417, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.3083, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.9, - 0.8933, - 0.8867, - 0.8755, - 0.8688, - 0.8622, - 0.8555, - 0.8488, + 0.3556, + 0.9211, + 0.9167, + 0.9122, + 0.3378, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.3156, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8739, 0.8825 ] }, @@ -5784,34 +7572,44 @@ "task_type": "task3", "seed": 16, "steps": 40, - "score": 0.9266, - "total_reward": 19.4589, + "score": 0.8977, + "total_reward": 18.8523, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9022, - 0.8955, - 0.8888, - 0.8822, - 0.8755, - 0.8688, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, 0.8938 ] }, @@ -5820,35 +7618,45 @@ "task_type": "task3", "seed": 17, "steps": 42, - "score": 0.8952, - "total_reward": 19.695, + "score": 0.867, + "total_reward": 19.0739, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.935, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.355, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.9, - 0.8888, - 0.8822, - 0.8755, - 0.8688, - 0.8622, + 0.9256, + 0.9211, + 0.3467, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, 0.8903 ] }, @@ -5857,34 +7665,44 @@ "task_type": "task3", "seed": 18, "steps": 40, - "score": 0.9268, - "total_reward": 19.4636, + "score": 0.8958, + "total_reward": 18.8108, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.931, "adversarial_detections": 6, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.96, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.8955, - 0.8888, - 0.8822, - 0.8755, - 0.8688, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, 0.894 ] }, @@ -5893,35 +7711,45 @@ "task_type": "task3", "seed": 19, "steps": 42, - "score": 0.895, - "total_reward": 19.6903, + "score": 0.8689, + "total_reward": 19.1153, "completion_rate": 1.0, "detection_rate": 1.0, "trust_calibration": 0.934, "adversarial_detections": 7, "adversarial_poisonings": 0, "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, "rewards": [ - 0.99, - 0.9867, - 0.98, - 0.9733, - 0.9667, - 0.335, - 0.9533, - 0.9467, - 0.94, - 0.9333, - 0.9267, - 0.92, - 0.9133, - 0.9067, - 0.8955, - 0.8888, - 0.8822, - 0.8755, - 0.8688, - 0.8622, + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, 0.8901 ] }