Spaces:
Running
Running
| { | |
| "task": "all", | |
| "tasks": [ | |
| "task1", | |
| "task2", | |
| "task3" | |
| ], | |
| "episodes_per_policy": 30, | |
| "adaptive": false, | |
| "difficulty_controller": { | |
| "adaptive": true, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "difficulty_controller_by_task_policy": { | |
| "task1": { | |
| "random": {}, | |
| "heuristic": {}, | |
| "oracle_lite": {}, | |
| "trained": {} | |
| }, | |
| "task2": { | |
| "random": {}, | |
| "heuristic": {}, | |
| "oracle_lite": {}, | |
| "trained": {} | |
| }, | |
| "task3": { | |
| "random": {}, | |
| "heuristic": {}, | |
| "oracle_lite": {}, | |
| "trained": {} | |
| } | |
| }, | |
| "summary": { | |
| "random": { | |
| "episodes": 90, | |
| "avg_score": 0.6904, | |
| "avg_completion_rate": 0.8131, | |
| "avg_detection_rate": 0.7935, | |
| "avg_trust_calibration": 0.4453, | |
| "avg_steps": 26.2111 | |
| }, | |
| "heuristic": { | |
| "episodes": 90, | |
| "avg_score": 0.7817, | |
| "avg_completion_rate": 0.8918, | |
| "avg_detection_rate": 0.9178, | |
| "avg_trust_calibration": 0.4373, | |
| "avg_steps": 24.4 | |
| }, | |
| "oracle_lite": { | |
| "episodes": 90, | |
| "avg_score": 0.8405, | |
| "avg_completion_rate": 0.8687, | |
| "avg_detection_rate": 1.0, | |
| "avg_trust_calibration": 0.5892, | |
| "avg_steps": 29.4444 | |
| }, | |
| "trained": { | |
| "episodes": 90, | |
| "avg_score": 0.788, | |
| "avg_completion_rate": 0.8979, | |
| "avg_detection_rate": 0.9437, | |
| "avg_trust_calibration": 0.4378, | |
| "avg_steps": 24.5 | |
| } | |
| }, | |
| "by_task": { | |
| "task1": { | |
| "random": { | |
| "episodes": 30, | |
| "avg_score": 0.7635, | |
| "avg_completion_rate": 0.76, | |
| "avg_detection_rate": 1.0, | |
| "avg_trust_calibration": 0.0, | |
| "avg_steps": 15.1333 | |
| }, | |
| "heuristic": { | |
| "episodes": 30, | |
| "avg_score": 0.8504, | |
| "avg_completion_rate": 0.84, | |
| "avg_detection_rate": 1.0, | |
| "avg_trust_calibration": 0.0, | |
| "avg_steps": 13.8333 | |
| }, | |
| "oracle_lite": { | |
| "episodes": 30, | |
| "avg_score": 0.9011, | |
| "avg_completion_rate": 0.7167, | |
| "avg_detection_rate": 1.0, | |
| "avg_trust_calibration": 0.0, | |
| "avg_steps": 16.0 | |
| }, | |
| "trained": { | |
| "episodes": 30, | |
| "avg_score": 0.8504, | |
| "avg_completion_rate": 0.84, | |
| "avg_detection_rate": 1.0, | |
| "avg_trust_calibration": 0.0, | |
| "avg_steps": 13.8333 | |
| } | |
| }, | |
| "task2": { | |
| "random": { | |
| "episodes": 30, | |
| "avg_score": 0.6472, | |
| "avg_completion_rate": 0.8644, | |
| "avg_detection_rate": 1.0, | |
| "avg_trust_calibration": 0.5829, | |
| "avg_steps": 26.7667 | |
| }, | |
| "heuristic": { | |
| "episodes": 30, | |
| "avg_score": 0.7497, | |
| "avg_completion_rate": 0.9288, | |
| "avg_detection_rate": 1.0, | |
| "avg_trust_calibration": 0.5737, | |
| "avg_steps": 23.2333 | |
| }, | |
| "oracle_lite": { | |
| "episodes": 30, | |
| "avg_score": 0.7638, | |
| "avg_completion_rate": 0.9045, | |
| "avg_detection_rate": 1.0, | |
| "avg_trust_calibration": 0.8377, | |
| "avg_steps": 30.0 | |
| }, | |
| "trained": { | |
| "episodes": 30, | |
| "avg_score": 0.7497, | |
| "avg_completion_rate": 0.9288, | |
| "avg_detection_rate": 1.0, | |
| "avg_trust_calibration": 0.5737, | |
| "avg_steps": 23.2333 | |
| } | |
| }, | |
| "task3": { | |
| "random": { | |
| "episodes": 30, | |
| "avg_score": 0.6606, | |
| "avg_completion_rate": 0.815, | |
| "avg_detection_rate": 0.3806, | |
| "avg_trust_calibration": 0.7531, | |
| "avg_steps": 36.7333 | |
| }, | |
| "heuristic": { | |
| "episodes": 30, | |
| "avg_score": 0.7449, | |
| "avg_completion_rate": 0.9067, | |
| "avg_detection_rate": 0.7534, | |
| "avg_trust_calibration": 0.7383, | |
| "avg_steps": 36.1333 | |
| }, | |
| "oracle_lite": { | |
| "episodes": 30, | |
| "avg_score": 0.8567, | |
| "avg_completion_rate": 0.985, | |
| "avg_detection_rate": 1.0, | |
| "avg_trust_calibration": 0.9299, | |
| "avg_steps": 42.3333 | |
| }, | |
| "trained": { | |
| "episodes": 30, | |
| "avg_score": 0.7637, | |
| "avg_completion_rate": 0.925, | |
| "avg_detection_rate": 0.8312, | |
| "avg_trust_calibration": 0.7396, | |
| "avg_steps": 36.4333 | |
| } | |
| } | |
| }, | |
| "episodes": [ | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 0, | |
| "steps": 15, | |
| "score": 0.6569, | |
| "total_reward": 7.8825, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.914, | |
| 0.962, | |
| 0.962, | |
| 0.02, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3645, | |
| 0.02, | |
| 0.02, | |
| 0.867 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 1, | |
| "steps": 15, | |
| "score": 0.7996, | |
| "total_reward": 7.196, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.982, | |
| 0.914, | |
| 0.914, | |
| 0.962, | |
| 0.867, | |
| 0.962, | |
| 0.3165 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 2, | |
| "steps": 15, | |
| "score": 0.8129, | |
| "total_reward": 8.1294, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.369, | |
| 0.3024, | |
| 0.962, | |
| 0.962, | |
| 0.914, | |
| 0.962, | |
| 0.867, | |
| 0.867, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 3, | |
| "steps": 14, | |
| "score": 0.8084, | |
| "total_reward": 10.5095, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.982, | |
| 0.982, | |
| 0.962, | |
| 0.3455, | |
| 0.867, | |
| 0.962, | |
| 0.946, | |
| 0.962, | |
| 0.3455, | |
| 0.3645, | |
| 0.867, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 4, | |
| "steps": 15, | |
| "score": 0.7814, | |
| "total_reward": 8.5956, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.982, | |
| 0.962, | |
| 0.3645, | |
| 0.914, | |
| 0.962, | |
| 0.3455, | |
| 0.3136, | |
| 0.962, | |
| 0.914 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 5, | |
| "steps": 15, | |
| "score": 0.725, | |
| "total_reward": 8.7, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.914, | |
| 0.962, | |
| 0.3165, | |
| 0.3455, | |
| 0.3455, | |
| 0.867, | |
| 0.946, | |
| 0.962, | |
| 0.3455, | |
| 0.867 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 6, | |
| "steps": 15, | |
| "score": 0.8118, | |
| "total_reward": 8.1182, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.914, | |
| 0.914, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.934, | |
| 0.867, | |
| 0.3206 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 7, | |
| "steps": 15, | |
| "score": 0.9334, | |
| "total_reward": 9.334, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.867, | |
| 0.914, | |
| 0.914, | |
| 0.962, | |
| 0.867, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 8, | |
| "steps": 15, | |
| "score": 0.8425, | |
| "total_reward": 9.2675, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.369, | |
| 0.962, | |
| 0.914, | |
| 0.962, | |
| 0.962, | |
| 0.3645, | |
| 0.962, | |
| 0.982, | |
| 0.914 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 9, | |
| "steps": 15, | |
| "score": 0.7751, | |
| "total_reward": 9.3011, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.982, | |
| 0.962, | |
| 0.962, | |
| 0.867, | |
| 0.3616, | |
| 0.914, | |
| 0.3645, | |
| 0.02, | |
| 0.982, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 10, | |
| "steps": 15, | |
| "score": 0.7653, | |
| "total_reward": 8.418, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.982, | |
| 0.914, | |
| 0.982, | |
| 0.962, | |
| 0.02, | |
| 0.3455, | |
| 0.3645, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 11, | |
| "steps": 15, | |
| "score": 0.8199, | |
| "total_reward": 9.8394, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.867, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.914, | |
| 0.982, | |
| 0.962, | |
| 0.982, | |
| 0.02, | |
| 0.914, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 12, | |
| "steps": 15, | |
| "score": 0.6163, | |
| "total_reward": 7.3956, | |
| "completion_rate": 0.6, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.982, | |
| 0.914, | |
| 0.3455, | |
| 0.962, | |
| 0.898, | |
| 0.962, | |
| 0.914, | |
| 0.02, | |
| 0.3616, | |
| 0.3455, | |
| 0.3455 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 13, | |
| "steps": 15, | |
| "score": 0.7283, | |
| "total_reward": 6.555, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.914, | |
| 0.867, | |
| 0.962, | |
| 0.934, | |
| 0.962, | |
| 0.914, | |
| 0.02 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 14, | |
| "steps": 17, | |
| "score": 0.8867, | |
| "total_reward": 10.6405, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.867, | |
| 0.914, | |
| 0.914, | |
| 0.3455, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.914 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 15, | |
| "steps": 16, | |
| "score": 0.6915, | |
| "total_reward": 9.6809, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.867, | |
| 0.3645, | |
| 0.3645, | |
| 0.982, | |
| 0.3645, | |
| 0.867, | |
| 0.982, | |
| 0.3455, | |
| 0.3455, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 16, | |
| "steps": 15, | |
| "score": 0.7164, | |
| "total_reward": 9.313, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.2975, | |
| 0.02, | |
| 0.982, | |
| 0.02, | |
| 0.3455, | |
| 0.914, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 17, | |
| "steps": 15, | |
| "score": 0.6495, | |
| "total_reward": 8.4439, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.02, | |
| 0.3455, | |
| 0.3136, | |
| 0.867, | |
| 0.982, | |
| 0.962, | |
| 0.3206, | |
| 0.962, | |
| 0.962, | |
| 0.982, | |
| 0.3826 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 18, | |
| "steps": 15, | |
| "score": 0.8235, | |
| "total_reward": 9.8815, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.02, | |
| 0.962, | |
| 0.982, | |
| 0.3645, | |
| 0.962, | |
| 0.867, | |
| 0.962, | |
| 0.914, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 19, | |
| "steps": 15, | |
| "score": 0.7588, | |
| "total_reward": 8.347, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.2975, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3455, | |
| 0.02, | |
| 0.93, | |
| 0.982, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 20, | |
| "steps": 15, | |
| "score": 0.6444, | |
| "total_reward": 7.7329, | |
| "completion_rate": 0.6, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3525, | |
| 0.867, | |
| 0.867, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.3686, | |
| 0.3645 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 21, | |
| "steps": 15, | |
| "score": 0.8756, | |
| "total_reward": 9.6315, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.914, | |
| 0.914, | |
| 0.962, | |
| 0.3455, | |
| 0.962, | |
| 0.962, | |
| 0.914, | |
| 0.867 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 22, | |
| "steps": 15, | |
| "score": 0.6437, | |
| "total_reward": 7.0809, | |
| "completion_rate": 0.5, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3504, | |
| 0.02, | |
| 0.93, | |
| 0.914, | |
| 0.3645, | |
| 0.3645, | |
| 0.962, | |
| 0.2975, | |
| 0.914, | |
| 0.982 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 23, | |
| "steps": 15, | |
| "score": 0.8985, | |
| "total_reward": 10.7824, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.867, | |
| 0.962, | |
| 0.3504, | |
| 0.982, | |
| 0.962, | |
| 0.962, | |
| 0.867, | |
| 0.962, | |
| 0.962, | |
| 0.982, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 24, | |
| "steps": 16, | |
| "score": 0.6933, | |
| "total_reward": 7.6267, | |
| "completion_rate": 0.6, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.2975, | |
| 0.982, | |
| 0.982, | |
| 0.3066, | |
| 0.934, | |
| 0.914, | |
| 0.962, | |
| 0.3686, | |
| 0.02, | |
| 0.93 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 25, | |
| "steps": 15, | |
| "score": 0.8266, | |
| "total_reward": 9.0928, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.914, | |
| 0.982, | |
| 0.962, | |
| 0.867, | |
| 0.962, | |
| 0.962, | |
| 0.867, | |
| 0.914, | |
| 0.962, | |
| 0.3504 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 26, | |
| "steps": 15, | |
| "score": 0.7833, | |
| "total_reward": 7.8326, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.914, | |
| 0.962, | |
| 0.982, | |
| 0.962, | |
| 0.962, | |
| 0.02, | |
| 0.3206, | |
| 0.914, | |
| 0.898 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 27, | |
| "steps": 16, | |
| "score": 0.8311, | |
| "total_reward": 9.1421, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.934, | |
| 0.3455, | |
| 0.3546, | |
| 0.962, | |
| 0.93, | |
| 0.982, | |
| 0.962, | |
| 0.93, | |
| 0.914, | |
| 0.914 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 28, | |
| "steps": 15, | |
| "score": 0.7196, | |
| "total_reward": 8.6356, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.914, | |
| 0.962, | |
| 0.962, | |
| 0.914, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3826, | |
| 0.02, | |
| 0.3165 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task1", | |
| "seed": 29, | |
| "steps": 15, | |
| "score": 0.5851, | |
| "total_reward": 7.021, | |
| "completion_rate": 0.6, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.914, | |
| 0.3455, | |
| 0.962, | |
| 0.02, | |
| 0.914, | |
| 0.3165, | |
| 0.02, | |
| 0.867, | |
| 0.962, | |
| 0.369 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 0, | |
| "steps": 13, | |
| "score": 0.753, | |
| "total_reward": 10.5415, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.982, | |
| 0.982, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.962, | |
| 0.962, | |
| 0.3455, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 1, | |
| "steps": 12, | |
| "score": 0.7843, | |
| "total_reward": 10.196, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.982, | |
| 0.982, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 2, | |
| "steps": 11, | |
| "score": 0.8612, | |
| "total_reward": 10.3345, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.369, | |
| 0.3455, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 3, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 4, | |
| "steps": 11, | |
| "score": 0.911, | |
| "total_reward": 10.9324, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 5, | |
| "steps": 16, | |
| "score": 0.8266, | |
| "total_reward": 7.439, | |
| "completion_rate": 0.6, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 6, | |
| "steps": 10, | |
| "score": 0.962, | |
| "total_reward": 10.582, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 7, | |
| "steps": 16, | |
| "score": 0.8166, | |
| "total_reward": 9.7988, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.3686, | |
| 0.962, | |
| 0.3826, | |
| 0.3896, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 8, | |
| "steps": 16, | |
| "score": 0.8399, | |
| "total_reward": 8.3989, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 9, | |
| "steps": 16, | |
| "score": 0.785, | |
| "total_reward": 10.2052, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3756, | |
| 0.962, | |
| 0.3896, | |
| 0.391, | |
| 0.391, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 10, | |
| "steps": 10, | |
| "score": 0.962, | |
| "total_reward": 10.582, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 11, | |
| "steps": 16, | |
| "score": 0.7843, | |
| "total_reward": 10.196, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.982, | |
| 0.982, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 12, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 13, | |
| "steps": 16, | |
| "score": 0.9003, | |
| "total_reward": 9.0035, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.3455, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 14, | |
| "steps": 13, | |
| "score": 0.7534, | |
| "total_reward": 10.5473, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.3686, | |
| 0.3756, | |
| 0.962, | |
| 0.3896, | |
| 0.391, | |
| 0.3645, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 15, | |
| "steps": 13, | |
| "score": 0.8312, | |
| "total_reward": 11.6374, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3546, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 16, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 17, | |
| "steps": 16, | |
| "score": 0.8943, | |
| "total_reward": 8.0485, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 18, | |
| "steps": 10, | |
| "score": 0.962, | |
| "total_reward": 10.582, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 19, | |
| "steps": 12, | |
| "score": 0.8675, | |
| "total_reward": 11.2779, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3455, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 20, | |
| "steps": 16, | |
| "score": 0.7993, | |
| "total_reward": 8.7927, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.3686, | |
| 0.3756, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 21, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 22, | |
| "steps": 15, | |
| "score": 0.772, | |
| "total_reward": 12.3526, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.369, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 23, | |
| "steps": 11, | |
| "score": 0.8606, | |
| "total_reward": 10.3271, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.3616, | |
| 0.3455, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 24, | |
| "steps": 16, | |
| "score": 0.8161, | |
| "total_reward": 9.7931, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.3616, | |
| 0.982, | |
| 0.369, | |
| 0.982, | |
| 0.3645, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 25, | |
| "steps": 14, | |
| "score": 0.6506, | |
| "total_reward": 9.7585, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.982, | |
| 0.982, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.982, | |
| 0.946, | |
| 0.369, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.982, | |
| 0.3645, | |
| 0.3645 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 26, | |
| "steps": 16, | |
| "score": 0.7054, | |
| "total_reward": 11.2865, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.982, | |
| 0.982, | |
| 0.982, | |
| 0.3645, | |
| 0.946, | |
| 0.982, | |
| 0.3645, | |
| 0.946, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.369, | |
| 0.369, | |
| 0.3645, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 27, | |
| "steps": 15, | |
| "score": 0.6937, | |
| "total_reward": 9.0177, | |
| "completion_rate": 0.6, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.3686, | |
| 0.962, | |
| 0.962, | |
| 0.3896, | |
| 0.391, | |
| 0.391, | |
| 0.391, | |
| 0.962, | |
| 0.3525, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 28, | |
| "steps": 10, | |
| "score": 0.962, | |
| "total_reward": 10.582, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task1", | |
| "seed": 29, | |
| "steps": 11, | |
| "score": 0.911, | |
| "total_reward": 10.9324, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 0, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 1, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 2, | |
| "steps": 16, | |
| "score": 0.8266, | |
| "total_reward": 7.439, | |
| "completion_rate": 0.6, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3525, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 3, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 4, | |
| "steps": 16, | |
| "score": 0.8943, | |
| "total_reward": 8.0485, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 5, | |
| "steps": 16, | |
| "score": 0.8266, | |
| "total_reward": 7.439, | |
| "completion_rate": 0.6, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 6, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 7, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 8, | |
| "steps": 16, | |
| "score": 0.8266, | |
| "total_reward": 7.439, | |
| "completion_rate": 0.6, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 9, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 10, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 11, | |
| "steps": 16, | |
| "score": 0.8943, | |
| "total_reward": 8.0485, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 12, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 13, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 14, | |
| "steps": 16, | |
| "score": 0.8943, | |
| "total_reward": 8.0485, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 15, | |
| "steps": 16, | |
| "score": 0.8266, | |
| "total_reward": 7.439, | |
| "completion_rate": 0.6, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 16, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 17, | |
| "steps": 16, | |
| "score": 0.8943, | |
| "total_reward": 8.0485, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 18, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 19, | |
| "steps": 16, | |
| "score": 0.8943, | |
| "total_reward": 8.0485, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 20, | |
| "steps": 16, | |
| "score": 0.7588, | |
| "total_reward": 6.8295, | |
| "completion_rate": 0.5, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 21, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 22, | |
| "steps": 16, | |
| "score": 0.8266, | |
| "total_reward": 7.439, | |
| "completion_rate": 0.6, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 23, | |
| "steps": 16, | |
| "score": 0.8266, | |
| "total_reward": 7.439, | |
| "completion_rate": 0.6, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3525, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 24, | |
| "steps": 16, | |
| "score": 0.8943, | |
| "total_reward": 8.0485, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 25, | |
| "steps": 16, | |
| "score": 0.7588, | |
| "total_reward": 6.8295, | |
| "completion_rate": 0.6, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3525 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 26, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 27, | |
| "steps": 16, | |
| "score": 0.8943, | |
| "total_reward": 8.0485, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 28, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task1", | |
| "seed": 29, | |
| "steps": 16, | |
| "score": 0.8266, | |
| "total_reward": 7.439, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3525 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 0, | |
| "steps": 13, | |
| "score": 0.753, | |
| "total_reward": 10.5415, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.982, | |
| 0.982, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.962, | |
| 0.962, | |
| 0.3455, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 1, | |
| "steps": 12, | |
| "score": 0.7843, | |
| "total_reward": 10.196, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.982, | |
| 0.982, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 2, | |
| "steps": 11, | |
| "score": 0.8612, | |
| "total_reward": 10.3345, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.369, | |
| 0.3455, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 3, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 4, | |
| "steps": 11, | |
| "score": 0.911, | |
| "total_reward": 10.9324, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 5, | |
| "steps": 16, | |
| "score": 0.8266, | |
| "total_reward": 7.439, | |
| "completion_rate": 0.6, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 6, | |
| "steps": 10, | |
| "score": 0.962, | |
| "total_reward": 10.582, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 7, | |
| "steps": 16, | |
| "score": 0.8166, | |
| "total_reward": 9.7988, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.3686, | |
| 0.962, | |
| 0.3826, | |
| 0.3896, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 8, | |
| "steps": 16, | |
| "score": 0.8399, | |
| "total_reward": 8.3989, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 9, | |
| "steps": 16, | |
| "score": 0.785, | |
| "total_reward": 10.2052, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3756, | |
| 0.962, | |
| 0.3896, | |
| 0.391, | |
| 0.391, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 10, | |
| "steps": 10, | |
| "score": 0.962, | |
| "total_reward": 10.582, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 11, | |
| "steps": 16, | |
| "score": 0.7843, | |
| "total_reward": 10.196, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.982, | |
| 0.982, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 12, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 13, | |
| "steps": 16, | |
| "score": 0.9003, | |
| "total_reward": 9.0035, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.3455, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 14, | |
| "steps": 13, | |
| "score": 0.7534, | |
| "total_reward": 10.5473, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.3686, | |
| 0.3756, | |
| 0.962, | |
| 0.3896, | |
| 0.391, | |
| 0.3645, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 15, | |
| "steps": 13, | |
| "score": 0.8312, | |
| "total_reward": 11.6374, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3546, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 16, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 17, | |
| "steps": 16, | |
| "score": 0.8943, | |
| "total_reward": 8.0485, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 18, | |
| "steps": 10, | |
| "score": 0.962, | |
| "total_reward": 10.582, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 19, | |
| "steps": 12, | |
| "score": 0.8675, | |
| "total_reward": 11.2779, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3455, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 20, | |
| "steps": 16, | |
| "score": 0.7993, | |
| "total_reward": 8.7927, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.3686, | |
| 0.3756, | |
| 0.962, | |
| 0.962, | |
| 0.3525, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 21, | |
| "steps": 16, | |
| "score": 0.962, | |
| "total_reward": 8.658, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 22, | |
| "steps": 15, | |
| "score": 0.772, | |
| "total_reward": 12.3526, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.369, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 23, | |
| "steps": 11, | |
| "score": 0.8606, | |
| "total_reward": 10.3271, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.3616, | |
| 0.3455, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 24, | |
| "steps": 16, | |
| "score": 0.8161, | |
| "total_reward": 9.7931, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.3616, | |
| 0.982, | |
| 0.369, | |
| 0.982, | |
| 0.3645, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 25, | |
| "steps": 14, | |
| "score": 0.6506, | |
| "total_reward": 9.7585, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.982, | |
| 0.982, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.982, | |
| 0.946, | |
| 0.369, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.982, | |
| 0.3645, | |
| 0.3645 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 26, | |
| "steps": 16, | |
| "score": 0.7054, | |
| "total_reward": 11.2865, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.982, | |
| 0.982, | |
| 0.982, | |
| 0.3645, | |
| 0.946, | |
| 0.982, | |
| 0.3645, | |
| 0.946, | |
| 0.982, | |
| 0.3645, | |
| 0.3645, | |
| 0.369, | |
| 0.369, | |
| 0.3645, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 27, | |
| "steps": 15, | |
| "score": 0.6937, | |
| "total_reward": 9.0177, | |
| "completion_rate": 0.6, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.3686, | |
| 0.962, | |
| 0.962, | |
| 0.3896, | |
| 0.391, | |
| 0.391, | |
| 0.391, | |
| 0.962, | |
| 0.3525, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 28, | |
| "steps": 10, | |
| "score": 0.962, | |
| "total_reward": 10.582, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task1", | |
| "seed": 29, | |
| "steps": 11, | |
| "score": 0.911, | |
| "total_reward": 10.9324, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.0, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.962, | |
| 0.3504, | |
| 0.962, | |
| 0.962, | |
| 0.962 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 0, | |
| "steps": 25, | |
| "score": 0.5298, | |
| "total_reward": 9.5365, | |
| "completion_rate": 0.667, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.461, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9083, | |
| 0.925, | |
| 0.9167, | |
| 0.02, | |
| 0.8917, | |
| 0.8833, | |
| 0.8667, | |
| 0.2233, | |
| 0.02, | |
| 0.02, | |
| 0.725, | |
| 0.8083, | |
| 0.6917, | |
| 0.775, | |
| 0.1317, | |
| 0.115, | |
| 0.5949 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 1, | |
| "steps": 31, | |
| "score": 0.7252, | |
| "total_reward": 13.054, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.569, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9833, | |
| 0.8833, | |
| 0.8583, | |
| 0.875, | |
| 0.7583, | |
| 0.8417, | |
| 0.165, | |
| 0.7833, | |
| 0.8, | |
| 0.6833, | |
| 0.735, | |
| 0.1217, | |
| 0.7167, | |
| 0.7, | |
| 0.7083, | |
| 0.849 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 2, | |
| "steps": 27, | |
| "score": 0.6551, | |
| "total_reward": 11.7913, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.579, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3517, | |
| 0.262, | |
| 0.9167, | |
| 0.9083, | |
| 0.8583, | |
| 0.875, | |
| 0.7583, | |
| 0.7417, | |
| 0.825, | |
| 0.7083, | |
| 0.8, | |
| 0.6833, | |
| 0.6667, | |
| 0.75, | |
| 0.0967, | |
| 0.0983, | |
| 0.766 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 3, | |
| "steps": 28, | |
| "score": 0.6154, | |
| "total_reward": 12.9233, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.586, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.99, | |
| 0.9833, | |
| 0.925, | |
| 0.2717, | |
| 0.8, | |
| 0.8917, | |
| 0.9333, | |
| 0.875, | |
| 0.2217, | |
| 0.2233, | |
| 0.7417, | |
| 0.8333, | |
| 0.1483, | |
| 0.7917, | |
| 0.1383, | |
| 0.7333, | |
| 0.75, | |
| 0.7083, | |
| 0.123, | |
| 0.7686 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 4, | |
| "steps": 28, | |
| "score": 0.6575, | |
| "total_reward": 12.4928, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.458, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9833, | |
| 0.925, | |
| 0.2817, | |
| 0.8667, | |
| 0.8833, | |
| 0.23, | |
| 0.2113, | |
| 0.8417, | |
| 0.8, | |
| 0.775, | |
| 0.7917, | |
| 0.775, | |
| 0.7583, | |
| 0.75, | |
| 0.1067, | |
| 0.1313, | |
| 0.7235 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 5, | |
| "steps": 26, | |
| "score": 0.6206, | |
| "total_reward": 12.4128, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.408, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9, | |
| 0.9167, | |
| 0.24, | |
| 0.2467, | |
| 0.2383, | |
| 0.7667, | |
| 0.9083, | |
| 0.85, | |
| 0.1967, | |
| 0.725, | |
| 0.8667, | |
| 0.165, | |
| 0.7917, | |
| 0.7833, | |
| 0.7667, | |
| 0.02, | |
| 0.65, | |
| 0.7061 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 6, | |
| "steps": 31, | |
| "score": 0.7065, | |
| "total_reward": 12.7163, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.576, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9083, | |
| 0.8833, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8833, | |
| 0.7417, | |
| 0.188, | |
| 0.8667, | |
| 0.13, | |
| 0.7583, | |
| 0.7667, | |
| 0.7583, | |
| 0.7083, | |
| 0.7, | |
| 0.8083 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 7, | |
| "steps": 25, | |
| "score": 0.7366, | |
| "total_reward": 12.5218, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.776, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8333, | |
| 0.8917, | |
| 0.875, | |
| 0.8917, | |
| 0.775, | |
| 0.8583, | |
| 0.85, | |
| 0.8333, | |
| 0.825, | |
| 0.1817, | |
| 0.8, | |
| 0.6833, | |
| 0.825, | |
| 0.02, | |
| 0.7583, | |
| 0.8784 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 8, | |
| "steps": 25, | |
| "score": 0.7329, | |
| "total_reward": 13.9253, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.425, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.335, | |
| 0.9167, | |
| 0.8667, | |
| 0.8833, | |
| 0.875, | |
| 0.2767, | |
| 0.8583, | |
| 0.9, | |
| 0.8, | |
| 0.8167, | |
| 0.7, | |
| 0.1233, | |
| 0.775, | |
| 0.7667, | |
| 0.8083, | |
| 0.75, | |
| 0.7987 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 9, | |
| "steps": 27, | |
| "score": 0.7062, | |
| "total_reward": 12.712, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.597, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.99, | |
| 0.9333, | |
| 0.925, | |
| 0.8083, | |
| 0.278, | |
| 0.8583, | |
| 0.925, | |
| 0.02, | |
| 0.2233, | |
| 0.8417, | |
| 0.825, | |
| 0.8167, | |
| 0.735, | |
| 0.6833, | |
| 0.7333, | |
| 0.65, | |
| 0.8157 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 10, | |
| "steps": 26, | |
| "score": 0.5723, | |
| "total_reward": 12.0174, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.855, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.925, | |
| 0.9667, | |
| 0.8667, | |
| 0.2483, | |
| 0.8667, | |
| 0.02, | |
| 0.205, | |
| 0.8917, | |
| 0.825, | |
| 0.8167, | |
| 0.775, | |
| 0.1567, | |
| 0.02, | |
| 0.153, | |
| 0.7667, | |
| 0.8083, | |
| 0.105, | |
| 0.1067, | |
| 0.8194 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 11, | |
| "steps": 23, | |
| "score": 0.7197, | |
| "total_reward": 12.955, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.57, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8333, | |
| 0.925, | |
| 0.9167, | |
| 0.2703, | |
| 0.8667, | |
| 0.2483, | |
| 0.8667, | |
| 0.9083, | |
| 0.02, | |
| 0.8083, | |
| 0.825, | |
| 0.8167, | |
| 0.7683, | |
| 0.7917, | |
| 0.7833, | |
| 0.7417, | |
| 0.8063 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 12, | |
| "steps": 30, | |
| "score": 0.6047, | |
| "total_reward": 12.0935, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.344, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.99, | |
| 0.9, | |
| 0.2717, | |
| 0.9083, | |
| 0.8267, | |
| 0.8833, | |
| 0.8333, | |
| 0.02, | |
| 0.2197, | |
| 0.1883, | |
| 0.18, | |
| 0.7833, | |
| 0.7917, | |
| 0.7417, | |
| 0.7583, | |
| 0.65, | |
| 0.7, | |
| 0.0633, | |
| 0.6839 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 13, | |
| "steps": 30, | |
| "score": 0.6649, | |
| "total_reward": 11.9681, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.271, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.8917, | |
| 0.8, | |
| 0.8833, | |
| 0.2067, | |
| 0.8583, | |
| 0.8083, | |
| 0.02, | |
| 0.7833, | |
| 0.6917, | |
| 0.75, | |
| 0.6583, | |
| 0.75, | |
| 0.7333, | |
| 0.09, | |
| 0.7, | |
| 0.7014 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 14, | |
| "steps": 26, | |
| "score": 0.7146, | |
| "total_reward": 13.5771, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.416, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.825, | |
| 0.8833, | |
| 0.8667, | |
| 0.2383, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8333, | |
| 0.7833, | |
| 0.8, | |
| 0.1797, | |
| 0.7833, | |
| 0.775, | |
| 0.1217, | |
| 0.65, | |
| 0.7957 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 15, | |
| "steps": 27, | |
| "score": 0.5573, | |
| "total_reward": 12.2603, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.606, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3037, | |
| 0.9333, | |
| 0.925, | |
| 0.8083, | |
| 0.265, | |
| 0.2567, | |
| 0.8433, | |
| 0.925, | |
| 0.7583, | |
| 0.215, | |
| 0.1967, | |
| 0.1883, | |
| 0.8167, | |
| 0.8, | |
| 0.1567, | |
| 0.675, | |
| 0.7583, | |
| 0.75, | |
| 0.7417, | |
| 0.0883, | |
| 0.732 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 16, | |
| "steps": 23, | |
| "score": 0.5543, | |
| "total_reward": 11.0864, | |
| "completion_rate": 0.733, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.437, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.2217, | |
| 0.02, | |
| 0.925, | |
| 0.02, | |
| 0.2133, | |
| 0.8167, | |
| 0.825, | |
| 0.1817, | |
| 0.02, | |
| 0.8, | |
| 0.7583, | |
| 0.775, | |
| 0.1317, | |
| 0.6298 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 17, | |
| "steps": 27, | |
| "score": 0.5694, | |
| "total_reward": 11.9565, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.688, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.02, | |
| 0.28, | |
| 0.2613, | |
| 0.7917, | |
| 0.2483, | |
| 0.875, | |
| 0.2213, | |
| 0.85, | |
| 0.8417, | |
| 0.7933, | |
| 0.233, | |
| 0.8667, | |
| 0.7, | |
| 0.7833, | |
| 0.7667, | |
| 0.7583, | |
| 0.168, | |
| 0.0703, | |
| 0.7609 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 18, | |
| "steps": 23, | |
| "score": 0.6662, | |
| "total_reward": 11.3256, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.602, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.925, | |
| 0.02, | |
| 0.9, | |
| 0.2567, | |
| 0.2483, | |
| 0.875, | |
| 0.7583, | |
| 0.85, | |
| 0.8083, | |
| 0.825, | |
| 0.8083, | |
| 0.85, | |
| 0.02, | |
| 0.75, | |
| 0.7306 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 19, | |
| "steps": 27, | |
| "score": 0.7003, | |
| "total_reward": 12.6055, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.625, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.2633, | |
| 0.9167, | |
| 0.9083, | |
| 0.8917, | |
| 0.8833, | |
| 0.23, | |
| 0.02, | |
| 0.825, | |
| 0.8917, | |
| 0.825, | |
| 0.7083, | |
| 0.8, | |
| 0.6833, | |
| 0.7417, | |
| 0.7583, | |
| 0.7083, | |
| 0.8255 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 20, | |
| "steps": 32, | |
| "score": 0.5557, | |
| "total_reward": 12.7811, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.636, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.2983, | |
| 0.8167, | |
| 0.8, | |
| 0.8917, | |
| 0.8833, | |
| 0.237, | |
| 0.8667, | |
| 0.85, | |
| 0.2037, | |
| 0.2213, | |
| 0.19, | |
| 0.8083, | |
| 0.8, | |
| 0.1997, | |
| 0.7833, | |
| 0.775, | |
| 0.6583, | |
| 0.0787, | |
| 0.725, | |
| 0.1347, | |
| 0.7083, | |
| 0.7861 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 21, | |
| "steps": 23, | |
| "score": 0.7485, | |
| "total_reward": 12.7247, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.523, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.8917, | |
| 0.875, | |
| 0.8917, | |
| 0.2383, | |
| 0.8667, | |
| 0.8583, | |
| 0.8167, | |
| 0.725, | |
| 0.8083, | |
| 0.8, | |
| 0.02, | |
| 0.7833, | |
| 0.7667, | |
| 0.7897 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 22, | |
| "steps": 31, | |
| "score": 0.4891, | |
| "total_reward": 10.2706, | |
| "completion_rate": 0.667, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.544, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3037, | |
| 0.02, | |
| 0.8917, | |
| 0.8667, | |
| 0.2483, | |
| 0.24, | |
| 0.8667, | |
| 0.18, | |
| 0.8083, | |
| 0.875, | |
| 0.1817, | |
| 0.165, | |
| 0.7583, | |
| 0.13, | |
| 0.1217, | |
| 0.75, | |
| 0.0983, | |
| 0.725, | |
| 0.7167, | |
| 0.6236 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 23, | |
| "steps": 20, | |
| "score": 0.7708, | |
| "total_reward": 13.1031, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.755, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8333, | |
| 0.925, | |
| 0.2787, | |
| 0.9583, | |
| 0.9, | |
| 0.8917, | |
| 0.775, | |
| 0.8667, | |
| 0.85, | |
| 0.8917, | |
| 0.825, | |
| 0.8167, | |
| 0.8083, | |
| 0.8, | |
| 0.7917, | |
| 0.8711 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 24, | |
| "steps": 31, | |
| "score": 0.5498, | |
| "total_reward": 9.8972, | |
| "completion_rate": 0.667, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.809, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.2633, | |
| 0.975, | |
| 0.9667, | |
| 0.243, | |
| 0.9083, | |
| 0.8333, | |
| 0.85, | |
| 0.2297, | |
| 0.02, | |
| 0.7917, | |
| 0.02, | |
| 0.1647, | |
| 0.7417, | |
| 0.09, | |
| 0.7333, | |
| 0.7, | |
| 0.7165 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 25, | |
| "steps": 23, | |
| "score": 0.791, | |
| "total_reward": 13.4466, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.623, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9, | |
| 0.9667, | |
| 0.9083, | |
| 0.7917, | |
| 0.8833, | |
| 0.875, | |
| 0.7583, | |
| 0.8167, | |
| 0.8333, | |
| 0.187, | |
| 0.8167, | |
| 0.8083, | |
| 0.6917, | |
| 0.8333, | |
| 0.775, | |
| 0.8679 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 26, | |
| "steps": 29, | |
| "score": 0.5823, | |
| "total_reward": 11.646, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.895, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9, | |
| 0.9167, | |
| 0.9583, | |
| 0.9, | |
| 0.8917, | |
| 0.02, | |
| 0.2297, | |
| 0.8167, | |
| 0.76, | |
| 0.8083, | |
| 0.7917, | |
| 0.1813, | |
| 0.13, | |
| 0.0953, | |
| 0.75, | |
| 0.7917, | |
| 0.1413, | |
| 0.08, | |
| 0.8333 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 27, | |
| "steps": 29, | |
| "score": 0.6402, | |
| "total_reward": 12.1644, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.703, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9583, | |
| 0.28, | |
| 0.2847, | |
| 0.9083, | |
| 0.8667, | |
| 0.2483, | |
| 0.8667, | |
| 0.825, | |
| 0.8083, | |
| 0.7917, | |
| 0.825, | |
| 0.02, | |
| 0.75, | |
| 0.8167, | |
| 0.65, | |
| 0.0703, | |
| 0.725, | |
| 0.8094 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 28, | |
| "steps": 23, | |
| "score": 0.7219, | |
| "total_reward": 12.9944, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.671, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9083, | |
| 0.9167, | |
| 0.9083, | |
| 0.8667, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.258, | |
| 0.02, | |
| 0.165, | |
| 0.8167, | |
| 0.8, | |
| 0.8417, | |
| 0.7833, | |
| 0.6667, | |
| 0.8414 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task2", | |
| "seed": 29, | |
| "steps": 27, | |
| "score": 0.5586, | |
| "total_reward": 10.614, | |
| "completion_rate": 0.733, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.478, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9, | |
| 0.2717, | |
| 0.9083, | |
| 0.02, | |
| 0.8583, | |
| 0.8917, | |
| 0.02, | |
| 0.7417, | |
| 0.8333, | |
| 0.19, | |
| 0.1817, | |
| 0.7667, | |
| 0.7833, | |
| 0.7417, | |
| 0.1133, | |
| 0.0817, | |
| 0.644 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 0, | |
| "steps": 31, | |
| "score": 0.6145, | |
| "total_reward": 12.2902, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.72, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.99, | |
| 0.9833, | |
| 0.975, | |
| 0.2817, | |
| 0.2733, | |
| 0.265, | |
| 0.2467, | |
| 0.875, | |
| 0.8583, | |
| 0.8417, | |
| 0.19, | |
| 0.8083, | |
| 0.7917, | |
| 0.775, | |
| 0.7583, | |
| 0.7417, | |
| 0.09, | |
| 0.0733, | |
| 0.7719 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 1, | |
| "steps": 17, | |
| "score": 0.768, | |
| "total_reward": 13.8236, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.282, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.99, | |
| 0.9833, | |
| 0.975, | |
| 0.2817, | |
| 0.2733, | |
| 0.265, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.825, | |
| 0.8167, | |
| 0.7053 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 2, | |
| "steps": 17, | |
| "score": 0.7237, | |
| "total_reward": 13.0266, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.284, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3517, | |
| 0.2883, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.825, | |
| 0.1787, | |
| 0.6626 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 3, | |
| "steps": 30, | |
| "score": 0.7823, | |
| "total_reward": 12.5171, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.1317, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9021 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 4, | |
| "steps": 17, | |
| "score": 0.7999, | |
| "total_reward": 14.3981, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.426, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.2537, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.825, | |
| 0.1787, | |
| 0.7991 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 5, | |
| "steps": 30, | |
| "score": 0.6545, | |
| "total_reward": 10.4723, | |
| "completion_rate": 0.733, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.816, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.2483, | |
| 0.8667, | |
| 0.215, | |
| 0.8333, | |
| 0.8167, | |
| 0.165, | |
| 0.7833, | |
| 0.1317, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.7623 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 6, | |
| "steps": 15, | |
| "score": 0.8749, | |
| "total_reward": 13.9981, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.28, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.7481 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 7, | |
| "steps": 28, | |
| "score": 0.7465, | |
| "total_reward": 13.4373, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.833, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.313, | |
| 0.9167, | |
| 0.3163, | |
| 0.318, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.8983 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 8, | |
| "steps": 17, | |
| "score": 0.7992, | |
| "total_reward": 14.3856, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.39, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.2953, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.2537, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.825, | |
| 0.8167, | |
| 0.7866 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 9, | |
| "steps": 28, | |
| "score": 0.7248, | |
| "total_reward": 13.7712, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.834, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.925, | |
| 0.3147, | |
| 0.9083, | |
| 0.318, | |
| 0.3117, | |
| 0.3033, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.8986 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 10, | |
| "steps": 16, | |
| "score": 0.797, | |
| "total_reward": 13.5485, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.432, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.212, | |
| 0.2037, | |
| 0.8333, | |
| 0.825, | |
| 0.7578 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 11, | |
| "steps": 30, | |
| "score": 0.6763, | |
| "total_reward": 12.8505, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.825, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.99, | |
| 0.9833, | |
| 0.975, | |
| 0.2817, | |
| 0.2733, | |
| 0.265, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.0817, | |
| 0.8522 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 12, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1941, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9441 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 13, | |
| "steps": 30, | |
| "score": 0.7935, | |
| "total_reward": 13.4903, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.839, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.2883, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9436 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 14, | |
| "steps": 18, | |
| "score": 0.7309, | |
| "total_reward": 13.8869, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.264, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.313, | |
| 0.3147, | |
| 0.2733, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.187, | |
| 0.8167, | |
| 0.8083, | |
| 0.6989 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 15, | |
| "steps": 18, | |
| "score": 0.7649, | |
| "total_reward": 14.5326, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.33, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3097, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.2703, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.2287, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.825, | |
| 0.8167, | |
| 0.8083, | |
| 0.7656 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 16, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1941, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9441 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 17, | |
| "steps": 30, | |
| "score": 0.7823, | |
| "total_reward": 12.517, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.265, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.902 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 18, | |
| "steps": 15, | |
| "score": 0.8749, | |
| "total_reward": 13.998, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.28, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.748 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 19, | |
| "steps": 17, | |
| "score": 0.7966, | |
| "total_reward": 14.3395, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.279, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.2967, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.262, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.825, | |
| 0.8167, | |
| 0.7475 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 20, | |
| "steps": 30, | |
| "score": 0.6558, | |
| "total_reward": 11.8048, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.82, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.313, | |
| 0.3147, | |
| 0.9, | |
| 0.8833, | |
| 0.2317, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.1483, | |
| 0.7667, | |
| 0.75, | |
| 0.0983, | |
| 0.7167, | |
| 0.8071 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 21, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1941, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9441 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 22, | |
| "steps": 22, | |
| "score": 0.6604, | |
| "total_reward": 15.1886, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.471, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3517, | |
| 0.9333, | |
| 0.925, | |
| 0.2787, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.237, | |
| 0.8667, | |
| 0.2203, | |
| 0.85, | |
| 0.8417, | |
| 0.1953, | |
| 0.825, | |
| 0.1787, | |
| 0.8083, | |
| 0.8, | |
| 0.1537, | |
| 0.7833, | |
| 0.775, | |
| 0.8149 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 23, | |
| "steps": 16, | |
| "score": 0.793, | |
| "total_reward": 13.4804, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.212, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.3113, | |
| 0.28, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.825, | |
| 0.6808 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 24, | |
| "steps": 30, | |
| "score": 0.6768, | |
| "total_reward": 12.8598, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.824, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.3113, | |
| 0.975, | |
| 0.3267, | |
| 0.9583, | |
| 0.265, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.115, | |
| 0.7333, | |
| 0.7167, | |
| 0.8518 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 25, | |
| "steps": 21, | |
| "score": 0.5985, | |
| "total_reward": 13.1666, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.709, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.99, | |
| 0.9833, | |
| 0.975, | |
| 0.2817, | |
| 0.2733, | |
| 0.265, | |
| 0.8917, | |
| 0.2383, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.1883, | |
| 0.18, | |
| 0.8167, | |
| 0.1633, | |
| 0.155, | |
| 0.1467, | |
| 0.7833, | |
| 0.7683 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 26, | |
| "steps": 22, | |
| "score": 0.5962, | |
| "total_reward": 13.1159, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.324, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.99, | |
| 0.9833, | |
| 0.975, | |
| 0.2817, | |
| 0.8683, | |
| 0.265, | |
| 0.2567, | |
| 0.9333, | |
| 0.925, | |
| 0.2317, | |
| 0.2233, | |
| 0.26, | |
| 0.1983, | |
| 0.825, | |
| 0.8167, | |
| 0.8083, | |
| 0.162, | |
| 0.7917, | |
| 0.1453, | |
| 0.775, | |
| 0.6336 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 27, | |
| "steps": 30, | |
| "score": 0.6573, | |
| "total_reward": 13.146, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.829, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.313, | |
| 0.9167, | |
| 0.9083, | |
| 0.318, | |
| 0.3117, | |
| 0.3033, | |
| 0.8667, | |
| 0.85, | |
| 0.1983, | |
| 0.8167, | |
| 0.8, | |
| 0.1483, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.8533 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 28, | |
| "steps": 15, | |
| "score": 0.8749, | |
| "total_reward": 13.9979, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.28, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.7479 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task2", | |
| "seed": 29, | |
| "steps": 17, | |
| "score": 0.7998, | |
| "total_reward": 14.3965, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.421, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.2453, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.2037, | |
| 0.8333, | |
| 0.825, | |
| 0.8167, | |
| 0.7975 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 0, | |
| "steps": 30, | |
| "score": 0.7823, | |
| "total_reward": 12.5171, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.1317, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9021 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 1, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1941, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9441 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 2, | |
| "steps": 30, | |
| "score": 0.7399, | |
| "total_reward": 11.8385, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.843, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.2983, | |
| 0.2817, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.8585 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 3, | |
| "steps": 30, | |
| "score": 0.7823, | |
| "total_reward": 12.5171, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.1317, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9021 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 4, | |
| "steps": 30, | |
| "score": 0.7823, | |
| "total_reward": 12.5171, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.1983, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9021 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 5, | |
| "steps": 30, | |
| "score": 0.6545, | |
| "total_reward": 10.4723, | |
| "completion_rate": 0.733, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.816, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.2483, | |
| 0.8667, | |
| 0.215, | |
| 0.8333, | |
| 0.8167, | |
| 0.165, | |
| 0.7833, | |
| 0.1317, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.7623 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 6, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1941, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9441 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 7, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1942, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9442 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 8, | |
| "steps": 30, | |
| "score": 0.7399, | |
| "total_reward": 11.8383, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.843, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.2817, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.1983, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.8583 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 9, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1941, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9441 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 10, | |
| "steps": 30, | |
| "score": 0.7398, | |
| "total_reward": 11.8373, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.115, | |
| 0.0983, | |
| 0.7167, | |
| 0.8573 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 11, | |
| "steps": 30, | |
| "score": 0.7823, | |
| "total_reward": 12.517, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.265, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.902 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 12, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1941, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9441 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 13, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1941, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9441 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 14, | |
| "steps": 30, | |
| "score": 0.7398, | |
| "total_reward": 11.8376, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.841, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.2483, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.8576 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 15, | |
| "steps": 30, | |
| "score": 0.6973, | |
| "total_reward": 11.1569, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.834, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.2983, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.2317, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.1483, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.8119 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 16, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1941, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9441 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 17, | |
| "steps": 30, | |
| "score": 0.7823, | |
| "total_reward": 12.517, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.265, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.902 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 18, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1941, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9441 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 19, | |
| "steps": 30, | |
| "score": 0.7823, | |
| "total_reward": 12.517, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.215, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.902 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 20, | |
| "steps": 30, | |
| "score": 0.6117, | |
| "total_reward": 9.7864, | |
| "completion_rate": 0.667, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.795, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.2983, | |
| 0.9167, | |
| 0.9, | |
| 0.2483, | |
| 0.8667, | |
| 0.85, | |
| 0.1983, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.115, | |
| 0.7333, | |
| 0.7167, | |
| 0.7114 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 21, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1941, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9441 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 22, | |
| "steps": 30, | |
| "score": 0.6545, | |
| "total_reward": 10.4728, | |
| "completion_rate": 0.733, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.817, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.2983, | |
| 0.9167, | |
| 0.9, | |
| 0.2483, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.1317, | |
| 0.75, | |
| 0.7333, | |
| 0.0817, | |
| 0.7628 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 23, | |
| "steps": 30, | |
| "score": 0.7399, | |
| "total_reward": 11.8385, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.843, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.2983, | |
| 0.2817, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.8585 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 24, | |
| "steps": 30, | |
| "score": 0.7398, | |
| "total_reward": 11.8376, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.841, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.2483, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.8576 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 25, | |
| "steps": 30, | |
| "score": 0.7399, | |
| "total_reward": 11.838, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.842, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.2817, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.1817, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.858 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 26, | |
| "steps": 30, | |
| "score": 0.7398, | |
| "total_reward": 11.8373, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.115, | |
| 0.0983, | |
| 0.7167, | |
| 0.8573 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 27, | |
| "steps": 30, | |
| "score": 0.6972, | |
| "total_reward": 11.156, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.831, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.265, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.1317, | |
| 0.75, | |
| 0.7333, | |
| 0.0817, | |
| 0.811 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 28, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1941, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9441 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task2", | |
| "seed": 29, | |
| "steps": 30, | |
| "score": 0.7398, | |
| "total_reward": 11.8373, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.1817, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.0983, | |
| 0.7167, | |
| 0.8573 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 0, | |
| "steps": 31, | |
| "score": 0.6145, | |
| "total_reward": 12.2902, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.72, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.99, | |
| 0.9833, | |
| 0.975, | |
| 0.2817, | |
| 0.2733, | |
| 0.265, | |
| 0.2467, | |
| 0.875, | |
| 0.8583, | |
| 0.8417, | |
| 0.19, | |
| 0.8083, | |
| 0.7917, | |
| 0.775, | |
| 0.7583, | |
| 0.7417, | |
| 0.09, | |
| 0.0733, | |
| 0.7719 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 1, | |
| "steps": 17, | |
| "score": 0.768, | |
| "total_reward": 13.8236, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.282, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.99, | |
| 0.9833, | |
| 0.975, | |
| 0.2817, | |
| 0.2733, | |
| 0.265, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.825, | |
| 0.8167, | |
| 0.7053 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 2, | |
| "steps": 17, | |
| "score": 0.7237, | |
| "total_reward": 13.0266, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.284, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3517, | |
| 0.2883, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.825, | |
| 0.1787, | |
| 0.6626 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 3, | |
| "steps": 30, | |
| "score": 0.7823, | |
| "total_reward": 12.5171, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.1317, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9021 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 4, | |
| "steps": 17, | |
| "score": 0.7999, | |
| "total_reward": 14.3981, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.426, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.2537, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.825, | |
| 0.1787, | |
| 0.7991 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 5, | |
| "steps": 30, | |
| "score": 0.6545, | |
| "total_reward": 10.4723, | |
| "completion_rate": 0.733, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.816, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.2483, | |
| 0.8667, | |
| 0.215, | |
| 0.8333, | |
| 0.8167, | |
| 0.165, | |
| 0.7833, | |
| 0.1317, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.7623 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 6, | |
| "steps": 15, | |
| "score": 0.8749, | |
| "total_reward": 13.9981, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.28, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.7481 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 7, | |
| "steps": 28, | |
| "score": 0.7465, | |
| "total_reward": 13.4373, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.833, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.313, | |
| 0.9167, | |
| 0.3163, | |
| 0.318, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.8983 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 8, | |
| "steps": 17, | |
| "score": 0.7992, | |
| "total_reward": 14.3856, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.39, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.2953, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.2537, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.825, | |
| 0.8167, | |
| 0.7866 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 9, | |
| "steps": 28, | |
| "score": 0.7248, | |
| "total_reward": 13.7712, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.834, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.925, | |
| 0.3147, | |
| 0.9083, | |
| 0.318, | |
| 0.3117, | |
| 0.3033, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.8986 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 10, | |
| "steps": 16, | |
| "score": 0.797, | |
| "total_reward": 13.5485, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.432, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.212, | |
| 0.2037, | |
| 0.8333, | |
| 0.825, | |
| 0.7578 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 11, | |
| "steps": 30, | |
| "score": 0.6763, | |
| "total_reward": 12.8505, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.825, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.99, | |
| 0.9833, | |
| 0.975, | |
| 0.2817, | |
| 0.2733, | |
| 0.265, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.0817, | |
| 0.8522 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 12, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1941, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9441 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 13, | |
| "steps": 30, | |
| "score": 0.7935, | |
| "total_reward": 13.4903, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.839, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.2883, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9436 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 14, | |
| "steps": 18, | |
| "score": 0.7309, | |
| "total_reward": 13.8869, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.264, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.313, | |
| 0.3147, | |
| 0.2733, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.187, | |
| 0.8167, | |
| 0.8083, | |
| 0.6989 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 15, | |
| "steps": 18, | |
| "score": 0.7649, | |
| "total_reward": 14.5326, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.33, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3097, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.2703, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.2287, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.825, | |
| 0.8167, | |
| 0.8083, | |
| 0.7656 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 16, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1941, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9441 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 17, | |
| "steps": 30, | |
| "score": 0.7823, | |
| "total_reward": 12.517, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.265, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.902 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 18, | |
| "steps": 15, | |
| "score": 0.8749, | |
| "total_reward": 13.998, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.28, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.748 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 19, | |
| "steps": 17, | |
| "score": 0.7966, | |
| "total_reward": 14.3395, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.279, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.2967, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.262, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.825, | |
| 0.8167, | |
| 0.7475 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 20, | |
| "steps": 30, | |
| "score": 0.6558, | |
| "total_reward": 11.8048, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.82, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.313, | |
| 0.3147, | |
| 0.9, | |
| 0.8833, | |
| 0.2317, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.1483, | |
| 0.7667, | |
| 0.75, | |
| 0.0983, | |
| 0.7167, | |
| 0.8071 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 21, | |
| "steps": 30, | |
| "score": 0.8246, | |
| "total_reward": 13.1941, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9333, | |
| 0.9167, | |
| 0.9, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.9441 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 22, | |
| "steps": 22, | |
| "score": 0.6604, | |
| "total_reward": 15.1886, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.471, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3517, | |
| 0.9333, | |
| 0.925, | |
| 0.2787, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.237, | |
| 0.8667, | |
| 0.2203, | |
| 0.85, | |
| 0.8417, | |
| 0.1953, | |
| 0.825, | |
| 0.1787, | |
| 0.8083, | |
| 0.8, | |
| 0.1537, | |
| 0.7833, | |
| 0.775, | |
| 0.8149 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 23, | |
| "steps": 16, | |
| "score": 0.793, | |
| "total_reward": 13.4804, | |
| "completion_rate": 0.933, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.212, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.3113, | |
| 0.28, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.825, | |
| 0.6808 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 24, | |
| "steps": 30, | |
| "score": 0.6768, | |
| "total_reward": 12.8598, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.824, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.3113, | |
| 0.975, | |
| 0.3267, | |
| 0.9583, | |
| 0.265, | |
| 0.8833, | |
| 0.8667, | |
| 0.85, | |
| 0.8333, | |
| 0.8167, | |
| 0.8, | |
| 0.7833, | |
| 0.7667, | |
| 0.115, | |
| 0.7333, | |
| 0.7167, | |
| 0.8518 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 25, | |
| "steps": 21, | |
| "score": 0.5985, | |
| "total_reward": 13.1666, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.709, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.99, | |
| 0.9833, | |
| 0.975, | |
| 0.2817, | |
| 0.2733, | |
| 0.265, | |
| 0.8917, | |
| 0.2383, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.1883, | |
| 0.18, | |
| 0.8167, | |
| 0.1633, | |
| 0.155, | |
| 0.1467, | |
| 0.7833, | |
| 0.7683 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 26, | |
| "steps": 22, | |
| "score": 0.5962, | |
| "total_reward": 13.1159, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.324, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.99, | |
| 0.9833, | |
| 0.975, | |
| 0.2817, | |
| 0.8683, | |
| 0.265, | |
| 0.2567, | |
| 0.9333, | |
| 0.925, | |
| 0.2317, | |
| 0.2233, | |
| 0.26, | |
| 0.1983, | |
| 0.825, | |
| 0.8167, | |
| 0.8083, | |
| 0.162, | |
| 0.7917, | |
| 0.1453, | |
| 0.775, | |
| 0.6336 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 27, | |
| "steps": 30, | |
| "score": 0.6573, | |
| "total_reward": 13.146, | |
| "completion_rate": 0.867, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.829, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.313, | |
| 0.9167, | |
| 0.9083, | |
| 0.318, | |
| 0.3117, | |
| 0.3033, | |
| 0.8667, | |
| 0.85, | |
| 0.1983, | |
| 0.8167, | |
| 0.8, | |
| 0.1483, | |
| 0.7667, | |
| 0.75, | |
| 0.7333, | |
| 0.7167, | |
| 0.8533 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 28, | |
| "steps": 15, | |
| "score": 0.8749, | |
| "total_reward": 13.9979, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.28, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.8833, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.8417, | |
| 0.8333, | |
| 0.7479 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task2", | |
| "seed": 29, | |
| "steps": 17, | |
| "score": 0.7998, | |
| "total_reward": 14.3965, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.421, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9417, | |
| 0.9333, | |
| 0.925, | |
| 0.9167, | |
| 0.9083, | |
| 0.9, | |
| 0.8917, | |
| 0.2453, | |
| 0.875, | |
| 0.8667, | |
| 0.8583, | |
| 0.85, | |
| 0.2037, | |
| 0.8333, | |
| 0.825, | |
| 0.8167, | |
| 0.7975 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 0, | |
| "steps": 36, | |
| "score": 0.6105, | |
| "total_reward": 15.2622, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.884, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8956, | |
| 0.9233, | |
| 0.9211, | |
| 0.02, | |
| 0.9144, | |
| 0.9122, | |
| 0.9078, | |
| 0.3556, | |
| 0.02, | |
| 0.02, | |
| 0.8467, | |
| 0.8922, | |
| 0.8378, | |
| 0.8833, | |
| 0.9061, | |
| 0.3067, | |
| 0.02, | |
| 0.8722, | |
| 0.073, | |
| 0.8306, | |
| 0.9061, | |
| 0.3397, | |
| 0.3044, | |
| 0.5035 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 1, | |
| "steps": 40, | |
| "score": 0.7205, | |
| "total_reward": 18.0135, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.662, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9506, | |
| 0.8889, | |
| 0.8822, | |
| 0.91, | |
| 0.8556, | |
| 0.9011, | |
| 0.3167, | |
| 0.8622, | |
| 0.89, | |
| 0.8356, | |
| 0.8633, | |
| 0.3011, | |
| 0.8444, | |
| 0.8378, | |
| 0.8656, | |
| 0.8261, | |
| 0.8589, | |
| 0.8961, | |
| 0.02, | |
| 0.3, | |
| 0.8106, | |
| 0.2933, | |
| 0.7916 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 2, | |
| "steps": 37, | |
| "score": 0.7627, | |
| "total_reward": 17.5411, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.752, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3778, | |
| 0.3203, | |
| 0.9211, | |
| 0.9189, | |
| 0.8822, | |
| 0.91, | |
| 0.8556, | |
| 0.8511, | |
| 0.8967, | |
| 0.8422, | |
| 0.89, | |
| 0.8356, | |
| 0.8311, | |
| 0.8767, | |
| 0.2944, | |
| 0.8972, | |
| 0.87, | |
| 0.8656, | |
| 0.8789, | |
| 0.02, | |
| 0.8722, | |
| 0.8207 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 3, | |
| "steps": 36, | |
| "score": 0.6303, | |
| "total_reward": 16.3887, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.798, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9233, | |
| 0.3411, | |
| 0.8667, | |
| 0.9144, | |
| 0.9372, | |
| 0.91, | |
| 0.3278, | |
| 0.3556, | |
| 0.8511, | |
| 0.8989, | |
| 0.2922, | |
| 0.8878, | |
| 0.3056, | |
| 0.8489, | |
| 0.8767, | |
| 0.8922, | |
| 0.333, | |
| 0.2878, | |
| 0.8283, | |
| 0.8589, | |
| 0.3297, | |
| 0.3552, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 4, | |
| "steps": 41, | |
| "score": 0.6894, | |
| "total_reward": 18.6138, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.405, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9506, | |
| 0.9233, | |
| 0.3711, | |
| 0.8844, | |
| 0.9122, | |
| 0.33, | |
| 0.3186, | |
| 0.9011, | |
| 0.8667, | |
| 0.86, | |
| 0.8878, | |
| 0.8833, | |
| 0.8789, | |
| 0.8767, | |
| 0.8994, | |
| 0.3352, | |
| 0.8678, | |
| 0.3033, | |
| 0.8239, | |
| 0.8744, | |
| 0.8678, | |
| 0.2656, | |
| 0.2933, | |
| 0.2911, | |
| 0.7076 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 5, | |
| "steps": 31, | |
| "score": 0.6062, | |
| "total_reward": 15.1538, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.816, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8933, | |
| 0.9211, | |
| 0.3367, | |
| 0.3344, | |
| 0.3322, | |
| 0.8578, | |
| 0.9306, | |
| 0.9033, | |
| 0.3211, | |
| 0.8467, | |
| 0.9194, | |
| 0.32, | |
| 0.8878, | |
| 0.8856, | |
| 0.8811, | |
| 0.02, | |
| 0.8394, | |
| 0.0752, | |
| 0.87, | |
| 0.8678, | |
| 0.02, | |
| 0.8883, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 6, | |
| "steps": 39, | |
| "score": 0.6337, | |
| "total_reward": 15.8429, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.872, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8956, | |
| 0.8889, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9006, | |
| 0.8511, | |
| 0.3197, | |
| 0.9194, | |
| 0.28, | |
| 0.8556, | |
| 0.8811, | |
| 0.8789, | |
| 0.8422, | |
| 0.8856, | |
| 0.3311, | |
| 0.8589, | |
| 0.0597, | |
| 0.3222, | |
| 0.27, | |
| 0.8728, | |
| 0.02, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 7, | |
| "steps": 32, | |
| "score": 0.7179, | |
| "total_reward": 15.793, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.869, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8756, | |
| 0.8911, | |
| 0.8867, | |
| 0.9144, | |
| 0.86, | |
| 0.9056, | |
| 0.9033, | |
| 0.8989, | |
| 0.8967, | |
| 0.3444, | |
| 0.89, | |
| 0.8356, | |
| 0.9083, | |
| 0.02, | |
| 0.8789, | |
| 0.8744, | |
| 0.87, | |
| 0.8928, | |
| 0.8633, | |
| 0.3111, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 8, | |
| "steps": 31, | |
| "score": 0.7087, | |
| "total_reward": 16.3004, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.701, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.3733, | |
| 0.9211, | |
| 0.8844, | |
| 0.9122, | |
| 0.91, | |
| 0.3578, | |
| 0.9056, | |
| 0.9283, | |
| 0.8667, | |
| 0.8944, | |
| 0.84, | |
| 0.3456, | |
| 0.8833, | |
| 0.8811, | |
| 0.8589, | |
| 0.8767, | |
| 0.0774, | |
| 0.835, | |
| 0.8856, | |
| 0.8633, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 9, | |
| "steps": 37, | |
| "score": 0.7151, | |
| "total_reward": 17.877, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.558, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9256, | |
| 0.9233, | |
| 0.8689, | |
| 0.3597, | |
| 0.8822, | |
| 0.935, | |
| 0.02, | |
| 0.3556, | |
| 0.9011, | |
| 0.8967, | |
| 0.8944, | |
| 0.84, | |
| 0.8356, | |
| 0.8489, | |
| 0.8244, | |
| 0.835, | |
| 0.3178, | |
| 0.8656, | |
| 0.8261, | |
| 0.8217, | |
| 0.3044, | |
| 0.85, | |
| 0.7724 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 10, | |
| "steps": 38, | |
| "score": 0.6037, | |
| "total_reward": 17.5072, | |
| "completion_rate": 0.75, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.772, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9233, | |
| 0.9461, | |
| 0.8844, | |
| 0.3622, | |
| 0.9078, | |
| 0.02, | |
| 0.3233, | |
| 0.9261, | |
| 0.8967, | |
| 0.8944, | |
| 0.86, | |
| 0.3378, | |
| 0.02, | |
| 0.3263, | |
| 0.8811, | |
| 0.3289, | |
| 0.2967, | |
| 0.8994, | |
| 0.8722, | |
| 0.8678, | |
| 0.3386, | |
| 0.3463, | |
| 0.02, | |
| 0.3089, | |
| 0.8544, | |
| 0.355, | |
| 0.7709 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 11, | |
| "steps": 32, | |
| "score": 0.5881, | |
| "total_reward": 14.703, | |
| "completion_rate": 0.7, | |
| "detection_rate": 0.3333, | |
| "trust_calibration": 0.743, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8756, | |
| 0.9233, | |
| 0.9211, | |
| 0.3459, | |
| 0.8844, | |
| 0.3622, | |
| 0.9078, | |
| 0.9306, | |
| 0.02, | |
| 0.8689, | |
| 0.8967, | |
| 0.8944, | |
| 0.8722, | |
| 0.8878, | |
| 0.8856, | |
| 0.9228, | |
| 0.0819, | |
| 0.2967, | |
| 0.3244, | |
| 0.8722, | |
| 0.02, | |
| 0.3356, | |
| 0.3011, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 12, | |
| "steps": 36, | |
| "score": 0.6249, | |
| "total_reward": 14.9974, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.3333, | |
| "trust_calibration": 0.716, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.8933, | |
| 0.3411, | |
| 0.9189, | |
| 0.8644, | |
| 0.9122, | |
| 0.8756, | |
| 0.02, | |
| 0.3441, | |
| 0.3189, | |
| 0.3167, | |
| 0.8622, | |
| 0.8878, | |
| 0.8511, | |
| 0.8789, | |
| 0.8244, | |
| 0.8878, | |
| 0.2856, | |
| 0.0663, | |
| 0.8589, | |
| 0.8961, | |
| 0.8772, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 13, | |
| "steps": 38, | |
| "score": 0.7872, | |
| "total_reward": 18.1053, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.64, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8911, | |
| 0.8667, | |
| 0.9122, | |
| 0.3278, | |
| 0.9056, | |
| 0.8689, | |
| 0.02, | |
| 0.8622, | |
| 0.8378, | |
| 0.8533, | |
| 0.8289, | |
| 0.8767, | |
| 0.8722, | |
| 0.895, | |
| 0.8856, | |
| 0.8633, | |
| 0.8611, | |
| 0.9017, | |
| 0.8939, | |
| 0.3, | |
| 0.8081 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 14, | |
| "steps": 35, | |
| "score": 0.6988, | |
| "total_reward": 18.1679, | |
| "completion_rate": 0.95, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.661, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8733, | |
| 0.8889, | |
| 0.8844, | |
| 0.3322, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.8989, | |
| 0.8622, | |
| 0.89, | |
| 0.3408, | |
| 0.8856, | |
| 0.8833, | |
| 0.3011, | |
| 0.8267, | |
| 0.8372, | |
| 0.87, | |
| 0.0708, | |
| 0.8833, | |
| 0.2811, | |
| 0.8839, | |
| 0.8544, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 15, | |
| "steps": 38, | |
| "score": 0.68, | |
| "total_reward": 19.0388, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.774, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3548, | |
| 0.9256, | |
| 0.9233, | |
| 0.8689, | |
| 0.3667, | |
| 0.3644, | |
| 0.8922, | |
| 0.935, | |
| 0.8556, | |
| 0.3533, | |
| 0.3211, | |
| 0.3189, | |
| 0.8944, | |
| 0.89, | |
| 0.3378, | |
| 0.8333, | |
| 0.8789, | |
| 0.8767, | |
| 0.8744, | |
| 0.2922, | |
| 0.333, | |
| 0.8656, | |
| 0.8261, | |
| 0.8567, | |
| 0.3, | |
| 0.8478, | |
| 0.8066 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 16, | |
| "steps": 33, | |
| "score": 0.5966, | |
| "total_reward": 15.511, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.691, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.3044, | |
| 0.02, | |
| 0.935, | |
| 0.02, | |
| 0.3256, | |
| 0.8711, | |
| 0.8967, | |
| 0.3444, | |
| 0.02, | |
| 0.89, | |
| 0.8556, | |
| 0.8833, | |
| 0.3311, | |
| 0.3289, | |
| 0.8744, | |
| 0.8878, | |
| 0.8633, | |
| 0.3141, | |
| 0.8589, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 17, | |
| "steps": 35, | |
| "score": 0.5891, | |
| "total_reward": 16.4939, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.795, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.02, | |
| 0.3433, | |
| 0.3319, | |
| 0.8644, | |
| 0.3622, | |
| 0.91, | |
| 0.3286, | |
| 0.9033, | |
| 0.9011, | |
| 0.8789, | |
| 0.3697, | |
| 0.9194, | |
| 0.84, | |
| 0.8856, | |
| 0.8811, | |
| 0.8789, | |
| 0.3597, | |
| 0.2692, | |
| 0.87, | |
| 0.2878, | |
| 0.8656, | |
| 0.0663, | |
| 0.8239, | |
| 0.8817, | |
| 0.02, | |
| 0.4835 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 18, | |
| "steps": 35, | |
| "score": 0.6548, | |
| "total_reward": 16.3705, | |
| "completion_rate": 0.75, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.573, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9233, | |
| 0.02, | |
| 0.9167, | |
| 0.3644, | |
| 0.3622, | |
| 0.91, | |
| 0.8556, | |
| 0.9033, | |
| 0.8689, | |
| 0.8967, | |
| 0.8922, | |
| 0.915, | |
| 0.02, | |
| 0.8533, | |
| 0.8789, | |
| 0.2967, | |
| 0.3422, | |
| 0.3078, | |
| 0.8656, | |
| 0.8611, | |
| 0.2789, | |
| 0.3297, | |
| 0.7281 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 19, | |
| "steps": 38, | |
| "score": 0.6912, | |
| "total_reward": 17.2799, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.834, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3156, | |
| 0.9211, | |
| 0.9189, | |
| 0.9144, | |
| 0.9122, | |
| 0.33, | |
| 0.02, | |
| 0.9133, | |
| 0.9261, | |
| 0.8967, | |
| 0.8422, | |
| 0.89, | |
| 0.8356, | |
| 0.8511, | |
| 0.8789, | |
| 0.8922, | |
| 0.87, | |
| 0.3178, | |
| 0.8811, | |
| 0.8589, | |
| 0.8544, | |
| 0.87, | |
| 0.3108, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 20, | |
| "steps": 44, | |
| "score": 0.6149, | |
| "total_reward": 19.0606, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.859, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.8711, | |
| 0.8667, | |
| 0.9144, | |
| 0.9122, | |
| 0.337, | |
| 0.9078, | |
| 0.9033, | |
| 0.3281, | |
| 0.3519, | |
| 0.3467, | |
| 0.8922, | |
| 0.89, | |
| 0.3608, | |
| 0.8856, | |
| 0.8833, | |
| 0.8289, | |
| 0.2714, | |
| 0.87, | |
| 0.3508, | |
| 0.8656, | |
| 0.3089, | |
| 0.0597, | |
| 0.8172, | |
| 0.053, | |
| 0.8728, | |
| 0.8083, | |
| 0.3439, | |
| 0.2567, | |
| 0.5146 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 21, | |
| "steps": 30, | |
| "score": 0.7401, | |
| "total_reward": 15.543, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.709, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.8911, | |
| 0.8867, | |
| 0.9144, | |
| 0.3322, | |
| 0.9078, | |
| 0.9056, | |
| 0.8711, | |
| 0.8467, | |
| 0.8922, | |
| 0.89, | |
| 0.02, | |
| 0.8856, | |
| 0.8811, | |
| 0.8589, | |
| 0.8744, | |
| 0.87, | |
| 0.8856, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 22, | |
| "steps": 45, | |
| "score": 0.5529, | |
| "total_reward": 16.5871, | |
| "completion_rate": 0.7, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.709, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3548, | |
| 0.02, | |
| 0.9311, | |
| 0.8844, | |
| 0.3622, | |
| 0.36, | |
| 0.9078, | |
| 0.2933, | |
| 0.8689, | |
| 0.9217, | |
| 0.3444, | |
| 0.32, | |
| 0.8556, | |
| 0.3033, | |
| 0.3011, | |
| 0.8767, | |
| 0.3022, | |
| 0.87, | |
| 0.8678, | |
| 0.8811, | |
| 0.0619, | |
| 0.8544, | |
| 0.3022, | |
| 0.8478, | |
| 0.8083, | |
| 0.3119, | |
| 0.0397, | |
| 0.8322, | |
| 0.4222 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 23, | |
| "steps": 31, | |
| "score": 0.7921, | |
| "total_reward": 18.2193, | |
| "completion_rate": 0.95, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.847, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8756, | |
| 0.9233, | |
| 0.3481, | |
| 0.9439, | |
| 0.9167, | |
| 0.9144, | |
| 0.86, | |
| 0.9078, | |
| 0.9033, | |
| 0.9261, | |
| 0.8967, | |
| 0.8944, | |
| 0.8922, | |
| 0.89, | |
| 0.8878, | |
| 0.02, | |
| 0.9228, | |
| 0.0819, | |
| 0.8744, | |
| 0.87, | |
| 0.8833, | |
| 0.7254 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 24, | |
| "steps": 45, | |
| "score": 0.6293, | |
| "total_reward": 16.3622, | |
| "completion_rate": 0.75, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.813, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3156, | |
| 0.9483, | |
| 0.9461, | |
| 0.3197, | |
| 0.9072, | |
| 0.8756, | |
| 0.9033, | |
| 0.3541, | |
| 0.02, | |
| 0.9044, | |
| 0.02, | |
| 0.3608, | |
| 0.8511, | |
| 0.2967, | |
| 0.8722, | |
| 0.8356, | |
| 0.8111, | |
| 0.2867, | |
| 0.8544, | |
| 0.845, | |
| 0.3156, | |
| 0.8061, | |
| 0.8367, | |
| 0.7972, | |
| 0.7658 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 25, | |
| "steps": 34, | |
| "score": 0.6745, | |
| "total_reward": 16.8613, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.809, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8933, | |
| 0.9461, | |
| 0.9189, | |
| 0.8644, | |
| 0.9122, | |
| 0.91, | |
| 0.8556, | |
| 0.8711, | |
| 0.8989, | |
| 0.3237, | |
| 0.8944, | |
| 0.8922, | |
| 0.8378, | |
| 0.3356, | |
| 0.8833, | |
| 0.8489, | |
| 0.0797, | |
| 0.3244, | |
| 0.3222, | |
| 0.8878, | |
| 0.8906, | |
| 0.9061, | |
| 0.2967, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 26, | |
| "steps": 43, | |
| "score": 0.5813, | |
| "total_reward": 17.4397, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.815, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8933, | |
| 0.9211, | |
| 0.9439, | |
| 0.9167, | |
| 0.9144, | |
| 0.02, | |
| 0.3308, | |
| 0.8711, | |
| 0.8467, | |
| 0.8922, | |
| 0.8878, | |
| 0.3486, | |
| 0.3033, | |
| 0.2759, | |
| 0.8767, | |
| 0.3244, | |
| 0.3452, | |
| 0.29, | |
| 0.8156, | |
| 0.8633, | |
| 0.2889, | |
| 0.0597, | |
| 0.8544, | |
| 0.3372, | |
| 0.8478, | |
| 0.2956, | |
| 0.2811, | |
| 0.2889, | |
| 0.4707 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 27, | |
| "steps": 40, | |
| "score": 0.5674, | |
| "total_reward": 15.3205, | |
| "completion_rate": 0.7, | |
| "detection_rate": 0.25, | |
| "trust_calibration": 0.816, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 3, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9206, | |
| 0.3433, | |
| 0.3541, | |
| 0.9189, | |
| 0.9244, | |
| 0.3622, | |
| 0.9078, | |
| 0.9133, | |
| 0.8689, | |
| 0.8644, | |
| 0.885, | |
| 0.02, | |
| 0.8533, | |
| 0.9061, | |
| 0.8267, | |
| 0.9139, | |
| 0.073, | |
| 0.3356, | |
| 0.3441, | |
| 0.0619, | |
| 0.8994, | |
| 0.2722, | |
| 0.323, | |
| 0.3308, | |
| 0.8433, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 28, | |
| "steps": 32, | |
| "score": 0.6685, | |
| "total_reward": 16.0443, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.793, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8956, | |
| 0.9211, | |
| 0.9189, | |
| 0.8844, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.3763, | |
| 0.02, | |
| 0.3167, | |
| 0.8944, | |
| 0.89, | |
| 0.9128, | |
| 0.8856, | |
| 0.8461, | |
| 0.8589, | |
| 0.3267, | |
| 0.8372, | |
| 0.9128, | |
| 0.2933, | |
| 0.3461, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 29, | |
| "steps": 40, | |
| "score": 0.6868, | |
| "total_reward": 17.8577, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.907, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8933, | |
| 0.3411, | |
| 0.9189, | |
| 0.02, | |
| 0.8822, | |
| 0.9028, | |
| 0.02, | |
| 0.8511, | |
| 0.8989, | |
| 0.3467, | |
| 0.3444, | |
| 0.8578, | |
| 0.8856, | |
| 0.8511, | |
| 0.2989, | |
| 0.9094, | |
| 0.87, | |
| 0.8856, | |
| 0.3641, | |
| 0.8567, | |
| 0.3572, | |
| 0.8895, | |
| 0.8083, | |
| 0.8353 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 0, | |
| "steps": 43, | |
| "score": 0.7114, | |
| "total_reward": 18.4969, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.729, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.3344, | |
| 0.91, | |
| 0.9056, | |
| 0.9011, | |
| 0.3267, | |
| 0.8922, | |
| 0.8878, | |
| 0.8833, | |
| 0.8789, | |
| 0.8744, | |
| 0.3, | |
| 0.2956, | |
| 0.8611, | |
| 0.2867, | |
| 0.8522, | |
| 0.8478, | |
| 0.8433, | |
| 0.8389, | |
| 0.7841 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 1, | |
| "steps": 29, | |
| "score": 0.7083, | |
| "total_reward": 17.707, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.721, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6632 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 2, | |
| "steps": 29, | |
| "score": 0.6919, | |
| "total_reward": 17.2983, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.561, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3778, | |
| 0.3456, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6065 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 3, | |
| "steps": 42, | |
| "score": 0.8546, | |
| "total_reward": 18.8008, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.843, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8675 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 4, | |
| "steps": 29, | |
| "score": 0.7165, | |
| "total_reward": 17.9128, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.721, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.3414, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.664 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 5, | |
| "steps": 46, | |
| "score": 0.7558, | |
| "total_reward": 18.1385, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.832, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.32, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.2711, | |
| 0.8367, | |
| 0.8322, | |
| 0.8229 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 6, | |
| "steps": 27, | |
| "score": 0.6991, | |
| "total_reward": 16.778, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.4, | |
| "trust_calibration": 0.725, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 6, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.1019, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.6387 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 7, | |
| "steps": 42, | |
| "score": 0.7756, | |
| "total_reward": 19.3902, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.835, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3763, | |
| 0.9211, | |
| 0.3919, | |
| 0.3997, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.2933, | |
| 0.8589, | |
| 0.8544, | |
| 0.28, | |
| 0.8456, | |
| 0.8411, | |
| 0.8478 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 8, | |
| "steps": 44, | |
| "score": 0.809, | |
| "total_reward": 19.4157, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.853, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3526, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.2756, | |
| 0.8411, | |
| 0.8367, | |
| 0.8654 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 9, | |
| "steps": 40, | |
| "score": 0.782, | |
| "total_reward": 19.5499, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.837, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.3841, | |
| 0.9189, | |
| 0.3997, | |
| 0.3994, | |
| 0.3972, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8528 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 10, | |
| "steps": 31, | |
| "score": 0.712, | |
| "total_reward": 17.8008, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.625, | |
| "trust_calibration": 0.448, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 3, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.3303, | |
| 0.3281, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.9295, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.3356, | |
| 0.6281 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 11, | |
| "steps": 40, | |
| "score": 0.7732, | |
| "total_reward": 18.5566, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.835, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.2978, | |
| 0.2933, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8349 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 12, | |
| "steps": 42, | |
| "score": 0.8546, | |
| "total_reward": 18.8009, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8676 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 13, | |
| "steps": 39, | |
| "score": 0.833, | |
| "total_reward": 18.3252, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.811, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3456, | |
| 0.3433, | |
| 0.9189, | |
| 0.9144, | |
| 0.91, | |
| 0.9056, | |
| 0.9011, | |
| 0.8967, | |
| 0.8922, | |
| 0.8878, | |
| 0.8833, | |
| 0.8789, | |
| 0.8744, | |
| 0.87, | |
| 0.8656, | |
| 0.8611, | |
| 0.8567, | |
| 0.8522, | |
| 0.8478, | |
| 0.8485 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 14, | |
| "steps": 29, | |
| "score": 0.6889, | |
| "total_reward": 17.9127, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.609, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3763, | |
| 0.3841, | |
| 0.3689, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.3237, | |
| 0.8944, | |
| 0.8922, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.0863, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6353 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 15, | |
| "steps": 30, | |
| "score": 0.6847, | |
| "total_reward": 18.4869, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.635, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3608, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.3459, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.3348, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.8922, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.0863, | |
| 0.0841, | |
| 0.9184, | |
| 0.9139, | |
| 0.9095, | |
| 0.6404 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 16, | |
| "steps": 42, | |
| "score": 0.8546, | |
| "total_reward": 18.8009, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8676 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 17, | |
| "steps": 46, | |
| "score": 0.8048, | |
| "total_reward": 19.316, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.842, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.28, | |
| 0.8456, | |
| 0.8411, | |
| 0.8367, | |
| 0.8322, | |
| 0.8605 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 18, | |
| "steps": 26, | |
| "score": 0.6967, | |
| "total_reward": 16.7213, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.3333, | |
| "trust_calibration": 0.701, | |
| "adversarial_detections": 3, | |
| "adversarial_poisonings": 6, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.6149 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 19, | |
| "steps": 20, | |
| "score": 0.6193, | |
| "total_reward": 13.0053, | |
| "completion_rate": 0.65, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.576, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 5, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3478, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.3437, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 20, | |
| "steps": 46, | |
| "score": 0.7498, | |
| "total_reward": 19.4938, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.835, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3763, | |
| 0.3841, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.3022, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.2844, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8367, | |
| 0.8322, | |
| 0.8412 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 21, | |
| "steps": 42, | |
| "score": 0.8546, | |
| "total_reward": 18.8009, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.2756, | |
| 0.8411, | |
| 0.8676 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 22, | |
| "steps": 36, | |
| "score": 0.7334, | |
| "total_reward": 21.2675, | |
| "completion_rate": 1.0, | |
| "detection_rate": 0.8, | |
| "trust_calibration": 0.747, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3778, | |
| 0.9256, | |
| 0.9233, | |
| 0.3481, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.337, | |
| 0.9078, | |
| 0.3326, | |
| 0.9033, | |
| 0.9011, | |
| 0.3259, | |
| 0.8967, | |
| 0.3214, | |
| 0.8922, | |
| 0.89, | |
| 0.3148, | |
| 0.8856, | |
| 0.0863, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.9073, | |
| 0.8789, | |
| 0.8544, | |
| 0.7968 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 23, | |
| "steps": 28, | |
| "score": 0.6922, | |
| "total_reward": 17.3057, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.645, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3686, | |
| 0.3433, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.6298 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 24, | |
| "steps": 46, | |
| "score": 0.7725, | |
| "total_reward": 20.0838, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.836, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3686, | |
| 0.9483, | |
| 0.3711, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.3022, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.2711, | |
| 0.8367, | |
| 0.8322, | |
| 0.8591 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 25, | |
| "steps": 34, | |
| "score": 0.6755, | |
| "total_reward": 18.9148, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.71, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9144, | |
| 0.3322, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.3189, | |
| 0.3167, | |
| 0.8944, | |
| 0.3122, | |
| 0.31, | |
| 0.3078, | |
| 0.8856, | |
| 0.9011, | |
| 0.8967, | |
| 0.8922, | |
| 0.3078, | |
| 0.3033, | |
| 0.8789, | |
| 0.782 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 26, | |
| "steps": 34, | |
| "score": 0.6561, | |
| "total_reward": 19.0282, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.6667, | |
| "trust_calibration": 0.467, | |
| "adversarial_detections": 2, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.8989, | |
| 0.3667, | |
| 0.3644, | |
| 0.9372, | |
| 0.935, | |
| 0.3578, | |
| 0.3556, | |
| 0.3533, | |
| 0.3289, | |
| 0.8967, | |
| 0.8944, | |
| 0.8922, | |
| 0.317, | |
| 0.8878, | |
| 0.3126, | |
| 0.8833, | |
| 0.0841, | |
| 0.9184, | |
| 0.9139, | |
| 0.8878, | |
| 0.8656, | |
| 0.3163, | |
| 0.3419, | |
| 0.6213 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 27, | |
| "steps": 46, | |
| "score": 0.7256, | |
| "total_reward": 20.3155, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.828, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3763, | |
| 0.9211, | |
| 0.9189, | |
| 0.3997, | |
| 0.3994, | |
| 0.3972, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.2844, | |
| 0.85, | |
| 0.2756, | |
| 0.8411, | |
| 0.8367, | |
| 0.8322, | |
| 0.8395 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 28, | |
| "steps": 28, | |
| "score": 0.7355, | |
| "total_reward": 17.6509, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.725, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.1019, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.9273, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.6841 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 29, | |
| "steps": 28, | |
| "score": 0.7258, | |
| "total_reward": 18.144, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.709, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.3392, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.3281, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.6801 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 0, | |
| "steps": 42, | |
| "score": 0.867, | |
| "total_reward": 19.0739, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.935, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8904 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 1, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 2, | |
| "steps": 42, | |
| "score": 0.8422, | |
| "total_reward": 18.5276, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.933, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.3511, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8724 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 3, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 4, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1154, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 5, | |
| "steps": 46, | |
| "score": 0.7916, | |
| "total_reward": 18.9976, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.917, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.32, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8618 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 6, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8523, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 7, | |
| "steps": 40, | |
| "score": 0.8958, | |
| "total_reward": 18.8108, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.894 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 8, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3315, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.3511, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8857 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 9, | |
| "steps": 40, | |
| "score": 0.8958, | |
| "total_reward": 18.8108, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.894 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 10, | |
| "steps": 42, | |
| "score": 0.8421, | |
| "total_reward": 18.5263, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.928, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.3067, | |
| 0.3022, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.871 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 11, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 12, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 13, | |
| "steps": 40, | |
| "score": 0.8958, | |
| "total_reward": 18.8108, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.894 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 14, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3311, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.932, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.2933, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8853 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 15, | |
| "steps": 46, | |
| "score": 0.8162, | |
| "total_reward": 19.5883, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.93, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8825 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 16, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8523, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 17, | |
| "steps": 42, | |
| "score": 0.867, | |
| "total_reward": 19.0739, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.935, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8903 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 18, | |
| "steps": 40, | |
| "score": 0.8958, | |
| "total_reward": 18.8108, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.894 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 19, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 20, | |
| "steps": 46, | |
| "score": 0.7653, | |
| "total_reward": 18.3663, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.909, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.3067, | |
| 0.8722, | |
| 0.8678, | |
| 0.2933, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8423 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 21, | |
| "steps": 40, | |
| "score": 0.8958, | |
| "total_reward": 18.8109, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.932, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.894 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 22, | |
| "steps": 46, | |
| "score": 0.7652, | |
| "total_reward": 18.3659, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.908, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.2978, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.85, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8419 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 23, | |
| "steps": 42, | |
| "score": 0.8403, | |
| "total_reward": 18.4862, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.3511, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8727 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 24, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3311, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.933, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.2933, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8854 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 25, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3314, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.3511, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.3244, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8857 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 26, | |
| "steps": 42, | |
| "score": 0.8403, | |
| "total_reward": 18.4855, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.3067, | |
| 0.3022, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.872 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 27, | |
| "steps": 46, | |
| "score": 0.8179, | |
| "total_reward": 19.6285, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.924, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.2978, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.881 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 28, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 29, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3311, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.933, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.3244, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.3022, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8854 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 0, | |
| "steps": 46, | |
| "score": 0.7123, | |
| "total_reward": 18.5191, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.729, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.3344, | |
| 0.91, | |
| 0.9056, | |
| 0.9011, | |
| 0.3267, | |
| 0.8922, | |
| 0.8878, | |
| 0.8833, | |
| 0.8789, | |
| 0.8744, | |
| 0.3, | |
| 0.2956, | |
| 0.8789, | |
| 0.2844, | |
| 0.8678, | |
| 0.8611, | |
| 0.8367, | |
| 0.8322, | |
| 0.7797 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 1, | |
| "steps": 30, | |
| "score": 0.7434, | |
| "total_reward": 18.5852, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.625, | |
| "trust_calibration": 0.721, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 3, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.9273, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.9095, | |
| 0.716 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 2, | |
| "steps": 28, | |
| "score": 0.7198, | |
| "total_reward": 17.2756, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.5556, | |
| "trust_calibration": 0.559, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3778, | |
| 0.3456, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.9273, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.6418 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 3, | |
| "steps": 42, | |
| "score": 0.8546, | |
| "total_reward": 18.8008, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.843, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8675 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 4, | |
| "steps": 28, | |
| "score": 0.7455, | |
| "total_reward": 17.8908, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5556, | |
| "trust_calibration": 0.722, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.3414, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.9273, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.7 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 5, | |
| "steps": 46, | |
| "score": 0.7558, | |
| "total_reward": 18.1385, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.832, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.32, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.2711, | |
| 0.8367, | |
| 0.8322, | |
| 0.8229 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 6, | |
| "steps": 27, | |
| "score": 0.7645, | |
| "total_reward": 17.5844, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5556, | |
| "trust_calibration": 0.711, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.1019, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.9295, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.6995 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 7, | |
| "steps": 42, | |
| "score": 0.7756, | |
| "total_reward": 19.3902, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.835, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3763, | |
| 0.9211, | |
| 0.3919, | |
| 0.3997, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.2933, | |
| 0.8589, | |
| 0.8544, | |
| 0.28, | |
| 0.8456, | |
| 0.8411, | |
| 0.8478 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 8, | |
| "steps": 44, | |
| "score": 0.809, | |
| "total_reward": 19.4157, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.853, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3526, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.2756, | |
| 0.8411, | |
| 0.8367, | |
| 0.8654 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 9, | |
| "steps": 40, | |
| "score": 0.782, | |
| "total_reward": 19.5499, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.837, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.3841, | |
| 0.9189, | |
| 0.3997, | |
| 0.3994, | |
| 0.3972, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8528 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 10, | |
| "steps": 30, | |
| "score": 0.7394, | |
| "total_reward": 17.7466, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.7143, | |
| "trust_calibration": 0.434, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 2, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.3303, | |
| 0.3281, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.9317, | |
| 0.9273, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.3378, | |
| 0.6536 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 11, | |
| "steps": 40, | |
| "score": 0.7732, | |
| "total_reward": 18.5566, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.835, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.2978, | |
| 0.2933, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8349 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 12, | |
| "steps": 42, | |
| "score": 0.8546, | |
| "total_reward": 18.8009, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8676 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 13, | |
| "steps": 39, | |
| "score": 0.833, | |
| "total_reward": 18.3252, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.811, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3456, | |
| 0.3433, | |
| 0.9189, | |
| 0.9144, | |
| 0.91, | |
| 0.9056, | |
| 0.9011, | |
| 0.8967, | |
| 0.8922, | |
| 0.8878, | |
| 0.8833, | |
| 0.8789, | |
| 0.8744, | |
| 0.87, | |
| 0.8656, | |
| 0.8611, | |
| 0.8567, | |
| 0.8522, | |
| 0.8478, | |
| 0.8485 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 14, | |
| "steps": 30, | |
| "score": 0.7228, | |
| "total_reward": 18.7931, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.625, | |
| "trust_calibration": 0.609, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 3, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3763, | |
| 0.3841, | |
| 0.3689, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.3237, | |
| 0.8944, | |
| 0.8922, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.9095, | |
| 0.6881 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 15, | |
| "steps": 30, | |
| "score": 0.7426, | |
| "total_reward": 19.3077, | |
| "completion_rate": 0.95, | |
| "detection_rate": 0.625, | |
| "trust_calibration": 0.622, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 3, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3608, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.3459, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.3348, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.8922, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.9095, | |
| 0.7087 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 16, | |
| "steps": 42, | |
| "score": 0.8546, | |
| "total_reward": 18.8009, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8676 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 17, | |
| "steps": 46, | |
| "score": 0.8048, | |
| "total_reward": 19.316, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.842, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.28, | |
| 0.8456, | |
| 0.8411, | |
| 0.8367, | |
| 0.8322, | |
| 0.8605 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 18, | |
| "steps": 27, | |
| "score": 0.7333, | |
| "total_reward": 17.5998, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.701, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.6635 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 19, | |
| "steps": 29, | |
| "score": 0.7268, | |
| "total_reward": 18.1697, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5556, | |
| "trust_calibration": 0.66, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3478, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.3437, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6822 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 20, | |
| "steps": 46, | |
| "score": 0.7498, | |
| "total_reward": 19.4938, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.835, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3763, | |
| 0.3841, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.3022, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.2844, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8367, | |
| 0.8322, | |
| 0.8412 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 21, | |
| "steps": 42, | |
| "score": 0.8546, | |
| "total_reward": 18.8009, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.2756, | |
| 0.8411, | |
| 0.8676 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 22, | |
| "steps": 35, | |
| "score": 0.7608, | |
| "total_reward": 21.3031, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.748, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3778, | |
| 0.9256, | |
| 0.9233, | |
| 0.3481, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.337, | |
| 0.9078, | |
| 0.3326, | |
| 0.9033, | |
| 0.9011, | |
| 0.3259, | |
| 0.8967, | |
| 0.3214, | |
| 0.8922, | |
| 0.89, | |
| 0.3148, | |
| 0.8856, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.9095, | |
| 0.905, | |
| 0.8767, | |
| 0.8592 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 23, | |
| "steps": 27, | |
| "score": 0.7184, | |
| "total_reward": 17.2411, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.629, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3686, | |
| 0.3433, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.6449 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 24, | |
| "steps": 46, | |
| "score": 0.7725, | |
| "total_reward": 20.0838, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.836, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3686, | |
| 0.9483, | |
| 0.3711, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.3022, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.2711, | |
| 0.8367, | |
| 0.8322, | |
| 0.8591 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 25, | |
| "steps": 34, | |
| "score": 0.6755, | |
| "total_reward": 18.9148, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.71, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9144, | |
| 0.3322, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.3189, | |
| 0.3167, | |
| 0.8944, | |
| 0.3122, | |
| 0.31, | |
| 0.3078, | |
| 0.8856, | |
| 0.9011, | |
| 0.8967, | |
| 0.8922, | |
| 0.3078, | |
| 0.3033, | |
| 0.8789, | |
| 0.782 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 26, | |
| "steps": 33, | |
| "score": 0.6816, | |
| "total_reward": 19.0854, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.492, | |
| "adversarial_detections": 3, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.8989, | |
| 0.3667, | |
| 0.3644, | |
| 0.9372, | |
| 0.935, | |
| 0.3578, | |
| 0.3556, | |
| 0.3533, | |
| 0.3289, | |
| 0.8967, | |
| 0.8944, | |
| 0.8922, | |
| 0.317, | |
| 0.8878, | |
| 0.3126, | |
| 0.8833, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.8856, | |
| 0.8633, | |
| 0.3141, | |
| 0.7297 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 27, | |
| "steps": 46, | |
| "score": 0.7256, | |
| "total_reward": 20.3155, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.828, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3763, | |
| 0.9211, | |
| 0.9189, | |
| 0.3997, | |
| 0.3994, | |
| 0.3972, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.2844, | |
| 0.85, | |
| 0.2756, | |
| 0.8411, | |
| 0.8367, | |
| 0.8322, | |
| 0.8395 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 28, | |
| "steps": 27, | |
| "score": 0.7645, | |
| "total_reward": 17.5845, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5556, | |
| "trust_calibration": 0.712, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.1019, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.9295, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.6996 | |
| ] | |
| }, | |
| { | |
| "policy": "trained", | |
| "task_type": "task3", | |
| "seed": 29, | |
| "steps": 29, | |
| "score": 0.761, | |
| "total_reward": 19.0244, | |
| "completion_rate": 0.95, | |
| "detection_rate": 0.625, | |
| "trust_calibration": 0.709, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 3, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.3392, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.3281, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.7329 | |
| ] | |
| } | |
| ] | |
| } | |