Spaces:
Running
Running
| { | |
| "task": "task3", | |
| "tasks": [ | |
| "task3" | |
| ], | |
| "episodes_per_policy": 100, | |
| "adaptive": false, | |
| "difficulty_controller": { | |
| "adaptive": true, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "difficulty_controller_by_task_policy": { | |
| "task3": { | |
| "random": {}, | |
| "heuristic": {}, | |
| "oracle_lite": {} | |
| } | |
| }, | |
| "summary": { | |
| "random": { | |
| "episodes": 100, | |
| "avg_score": 0.6601, | |
| "avg_completion_rate": 0.8165, | |
| "avg_detection_rate": 0.375, | |
| "avg_trust_calibration": 0.7349, | |
| "avg_steps": 36.13 | |
| }, | |
| "heuristic": { | |
| "episodes": 100, | |
| "avg_score": 0.7314, | |
| "avg_completion_rate": 0.8935, | |
| "avg_detection_rate": 0.7621, | |
| "avg_trust_calibration": 0.74, | |
| "avg_steps": 35.54 | |
| }, | |
| "oracle_lite": { | |
| "episodes": 100, | |
| "avg_score": 0.8562, | |
| "avg_completion_rate": 0.991, | |
| "avg_detection_rate": 1.0, | |
| "avg_trust_calibration": 0.9304, | |
| "avg_steps": 42.62 | |
| } | |
| }, | |
| "by_task": { | |
| "task3": { | |
| "random": { | |
| "episodes": 100, | |
| "avg_score": 0.6601, | |
| "avg_completion_rate": 0.8165, | |
| "avg_detection_rate": 0.375, | |
| "avg_trust_calibration": 0.7349, | |
| "avg_steps": 36.13 | |
| }, | |
| "heuristic": { | |
| "episodes": 100, | |
| "avg_score": 0.7314, | |
| "avg_completion_rate": 0.8935, | |
| "avg_detection_rate": 0.7621, | |
| "avg_trust_calibration": 0.74, | |
| "avg_steps": 35.54 | |
| }, | |
| "oracle_lite": { | |
| "episodes": 100, | |
| "avg_score": 0.8562, | |
| "avg_completion_rate": 0.991, | |
| "avg_detection_rate": 1.0, | |
| "avg_trust_calibration": 0.9304, | |
| "avg_steps": 42.62 | |
| } | |
| } | |
| }, | |
| "episodes": [ | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 0, | |
| "steps": 36, | |
| "score": 0.6105, | |
| "total_reward": 15.2622, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.884, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8956, | |
| 0.9233, | |
| 0.9211, | |
| 0.02, | |
| 0.9144, | |
| 0.9122, | |
| 0.9078, | |
| 0.3556, | |
| 0.02, | |
| 0.02, | |
| 0.8467, | |
| 0.8922, | |
| 0.8378, | |
| 0.8833, | |
| 0.9061, | |
| 0.3067, | |
| 0.02, | |
| 0.8722, | |
| 0.073, | |
| 0.8306, | |
| 0.9061, | |
| 0.3397, | |
| 0.3044, | |
| 0.5035 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 1, | |
| "steps": 40, | |
| "score": 0.7205, | |
| "total_reward": 18.0135, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.662, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9506, | |
| 0.8889, | |
| 0.8822, | |
| 0.91, | |
| 0.8556, | |
| 0.9011, | |
| 0.3167, | |
| 0.8622, | |
| 0.89, | |
| 0.8356, | |
| 0.8633, | |
| 0.3011, | |
| 0.8444, | |
| 0.8378, | |
| 0.8656, | |
| 0.8261, | |
| 0.8589, | |
| 0.8961, | |
| 0.02, | |
| 0.3, | |
| 0.8106, | |
| 0.2933, | |
| 0.7916 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 2, | |
| "steps": 37, | |
| "score": 0.7627, | |
| "total_reward": 17.5411, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.752, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3778, | |
| 0.3203, | |
| 0.9211, | |
| 0.9189, | |
| 0.8822, | |
| 0.91, | |
| 0.8556, | |
| 0.8511, | |
| 0.8967, | |
| 0.8422, | |
| 0.89, | |
| 0.8356, | |
| 0.8311, | |
| 0.8767, | |
| 0.2944, | |
| 0.8972, | |
| 0.87, | |
| 0.8656, | |
| 0.8789, | |
| 0.02, | |
| 0.8722, | |
| 0.8207 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 3, | |
| "steps": 36, | |
| "score": 0.6303, | |
| "total_reward": 16.3887, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.798, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9233, | |
| 0.3411, | |
| 0.8667, | |
| 0.9144, | |
| 0.9372, | |
| 0.91, | |
| 0.3278, | |
| 0.3556, | |
| 0.8511, | |
| 0.8989, | |
| 0.2922, | |
| 0.8878, | |
| 0.3056, | |
| 0.8489, | |
| 0.8767, | |
| 0.8922, | |
| 0.333, | |
| 0.2878, | |
| 0.8283, | |
| 0.8589, | |
| 0.3297, | |
| 0.3552, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 4, | |
| "steps": 41, | |
| "score": 0.6894, | |
| "total_reward": 18.6138, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.405, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9506, | |
| 0.9233, | |
| 0.3711, | |
| 0.8844, | |
| 0.9122, | |
| 0.33, | |
| 0.3186, | |
| 0.9011, | |
| 0.8667, | |
| 0.86, | |
| 0.8878, | |
| 0.8833, | |
| 0.8789, | |
| 0.8767, | |
| 0.8994, | |
| 0.3352, | |
| 0.8678, | |
| 0.3033, | |
| 0.8239, | |
| 0.8744, | |
| 0.8678, | |
| 0.2656, | |
| 0.2933, | |
| 0.2911, | |
| 0.7076 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 5, | |
| "steps": 31, | |
| "score": 0.6062, | |
| "total_reward": 15.1538, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.816, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8933, | |
| 0.9211, | |
| 0.3367, | |
| 0.3344, | |
| 0.3322, | |
| 0.8578, | |
| 0.9306, | |
| 0.9033, | |
| 0.3211, | |
| 0.8467, | |
| 0.9194, | |
| 0.32, | |
| 0.8878, | |
| 0.8856, | |
| 0.8811, | |
| 0.02, | |
| 0.8394, | |
| 0.0752, | |
| 0.87, | |
| 0.8678, | |
| 0.02, | |
| 0.8883, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 6, | |
| "steps": 39, | |
| "score": 0.6337, | |
| "total_reward": 15.8429, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.872, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8956, | |
| 0.8889, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9006, | |
| 0.8511, | |
| 0.3197, | |
| 0.9194, | |
| 0.28, | |
| 0.8556, | |
| 0.8811, | |
| 0.8789, | |
| 0.8422, | |
| 0.8856, | |
| 0.3311, | |
| 0.8589, | |
| 0.0597, | |
| 0.3222, | |
| 0.27, | |
| 0.8728, | |
| 0.02, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 7, | |
| "steps": 32, | |
| "score": 0.7179, | |
| "total_reward": 15.793, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.869, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8756, | |
| 0.8911, | |
| 0.8867, | |
| 0.9144, | |
| 0.86, | |
| 0.9056, | |
| 0.9033, | |
| 0.8989, | |
| 0.8967, | |
| 0.3444, | |
| 0.89, | |
| 0.8356, | |
| 0.9083, | |
| 0.02, | |
| 0.8789, | |
| 0.8744, | |
| 0.87, | |
| 0.8928, | |
| 0.8633, | |
| 0.3111, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 8, | |
| "steps": 31, | |
| "score": 0.7087, | |
| "total_reward": 16.3004, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.701, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.3733, | |
| 0.9211, | |
| 0.8844, | |
| 0.9122, | |
| 0.91, | |
| 0.3578, | |
| 0.9056, | |
| 0.9283, | |
| 0.8667, | |
| 0.8944, | |
| 0.84, | |
| 0.3456, | |
| 0.8833, | |
| 0.8811, | |
| 0.8589, | |
| 0.8767, | |
| 0.0774, | |
| 0.835, | |
| 0.8856, | |
| 0.8633, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 9, | |
| "steps": 37, | |
| "score": 0.7151, | |
| "total_reward": 17.877, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.558, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9256, | |
| 0.9233, | |
| 0.8689, | |
| 0.3597, | |
| 0.8822, | |
| 0.935, | |
| 0.02, | |
| 0.3556, | |
| 0.9011, | |
| 0.8967, | |
| 0.8944, | |
| 0.84, | |
| 0.8356, | |
| 0.8489, | |
| 0.8244, | |
| 0.835, | |
| 0.3178, | |
| 0.8656, | |
| 0.8261, | |
| 0.8217, | |
| 0.3044, | |
| 0.85, | |
| 0.7724 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 10, | |
| "steps": 38, | |
| "score": 0.6037, | |
| "total_reward": 17.5072, | |
| "completion_rate": 0.75, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.772, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9233, | |
| 0.9461, | |
| 0.8844, | |
| 0.3622, | |
| 0.9078, | |
| 0.02, | |
| 0.3233, | |
| 0.9261, | |
| 0.8967, | |
| 0.8944, | |
| 0.86, | |
| 0.3378, | |
| 0.02, | |
| 0.3263, | |
| 0.8811, | |
| 0.3289, | |
| 0.2967, | |
| 0.8994, | |
| 0.8722, | |
| 0.8678, | |
| 0.3386, | |
| 0.3463, | |
| 0.02, | |
| 0.3089, | |
| 0.8544, | |
| 0.355, | |
| 0.7709 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 11, | |
| "steps": 32, | |
| "score": 0.5881, | |
| "total_reward": 14.703, | |
| "completion_rate": 0.7, | |
| "detection_rate": 0.3333, | |
| "trust_calibration": 0.743, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8756, | |
| 0.9233, | |
| 0.9211, | |
| 0.3459, | |
| 0.8844, | |
| 0.3622, | |
| 0.9078, | |
| 0.9306, | |
| 0.02, | |
| 0.8689, | |
| 0.8967, | |
| 0.8944, | |
| 0.8722, | |
| 0.8878, | |
| 0.8856, | |
| 0.9228, | |
| 0.0819, | |
| 0.2967, | |
| 0.3244, | |
| 0.8722, | |
| 0.02, | |
| 0.3356, | |
| 0.3011, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 12, | |
| "steps": 36, | |
| "score": 0.6249, | |
| "total_reward": 14.9974, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.3333, | |
| "trust_calibration": 0.716, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.8933, | |
| 0.3411, | |
| 0.9189, | |
| 0.8644, | |
| 0.9122, | |
| 0.8756, | |
| 0.02, | |
| 0.3441, | |
| 0.3189, | |
| 0.3167, | |
| 0.8622, | |
| 0.8878, | |
| 0.8511, | |
| 0.8789, | |
| 0.8244, | |
| 0.8878, | |
| 0.2856, | |
| 0.0663, | |
| 0.8589, | |
| 0.8961, | |
| 0.8772, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 13, | |
| "steps": 38, | |
| "score": 0.7872, | |
| "total_reward": 18.1053, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.64, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8911, | |
| 0.8667, | |
| 0.9122, | |
| 0.3278, | |
| 0.9056, | |
| 0.8689, | |
| 0.02, | |
| 0.8622, | |
| 0.8378, | |
| 0.8533, | |
| 0.8289, | |
| 0.8767, | |
| 0.8722, | |
| 0.895, | |
| 0.8856, | |
| 0.8633, | |
| 0.8611, | |
| 0.9017, | |
| 0.8939, | |
| 0.3, | |
| 0.8081 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 14, | |
| "steps": 35, | |
| "score": 0.6988, | |
| "total_reward": 18.1679, | |
| "completion_rate": 0.95, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.661, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8733, | |
| 0.8889, | |
| 0.8844, | |
| 0.3322, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.8989, | |
| 0.8622, | |
| 0.89, | |
| 0.3408, | |
| 0.8856, | |
| 0.8833, | |
| 0.3011, | |
| 0.8267, | |
| 0.8372, | |
| 0.87, | |
| 0.0708, | |
| 0.8833, | |
| 0.2811, | |
| 0.8839, | |
| 0.8544, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 15, | |
| "steps": 38, | |
| "score": 0.68, | |
| "total_reward": 19.0388, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.774, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3548, | |
| 0.9256, | |
| 0.9233, | |
| 0.8689, | |
| 0.3667, | |
| 0.3644, | |
| 0.8922, | |
| 0.935, | |
| 0.8556, | |
| 0.3533, | |
| 0.3211, | |
| 0.3189, | |
| 0.8944, | |
| 0.89, | |
| 0.3378, | |
| 0.8333, | |
| 0.8789, | |
| 0.8767, | |
| 0.8744, | |
| 0.2922, | |
| 0.333, | |
| 0.8656, | |
| 0.8261, | |
| 0.8567, | |
| 0.3, | |
| 0.8478, | |
| 0.8066 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 16, | |
| "steps": 33, | |
| "score": 0.5966, | |
| "total_reward": 15.511, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.691, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.3044, | |
| 0.02, | |
| 0.935, | |
| 0.02, | |
| 0.3256, | |
| 0.8711, | |
| 0.8967, | |
| 0.3444, | |
| 0.02, | |
| 0.89, | |
| 0.8556, | |
| 0.8833, | |
| 0.3311, | |
| 0.3289, | |
| 0.8744, | |
| 0.8878, | |
| 0.8633, | |
| 0.3141, | |
| 0.8589, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 17, | |
| "steps": 35, | |
| "score": 0.5891, | |
| "total_reward": 16.4939, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.795, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.02, | |
| 0.3433, | |
| 0.3319, | |
| 0.8644, | |
| 0.3622, | |
| 0.91, | |
| 0.3286, | |
| 0.9033, | |
| 0.9011, | |
| 0.8789, | |
| 0.3697, | |
| 0.9194, | |
| 0.84, | |
| 0.8856, | |
| 0.8811, | |
| 0.8789, | |
| 0.3597, | |
| 0.2692, | |
| 0.87, | |
| 0.2878, | |
| 0.8656, | |
| 0.0663, | |
| 0.8239, | |
| 0.8817, | |
| 0.02, | |
| 0.4835 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 18, | |
| "steps": 35, | |
| "score": 0.6548, | |
| "total_reward": 16.3705, | |
| "completion_rate": 0.75, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.573, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9233, | |
| 0.02, | |
| 0.9167, | |
| 0.3644, | |
| 0.3622, | |
| 0.91, | |
| 0.8556, | |
| 0.9033, | |
| 0.8689, | |
| 0.8967, | |
| 0.8922, | |
| 0.915, | |
| 0.02, | |
| 0.8533, | |
| 0.8789, | |
| 0.2967, | |
| 0.3422, | |
| 0.3078, | |
| 0.8656, | |
| 0.8611, | |
| 0.2789, | |
| 0.3297, | |
| 0.7281 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 19, | |
| "steps": 38, | |
| "score": 0.6912, | |
| "total_reward": 17.2799, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.834, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3156, | |
| 0.9211, | |
| 0.9189, | |
| 0.9144, | |
| 0.9122, | |
| 0.33, | |
| 0.02, | |
| 0.9133, | |
| 0.9261, | |
| 0.8967, | |
| 0.8422, | |
| 0.89, | |
| 0.8356, | |
| 0.8511, | |
| 0.8789, | |
| 0.8922, | |
| 0.87, | |
| 0.3178, | |
| 0.8811, | |
| 0.8589, | |
| 0.8544, | |
| 0.87, | |
| 0.3108, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 20, | |
| "steps": 44, | |
| "score": 0.6149, | |
| "total_reward": 19.0606, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.859, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.8711, | |
| 0.8667, | |
| 0.9144, | |
| 0.9122, | |
| 0.337, | |
| 0.9078, | |
| 0.9033, | |
| 0.3281, | |
| 0.3519, | |
| 0.3467, | |
| 0.8922, | |
| 0.89, | |
| 0.3608, | |
| 0.8856, | |
| 0.8833, | |
| 0.8289, | |
| 0.2714, | |
| 0.87, | |
| 0.3508, | |
| 0.8656, | |
| 0.3089, | |
| 0.0597, | |
| 0.8172, | |
| 0.053, | |
| 0.8728, | |
| 0.8083, | |
| 0.3439, | |
| 0.2567, | |
| 0.5146 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 21, | |
| "steps": 30, | |
| "score": 0.7401, | |
| "total_reward": 15.543, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.709, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.8911, | |
| 0.8867, | |
| 0.9144, | |
| 0.3322, | |
| 0.9078, | |
| 0.9056, | |
| 0.8711, | |
| 0.8467, | |
| 0.8922, | |
| 0.89, | |
| 0.02, | |
| 0.8856, | |
| 0.8811, | |
| 0.8589, | |
| 0.8744, | |
| 0.87, | |
| 0.8856, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 22, | |
| "steps": 45, | |
| "score": 0.5529, | |
| "total_reward": 16.5871, | |
| "completion_rate": 0.7, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.709, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3548, | |
| 0.02, | |
| 0.9311, | |
| 0.8844, | |
| 0.3622, | |
| 0.36, | |
| 0.9078, | |
| 0.2933, | |
| 0.8689, | |
| 0.9217, | |
| 0.3444, | |
| 0.32, | |
| 0.8556, | |
| 0.3033, | |
| 0.3011, | |
| 0.8767, | |
| 0.3022, | |
| 0.87, | |
| 0.8678, | |
| 0.8811, | |
| 0.0619, | |
| 0.8544, | |
| 0.3022, | |
| 0.8478, | |
| 0.8083, | |
| 0.3119, | |
| 0.0397, | |
| 0.8322, | |
| 0.4222 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 23, | |
| "steps": 31, | |
| "score": 0.7921, | |
| "total_reward": 18.2193, | |
| "completion_rate": 0.95, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.847, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8756, | |
| 0.9233, | |
| 0.3481, | |
| 0.9439, | |
| 0.9167, | |
| 0.9144, | |
| 0.86, | |
| 0.9078, | |
| 0.9033, | |
| 0.9261, | |
| 0.8967, | |
| 0.8944, | |
| 0.8922, | |
| 0.89, | |
| 0.8878, | |
| 0.02, | |
| 0.9228, | |
| 0.0819, | |
| 0.8744, | |
| 0.87, | |
| 0.8833, | |
| 0.7254 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 24, | |
| "steps": 45, | |
| "score": 0.6293, | |
| "total_reward": 16.3622, | |
| "completion_rate": 0.75, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.813, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3156, | |
| 0.9483, | |
| 0.9461, | |
| 0.3197, | |
| 0.9072, | |
| 0.8756, | |
| 0.9033, | |
| 0.3541, | |
| 0.02, | |
| 0.9044, | |
| 0.02, | |
| 0.3608, | |
| 0.8511, | |
| 0.2967, | |
| 0.8722, | |
| 0.8356, | |
| 0.8111, | |
| 0.2867, | |
| 0.8544, | |
| 0.845, | |
| 0.3156, | |
| 0.8061, | |
| 0.8367, | |
| 0.7972, | |
| 0.7658 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 25, | |
| "steps": 34, | |
| "score": 0.6745, | |
| "total_reward": 16.8613, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.809, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8933, | |
| 0.9461, | |
| 0.9189, | |
| 0.8644, | |
| 0.9122, | |
| 0.91, | |
| 0.8556, | |
| 0.8711, | |
| 0.8989, | |
| 0.3237, | |
| 0.8944, | |
| 0.8922, | |
| 0.8378, | |
| 0.3356, | |
| 0.8833, | |
| 0.8489, | |
| 0.0797, | |
| 0.3244, | |
| 0.3222, | |
| 0.8878, | |
| 0.8906, | |
| 0.9061, | |
| 0.2967, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 26, | |
| "steps": 43, | |
| "score": 0.5813, | |
| "total_reward": 17.4397, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.815, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8933, | |
| 0.9211, | |
| 0.9439, | |
| 0.9167, | |
| 0.9144, | |
| 0.02, | |
| 0.3308, | |
| 0.8711, | |
| 0.8467, | |
| 0.8922, | |
| 0.8878, | |
| 0.3486, | |
| 0.3033, | |
| 0.2759, | |
| 0.8767, | |
| 0.3244, | |
| 0.3452, | |
| 0.29, | |
| 0.8156, | |
| 0.8633, | |
| 0.2889, | |
| 0.0597, | |
| 0.8544, | |
| 0.3372, | |
| 0.8478, | |
| 0.2956, | |
| 0.2811, | |
| 0.2889, | |
| 0.4707 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 27, | |
| "steps": 40, | |
| "score": 0.5674, | |
| "total_reward": 15.3205, | |
| "completion_rate": 0.7, | |
| "detection_rate": 0.25, | |
| "trust_calibration": 0.816, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 3, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9206, | |
| 0.3433, | |
| 0.3541, | |
| 0.9189, | |
| 0.9244, | |
| 0.3622, | |
| 0.9078, | |
| 0.9133, | |
| 0.8689, | |
| 0.8644, | |
| 0.885, | |
| 0.02, | |
| 0.8533, | |
| 0.9061, | |
| 0.8267, | |
| 0.9139, | |
| 0.073, | |
| 0.3356, | |
| 0.3441, | |
| 0.0619, | |
| 0.8994, | |
| 0.2722, | |
| 0.323, | |
| 0.3308, | |
| 0.8433, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 28, | |
| "steps": 32, | |
| "score": 0.6685, | |
| "total_reward": 16.0443, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.793, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8956, | |
| 0.9211, | |
| 0.9189, | |
| 0.8844, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.3763, | |
| 0.02, | |
| 0.3167, | |
| 0.8944, | |
| 0.89, | |
| 0.9128, | |
| 0.8856, | |
| 0.8461, | |
| 0.8589, | |
| 0.3267, | |
| 0.8372, | |
| 0.9128, | |
| 0.2933, | |
| 0.3461, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 29, | |
| "steps": 40, | |
| "score": 0.6868, | |
| "total_reward": 17.8577, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.907, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8933, | |
| 0.3411, | |
| 0.9189, | |
| 0.02, | |
| 0.8822, | |
| 0.9028, | |
| 0.02, | |
| 0.8511, | |
| 0.8989, | |
| 0.3467, | |
| 0.3444, | |
| 0.8578, | |
| 0.8856, | |
| 0.8511, | |
| 0.2989, | |
| 0.9094, | |
| 0.87, | |
| 0.8856, | |
| 0.3641, | |
| 0.8567, | |
| 0.3572, | |
| 0.8895, | |
| 0.8083, | |
| 0.8353 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 30, | |
| "steps": 28, | |
| "score": 0.4767, | |
| "total_reward": 10.4876, | |
| "completion_rate": 0.5, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.745, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 3, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8956, | |
| 0.9233, | |
| 0.02, | |
| 0.9189, | |
| 0.02, | |
| 0.9394, | |
| 0.02, | |
| 0.9078, | |
| 0.8533, | |
| 0.9011, | |
| 0.02, | |
| 0.8967, | |
| 0.3144, | |
| 0.2878, | |
| 0.8533, | |
| 0.3441, | |
| 0.0819, | |
| 0.2967, | |
| 0.0774, | |
| 0.835, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 31, | |
| "steps": 33, | |
| "score": 0.7077, | |
| "total_reward": 19.8143, | |
| "completion_rate": 0.95, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.68, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9256, | |
| 0.9483, | |
| 0.3711, | |
| 0.9189, | |
| 0.3344, | |
| 0.9122, | |
| 0.91, | |
| 0.02, | |
| 0.3556, | |
| 0.9033, | |
| 0.3211, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.89, | |
| 0.3456, | |
| 0.8833, | |
| 0.8989, | |
| 0.0797, | |
| 0.8372, | |
| 0.29, | |
| 0.9073, | |
| 0.8633, | |
| 0.8611, | |
| 0.8589, | |
| 0.679 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 32, | |
| "steps": 40, | |
| "score": 0.5915, | |
| "total_reward": 18.3369, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.392, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8756, | |
| 0.9233, | |
| 0.9211, | |
| 0.3689, | |
| 0.3444, | |
| 0.3622, | |
| 0.33, | |
| 0.9328, | |
| 0.9056, | |
| 0.3311, | |
| 0.8667, | |
| 0.3214, | |
| 0.8922, | |
| 0.8878, | |
| 0.8856, | |
| 0.3033, | |
| 0.9061, | |
| 0.8789, | |
| 0.8944, | |
| 0.2922, | |
| 0.323, | |
| 0.2956, | |
| 0.3133, | |
| 0.0641, | |
| 0.8839, | |
| 0.8744, | |
| 0.3152, | |
| 0.3178, | |
| 0.3133, | |
| 0.3715 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 33, | |
| "steps": 41, | |
| "score": 0.59, | |
| "total_reward": 15.3412, | |
| "completion_rate": 0.7, | |
| "detection_rate": 0.6667, | |
| "trust_calibration": 0.72, | |
| "adversarial_detections": 2, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8911, | |
| 0.9439, | |
| 0.8644, | |
| 0.02, | |
| 0.8578, | |
| 0.02, | |
| 0.9033, | |
| 0.8489, | |
| 0.8967, | |
| 0.3444, | |
| 0.287, | |
| 0.8556, | |
| 0.8311, | |
| 0.3497, | |
| 0.8722, | |
| 0.073, | |
| 0.02, | |
| 0.905, | |
| 0.9006, | |
| 0.8544, | |
| 0.3022, | |
| 0.3308, | |
| 0.3186, | |
| 0.6338 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 34, | |
| "steps": 35, | |
| "score": 0.6414, | |
| "total_reward": 16.0344, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.865, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8933, | |
| 0.8689, | |
| 0.02, | |
| 0.3344, | |
| 0.3392, | |
| 0.353, | |
| 0.9056, | |
| 0.3303, | |
| 0.8667, | |
| 0.8622, | |
| 0.89, | |
| 0.8556, | |
| 0.8511, | |
| 0.8767, | |
| 0.8744, | |
| 0.8722, | |
| 0.32, | |
| 0.8678, | |
| 0.8456, | |
| 0.8611, | |
| 0.8589, | |
| 0.2944, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 35, | |
| "steps": 39, | |
| "score": 0.5508, | |
| "total_reward": 15.9746, | |
| "completion_rate": 0.7, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.645, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3133, | |
| 0.3411, | |
| 0.9167, | |
| 0.8822, | |
| 0.3056, | |
| 0.8511, | |
| 0.8989, | |
| 0.3497, | |
| 0.9194, | |
| 0.3192, | |
| 0.34, | |
| 0.02, | |
| 0.8856, | |
| 0.3033, | |
| 0.2989, | |
| 0.8767, | |
| 0.8744, | |
| 0.2922, | |
| 0.02, | |
| 0.8478, | |
| 0.3386, | |
| 0.8789, | |
| 0.8817, | |
| 0.8172, | |
| 0.8895, | |
| 0.3286, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 36, | |
| "steps": 31, | |
| "score": 0.5216, | |
| "total_reward": 12.5188, | |
| "completion_rate": 0.55, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.506, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.02, | |
| 0.8711, | |
| 0.3519, | |
| 0.3367, | |
| 0.3422, | |
| 0.91, | |
| 0.9328, | |
| 0.02, | |
| 0.8711, | |
| 0.8467, | |
| 0.8922, | |
| 0.34, | |
| 0.3078, | |
| 0.8856, | |
| 0.3333, | |
| 0.3311, | |
| 0.02, | |
| 0.8744, | |
| 0.2922, | |
| 0.9095, | |
| 0.8283, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 37, | |
| "steps": 38, | |
| "score": 0.5875, | |
| "total_reward": 16.449, | |
| "completion_rate": 0.7, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.866, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8933, | |
| 0.9211, | |
| 0.3689, | |
| 0.02, | |
| 0.9122, | |
| 0.8578, | |
| 0.9033, | |
| 0.3211, | |
| 0.8989, | |
| 0.9217, | |
| 0.86, | |
| 0.3508, | |
| 0.02, | |
| 0.3563, | |
| 0.3641, | |
| 0.8789, | |
| 0.8394, | |
| 0.31, | |
| 0.0708, | |
| 0.3156, | |
| 0.3011, | |
| 0.8984, | |
| 0.3222, | |
| 0.335, | |
| 0.8278, | |
| 0.627 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 38, | |
| "steps": 35, | |
| "score": 0.7022, | |
| "total_reward": 18.2564, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.441, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.8889, | |
| 0.9167, | |
| 0.9072, | |
| 0.8778, | |
| 0.02, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.3574, | |
| 0.84, | |
| 0.0908, | |
| 0.8833, | |
| 0.3311, | |
| 0.8417, | |
| 0.8744, | |
| 0.8722, | |
| 0.8678, | |
| 0.3486, | |
| 0.3133, | |
| 0.3289, | |
| 0.4474 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 39, | |
| "steps": 33, | |
| "score": 0.7547, | |
| "total_reward": 18.1121, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.604, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9233, | |
| 0.9139, | |
| 0.9167, | |
| 0.8822, | |
| 0.02, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.3189, | |
| 0.8444, | |
| 0.3122, | |
| 0.8578, | |
| 0.9106, | |
| 0.8833, | |
| 0.8489, | |
| 0.9161, | |
| 0.835, | |
| 0.8928, | |
| 0.9083, | |
| 0.3141, | |
| 0.3219, | |
| 0.7927 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 40, | |
| "steps": 37, | |
| "score": 0.5943, | |
| "total_reward": 15.4518, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.636, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9233, | |
| 0.8889, | |
| 0.9167, | |
| 0.3574, | |
| 0.8778, | |
| 0.8533, | |
| 0.9011, | |
| 0.8989, | |
| 0.8622, | |
| 0.34, | |
| 0.8878, | |
| 0.02, | |
| 0.2711, | |
| 0.8267, | |
| 0.3244, | |
| 0.33, | |
| 0.8356, | |
| 0.8633, | |
| 0.2811, | |
| 0.0619, | |
| 0.8567, | |
| 0.8172, | |
| 0.27, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 41, | |
| "steps": 38, | |
| "score": 0.7027, | |
| "total_reward": 18.9728, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.857, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.8733, | |
| 0.9461, | |
| 0.9167, | |
| 0.8622, | |
| 0.337, | |
| 0.8756, | |
| 0.3233, | |
| 0.02, | |
| 0.8967, | |
| 0.8944, | |
| 0.32, | |
| 0.8878, | |
| 0.8511, | |
| 0.8789, | |
| 0.8422, | |
| 0.87, | |
| 0.8678, | |
| 0.8833, | |
| 0.3341, | |
| 0.8567, | |
| 0.2744, | |
| 0.3352, | |
| 0.85, | |
| 0.3328, | |
| 0.8448 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 42, | |
| "steps": 34, | |
| "score": 0.7826, | |
| "total_reward": 18.7831, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.768, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9506, | |
| 0.8911, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8967, | |
| 0.8622, | |
| 0.8578, | |
| 0.8656, | |
| 0.02, | |
| 0.3311, | |
| 0.8789, | |
| 0.3267, | |
| 0.9139, | |
| 0.8678, | |
| 0.3286, | |
| 0.9061, | |
| 0.8217, | |
| 0.849 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 43, | |
| "steps": 34, | |
| "score": 0.6248, | |
| "total_reward": 16.2447, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.572, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.02, | |
| 0.8911, | |
| 0.9189, | |
| 0.3667, | |
| 0.9122, | |
| 0.91, | |
| 0.8556, | |
| 0.3533, | |
| 0.8489, | |
| 0.8967, | |
| 0.8944, | |
| 0.3422, | |
| 0.34, | |
| 0.8856, | |
| 0.8833, | |
| 0.3011, | |
| 0.8417, | |
| 0.8744, | |
| 0.8722, | |
| 0.02, | |
| 0.8833, | |
| 0.2811, | |
| 0.8567, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 44, | |
| "steps": 41, | |
| "score": 0.6806, | |
| "total_reward": 19.7386, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.81, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9483, | |
| 0.3411, | |
| 0.9189, | |
| 0.3667, | |
| 0.8822, | |
| 0.8778, | |
| 0.9033, | |
| 0.9011, | |
| 0.8667, | |
| 0.3214, | |
| 0.8922, | |
| 0.34, | |
| 0.8878, | |
| 0.3586, | |
| 0.8811, | |
| 0.8267, | |
| 0.8744, | |
| 0.31, | |
| 0.9106, | |
| 0.8789, | |
| 0.8567, | |
| 0.8522, | |
| 0.8895, | |
| 0.2656, | |
| 0.02, | |
| 0.3261, | |
| 0.8264 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 45, | |
| "steps": 35, | |
| "score": 0.8133, | |
| "total_reward": 19.519, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.742, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.8867, | |
| 0.8622, | |
| 0.8578, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.8422, | |
| 0.89, | |
| 0.8556, | |
| 0.3033, | |
| 0.8789, | |
| 0.2967, | |
| 0.9139, | |
| 0.9128, | |
| 0.9083, | |
| 0.2811, | |
| 0.8567, | |
| 0.8578 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 46, | |
| "steps": 36, | |
| "score": 0.7646, | |
| "total_reward": 19.8787, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.813, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3478, | |
| 0.9233, | |
| 0.9461, | |
| 0.9439, | |
| 0.9167, | |
| 0.9144, | |
| 0.88, | |
| 0.9328, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.3167, | |
| 0.8922, | |
| 0.317, | |
| 0.8856, | |
| 0.3363, | |
| 0.3089, | |
| 0.8767, | |
| 0.8372, | |
| 0.32, | |
| 0.8856, | |
| 0.9061, | |
| 0.8567, | |
| 0.8559 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 47, | |
| "steps": 31, | |
| "score": 0.7026, | |
| "total_reward": 16.8629, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.89, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9506, | |
| 0.9233, | |
| 0.9189, | |
| 0.8844, | |
| 0.02, | |
| 0.91, | |
| 0.8733, | |
| 0.8489, | |
| 0.8967, | |
| 0.8944, | |
| 0.9172, | |
| 0.89, | |
| 0.3256, | |
| 0.8833, | |
| 0.3311, | |
| 0.3319, | |
| 0.0797, | |
| 0.0774, | |
| 0.87, | |
| 0.8678, | |
| 0.8283, | |
| 0.5511 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 48, | |
| "steps": 40, | |
| "score": 0.6251, | |
| "total_reward": 17.5018, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.739, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8933, | |
| 0.8867, | |
| 0.9144, | |
| 0.3622, | |
| 0.91, | |
| 0.3278, | |
| 0.9056, | |
| 0.3763, | |
| 0.3211, | |
| 0.3489, | |
| 0.3544, | |
| 0.8922, | |
| 0.373, | |
| 0.8533, | |
| 0.8489, | |
| 0.8744, | |
| 0.82, | |
| 0.8156, | |
| 0.8633, | |
| 0.8411, | |
| 0.9017, | |
| 0.3394, | |
| 0.85, | |
| 0.3328, | |
| 0.3133, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 49, | |
| "steps": 34, | |
| "score": 0.7592, | |
| "total_reward": 18.2216, | |
| "completion_rate": 0.95, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.658, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9211, | |
| 0.8867, | |
| 0.9144, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.9011, | |
| 0.8667, | |
| 0.8944, | |
| 0.9172, | |
| 0.31, | |
| 0.02, | |
| 0.8833, | |
| 0.3489, | |
| 0.8744, | |
| 0.0752, | |
| 0.8678, | |
| 0.8656, | |
| 0.8633, | |
| 0.8589, | |
| 0.5214 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 50, | |
| "steps": 31, | |
| "score": 0.5546, | |
| "total_reward": 13.3116, | |
| "completion_rate": 0.65, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.768, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8756, | |
| 0.9233, | |
| 0.8867, | |
| 0.3344, | |
| 0.9122, | |
| 0.91, | |
| 0.3356, | |
| 0.02, | |
| 0.9011, | |
| 0.02, | |
| 0.02, | |
| 0.8944, | |
| 0.84, | |
| 0.8556, | |
| 0.8511, | |
| 0.9039, | |
| 0.2967, | |
| 0.3422, | |
| 0.87, | |
| 0.8928, | |
| 0.0686, | |
| 0.2833, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 51, | |
| "steps": 35, | |
| "score": 0.6911, | |
| "total_reward": 16.5857, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.634, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9206, | |
| 0.9233, | |
| 0.9139, | |
| 0.8644, | |
| 0.9122, | |
| 0.8778, | |
| 0.9056, | |
| 0.3311, | |
| 0.8967, | |
| 0.3222, | |
| 0.8378, | |
| 0.8856, | |
| 0.8833, | |
| 0.9061, | |
| 0.8767, | |
| 0.9172, | |
| 0.073, | |
| 0.3178, | |
| 0.02, | |
| 0.8633, | |
| 0.02, | |
| 0.8217, | |
| 0.4783 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 52, | |
| "steps": 39, | |
| "score": 0.6835, | |
| "total_reward": 19.1391, | |
| "completion_rate": 0.95, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.96, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3608, | |
| 0.9506, | |
| 0.9233, | |
| 0.9211, | |
| 0.3144, | |
| 0.9122, | |
| 0.337, | |
| 0.9056, | |
| 0.3233, | |
| 0.9011, | |
| 0.8989, | |
| 0.8944, | |
| 0.84, | |
| 0.8356, | |
| 0.3333, | |
| 0.8811, | |
| 0.8789, | |
| 0.8644, | |
| 0.8722, | |
| 0.8878, | |
| 0.8811, | |
| 0.8567, | |
| 0.2744, | |
| 0.8772, | |
| 0.27, | |
| 0.8873, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 53, | |
| "steps": 38, | |
| "score": 0.636, | |
| "total_reward": 16.5363, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.643, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8756, | |
| 0.9483, | |
| 0.8689, | |
| 0.9167, | |
| 0.8822, | |
| 0.8578, | |
| 0.8733, | |
| 0.8489, | |
| 0.3244, | |
| 0.8922, | |
| 0.34, | |
| 0.3378, | |
| 0.8856, | |
| 0.02, | |
| 0.8789, | |
| 0.9017, | |
| 0.3174, | |
| 0.02, | |
| 0.3378, | |
| 0.8656, | |
| 0.2833, | |
| 0.0641, | |
| 0.8217, | |
| 0.8522, | |
| 0.4564 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 54, | |
| "steps": 36, | |
| "score": 0.6308, | |
| "total_reward": 16.4, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.696, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9506, | |
| 0.9233, | |
| 0.9211, | |
| 0.8667, | |
| 0.3252, | |
| 0.9078, | |
| 0.9056, | |
| 0.9011, | |
| 0.8989, | |
| 0.3237, | |
| 0.3474, | |
| 0.84, | |
| 0.8878, | |
| 0.8833, | |
| 0.3089, | |
| 0.8744, | |
| 0.87, | |
| 0.3308, | |
| 0.8811, | |
| 0.0619, | |
| 0.02, | |
| 0.3274, | |
| 0.8522, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 55, | |
| "steps": 32, | |
| "score": 0.5079, | |
| "total_reward": 13.2064, | |
| "completion_rate": 0.6, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.674, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 3, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.02, | |
| 0.9256, | |
| 0.3241, | |
| 0.9189, | |
| 0.8644, | |
| 0.3322, | |
| 0.33, | |
| 0.9156, | |
| 0.02, | |
| 0.3511, | |
| 0.02, | |
| 0.8622, | |
| 0.8878, | |
| 0.3126, | |
| 0.8833, | |
| 0.8811, | |
| 0.8789, | |
| 0.8767, | |
| 0.3244, | |
| 0.8722, | |
| 0.333, | |
| 0.0708, | |
| 0.0686, | |
| 0.8611, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 56, | |
| "steps": 32, | |
| "score": 0.5675, | |
| "total_reward": 14.1867, | |
| "completion_rate": 0.65, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.73, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3608, | |
| 0.9256, | |
| 0.02, | |
| 0.8867, | |
| 0.9144, | |
| 0.9122, | |
| 0.02, | |
| 0.3578, | |
| 0.9056, | |
| 0.9033, | |
| 0.8667, | |
| 0.8744, | |
| 0.3422, | |
| 0.363, | |
| 0.9128, | |
| 0.8833, | |
| 0.8489, | |
| 0.3774, | |
| 0.3222, | |
| 0.355, | |
| 0.8678, | |
| 0.0686, | |
| 0.8261, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 57, | |
| "steps": 37, | |
| "score": 0.5298, | |
| "total_reward": 13.7744, | |
| "completion_rate": 0.6, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.716, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9233, | |
| 0.9189, | |
| 0.3667, | |
| 0.3644, | |
| 0.3622, | |
| 0.343, | |
| 0.9056, | |
| 0.02, | |
| 0.8667, | |
| 0.8922, | |
| 0.8878, | |
| 0.8856, | |
| 0.02, | |
| 0.8289, | |
| 0.8922, | |
| 0.8678, | |
| 0.2933, | |
| 0.3141, | |
| 0.0619, | |
| 0.3197, | |
| 0.2744, | |
| 0.8772, | |
| 0.3, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 58, | |
| "steps": 37, | |
| "score": 0.6112, | |
| "total_reward": 17.1148, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.808, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9506, | |
| 0.3433, | |
| 0.3167, | |
| 0.02, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.2911, | |
| 0.3259, | |
| 0.3467, | |
| 0.8422, | |
| 0.8878, | |
| 0.3163, | |
| 0.8811, | |
| 0.8789, | |
| 0.9194, | |
| 0.8522, | |
| 0.8856, | |
| 0.0663, | |
| 0.3341, | |
| 0.8567, | |
| 0.3044, | |
| 0.3022, | |
| 0.3, | |
| 0.4822 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 59, | |
| "steps": 39, | |
| "score": 0.6007, | |
| "total_reward": 16.2199, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.55, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3533, | |
| 0.9139, | |
| 0.9167, | |
| 0.9122, | |
| 0.33, | |
| 0.8756, | |
| 0.02, | |
| 0.02, | |
| 0.8467, | |
| 0.8944, | |
| 0.86, | |
| 0.3378, | |
| 0.3063, | |
| 0.8811, | |
| 0.8789, | |
| 0.8444, | |
| 0.8722, | |
| 0.32, | |
| 0.2956, | |
| 0.0663, | |
| 0.2811, | |
| 0.8839, | |
| 0.8722, | |
| 0.2878, | |
| 0.4134 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 60, | |
| "steps": 39, | |
| "score": 0.7149, | |
| "total_reward": 17.1579, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.717, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9233, | |
| 0.9211, | |
| 0.02, | |
| 0.3044, | |
| 0.92, | |
| 0.9056, | |
| 0.3233, | |
| 0.3211, | |
| 0.8967, | |
| 0.9194, | |
| 0.89, | |
| 0.3056, | |
| 0.8833, | |
| 0.02, | |
| 0.8767, | |
| 0.8994, | |
| 0.87, | |
| 0.8306, | |
| 0.8789, | |
| 0.8194, | |
| 0.8678, | |
| 0.7902 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 61, | |
| "steps": 34, | |
| "score": 0.6745, | |
| "total_reward": 17.5378, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.937, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8911, | |
| 0.9439, | |
| 0.9167, | |
| 0.3644, | |
| 0.3322, | |
| 0.9078, | |
| 0.8533, | |
| 0.9011, | |
| 0.9239, | |
| 0.8944, | |
| 0.8922, | |
| 0.8828, | |
| 0.3356, | |
| 0.3333, | |
| 0.8967, | |
| 0.0774, | |
| 0.8722, | |
| 0.8678, | |
| 0.8906, | |
| 0.0663, | |
| 0.02, | |
| 0.2789, | |
| 0.8567, | |
| 0.5562 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 62, | |
| "steps": 38, | |
| "score": 0.5955, | |
| "total_reward": 16.078, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.583, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8733, | |
| 0.8689, | |
| 0.9144, | |
| 0.3622, | |
| 0.02, | |
| 0.8556, | |
| 0.9033, | |
| 0.9011, | |
| 0.02, | |
| 0.3544, | |
| 0.8922, | |
| 0.89, | |
| 0.8878, | |
| 0.8333, | |
| 0.3089, | |
| 0.0797, | |
| 0.3022, | |
| 0.3378, | |
| 0.8633, | |
| 0.8789, | |
| 0.3297, | |
| 0.8794, | |
| 0.3022, | |
| 0.8478, | |
| 0.4237 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 63, | |
| "steps": 35, | |
| "score": 0.6184, | |
| "total_reward": 16.0796, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.677, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3511, | |
| 0.9167, | |
| 0.8944, | |
| 0.3, | |
| 0.9078, | |
| 0.9306, | |
| 0.9033, | |
| 0.02, | |
| 0.8667, | |
| 0.3122, | |
| 0.89, | |
| 0.8878, | |
| 0.9106, | |
| 0.8811, | |
| 0.8789, | |
| 0.8744, | |
| 0.3222, | |
| 0.3308, | |
| 0.2933, | |
| 0.2811, | |
| 0.3119, | |
| 0.8961, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 64, | |
| "steps": 43, | |
| "score": 0.6578, | |
| "total_reward": 19.7347, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.572, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.3367, | |
| 0.8822, | |
| 0.33, | |
| 0.9056, | |
| 0.3463, | |
| 0.8489, | |
| 0.3167, | |
| 0.02, | |
| 0.3422, | |
| 0.31, | |
| 0.3086, | |
| 0.8311, | |
| 0.3367, | |
| 0.8744, | |
| 0.8722, | |
| 0.9028, | |
| 0.8656, | |
| 0.3263, | |
| 0.8611, | |
| 0.8589, | |
| 0.8744, | |
| 0.87, | |
| 0.8106, | |
| 0.8061, | |
| 0.2889, | |
| 0.7624 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 65, | |
| "steps": 34, | |
| "score": 0.6769, | |
| "total_reward": 16.2452, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.879, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.8933, | |
| 0.9461, | |
| 0.3689, | |
| 0.8844, | |
| 0.86, | |
| 0.8556, | |
| 0.9283, | |
| 0.3511, | |
| 0.8989, | |
| 0.3244, | |
| 0.8978, | |
| 0.8856, | |
| 0.8833, | |
| 0.3011, | |
| 0.8767, | |
| 0.8544, | |
| 0.8878, | |
| 0.8656, | |
| 0.3011, | |
| 0.2789, | |
| 0.8817, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 66, | |
| "steps": 34, | |
| "score": 0.5832, | |
| "total_reward": 15.7473, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.662, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 3, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8933, | |
| 0.9211, | |
| 0.9167, | |
| 0.3644, | |
| 0.91, | |
| 0.8756, | |
| 0.9033, | |
| 0.3511, | |
| 0.8989, | |
| 0.02, | |
| 0.8944, | |
| 0.86, | |
| 0.02, | |
| 0.3063, | |
| 0.8789, | |
| 0.8767, | |
| 0.0774, | |
| 0.02, | |
| 0.8878, | |
| 0.8906, | |
| 0.0663, | |
| 0.0641, | |
| 0.8589, | |
| 0.3067, | |
| 0.4526 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 67, | |
| "steps": 34, | |
| "score": 0.5986, | |
| "total_reward": 14.3671, | |
| "completion_rate": 0.7, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.891, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.02, | |
| 0.9233, | |
| 0.3489, | |
| 0.9144, | |
| 0.3622, | |
| 0.36, | |
| 0.9328, | |
| 0.3003, | |
| 0.9011, | |
| 0.8667, | |
| 0.8922, | |
| 0.8378, | |
| 0.02, | |
| 0.8833, | |
| 0.8489, | |
| 0.9194, | |
| 0.89, | |
| 0.8678, | |
| 0.8811, | |
| 0.0619, | |
| 0.3397, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 68, | |
| "steps": 33, | |
| "score": 0.7306, | |
| "total_reward": 18.2654, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.684, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8756, | |
| 0.9233, | |
| 0.3481, | |
| 0.9167, | |
| 0.3644, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9033, | |
| 0.3511, | |
| 0.8989, | |
| 0.3297, | |
| 0.8922, | |
| 0.8878, | |
| 0.02, | |
| 0.02, | |
| 0.8811, | |
| 0.9039, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.8656, | |
| 0.8611, | |
| 0.8126 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 69, | |
| "steps": 40, | |
| "score": 0.7144, | |
| "total_reward": 18.574, | |
| "completion_rate": 0.95, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.723, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3478, | |
| 0.9506, | |
| 0.9233, | |
| 0.9189, | |
| 0.9167, | |
| 0.3422, | |
| 0.8756, | |
| 0.9283, | |
| 0.9011, | |
| 0.8989, | |
| 0.02, | |
| 0.3122, | |
| 0.8556, | |
| 0.8833, | |
| 0.8467, | |
| 0.8222, | |
| 0.8678, | |
| 0.8656, | |
| 0.3133, | |
| 0.8239, | |
| 0.0597, | |
| 0.8544, | |
| 0.87, | |
| 0.8106, | |
| 0.5244 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 70, | |
| "steps": 34, | |
| "score": 0.6867, | |
| "total_reward": 17.1674, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.786, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9506, | |
| 0.02, | |
| 0.8889, | |
| 0.9144, | |
| 0.33, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.8489, | |
| 0.3497, | |
| 0.8944, | |
| 0.8922, | |
| 0.8828, | |
| 0.8856, | |
| 0.8833, | |
| 0.8789, | |
| 0.3774, | |
| 0.87, | |
| 0.3706, | |
| 0.0663, | |
| 0.8239, | |
| 0.8567, | |
| 0.5184 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 71, | |
| "steps": 29, | |
| "score": 0.5734, | |
| "total_reward": 14.3348, | |
| "completion_rate": 0.65, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.702, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 3, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.8911, | |
| 0.3459, | |
| 0.9167, | |
| 0.3644, | |
| 0.88, | |
| 0.9328, | |
| 0.8711, | |
| 0.8989, | |
| 0.8967, | |
| 0.3574, | |
| 0.3422, | |
| 0.89, | |
| 0.8856, | |
| 0.02, | |
| 0.8811, | |
| 0.0819, | |
| 0.3267, | |
| 0.3244, | |
| 0.8722, | |
| 0.353, | |
| 0.0708, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 72, | |
| "steps": 30, | |
| "score": 0.7073, | |
| "total_reward": 16.2683, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.584, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8956, | |
| 0.9483, | |
| 0.3489, | |
| 0.8844, | |
| 0.91, | |
| 0.9078, | |
| 0.9306, | |
| 0.9283, | |
| 0.9011, | |
| 0.3259, | |
| 0.8644, | |
| 0.84, | |
| 0.8878, | |
| 0.9106, | |
| 0.3033, | |
| 0.8811, | |
| 0.9039, | |
| 0.8394, | |
| 0.87, | |
| 0.02, | |
| 0.8906, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 73, | |
| "steps": 34, | |
| "score": 0.7782, | |
| "total_reward": 18.6761, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.803, | |
| "adversarial_detections": 2, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9233, | |
| 0.9189, | |
| 0.9167, | |
| 0.3644, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.3533, | |
| 0.02, | |
| 0.8467, | |
| 0.8944, | |
| 0.28, | |
| 0.8878, | |
| 0.8333, | |
| 0.8811, | |
| 0.8267, | |
| 0.8372, | |
| 0.8328, | |
| 0.8656, | |
| 0.8883, | |
| 0.9006, | |
| 0.8577 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 74, | |
| "steps": 33, | |
| "score": 0.7164, | |
| "total_reward": 17.1935, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.3333, | |
| "trust_calibration": 0.957, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9506, | |
| 0.9233, | |
| 0.9211, | |
| 0.8844, | |
| 0.86, | |
| 0.9056, | |
| 0.3233, | |
| 0.3189, | |
| 0.9217, | |
| 0.8944, | |
| 0.3352, | |
| 0.89, | |
| 0.8856, | |
| 0.8311, | |
| 0.0819, | |
| 0.9161, | |
| 0.8722, | |
| 0.8678, | |
| 0.8906, | |
| 0.8633, | |
| 0.8589, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 75, | |
| "steps": 40, | |
| "score": 0.6588, | |
| "total_reward": 18.4472, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.642, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.8911, | |
| 0.8667, | |
| 0.3344, | |
| 0.86, | |
| 0.3278, | |
| 0.3256, | |
| 0.9033, | |
| 0.3511, | |
| 0.3259, | |
| 0.8444, | |
| 0.89, | |
| 0.8856, | |
| 0.3103, | |
| 0.8811, | |
| 0.8867, | |
| 0.8994, | |
| 0.8722, | |
| 0.3378, | |
| 0.3333, | |
| 0.2811, | |
| 0.9017, | |
| 0.8544, | |
| 0.8917, | |
| 0.2756, | |
| 0.7515 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 76, | |
| "steps": 28, | |
| "score": 0.7202, | |
| "total_reward": 15.8447, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.793, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3526, | |
| 0.9233, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.935, | |
| 0.9078, | |
| 0.8983, | |
| 0.8689, | |
| 0.8967, | |
| 0.8944, | |
| 0.8922, | |
| 0.8878, | |
| 0.9033, | |
| 0.3189, | |
| 0.8394, | |
| 0.3222, | |
| 0.333, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 77, | |
| "steps": 34, | |
| "score": 0.6731, | |
| "total_reward": 16.8286, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.682, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.02, | |
| 0.9167, | |
| 0.9394, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.8989, | |
| 0.8894, | |
| 0.3422, | |
| 0.89, | |
| 0.2756, | |
| 0.8311, | |
| 0.3467, | |
| 0.2944, | |
| 0.9117, | |
| 0.8678, | |
| 0.3363, | |
| 0.02, | |
| 0.8567, | |
| 0.775 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 78, | |
| "steps": 44, | |
| "score": 0.627, | |
| "total_reward": 18.1844, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.849, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.8933, | |
| 0.3159, | |
| 0.8822, | |
| 0.8578, | |
| 0.9033, | |
| 0.9011, | |
| 0.8917, | |
| 0.8944, | |
| 0.8578, | |
| 0.3356, | |
| 0.8833, | |
| 0.2989, | |
| 0.3267, | |
| 0.3374, | |
| 0.2922, | |
| 0.32, | |
| 0.8156, | |
| 0.8711, | |
| 0.8589, | |
| 0.3022, | |
| 0.3, | |
| 0.0508, | |
| 0.8883, | |
| 0.8661, | |
| 0.3119, | |
| 0.2567, | |
| 0.6445 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 79, | |
| "steps": 37, | |
| "score": 0.6279, | |
| "total_reward": 16.9527, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.561, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9233, | |
| 0.8867, | |
| 0.9144, | |
| 0.9122, | |
| 0.02, | |
| 0.8756, | |
| 0.9011, | |
| 0.8989, | |
| 0.3467, | |
| 0.8622, | |
| 0.89, | |
| 0.8556, | |
| 0.8511, | |
| 0.9039, | |
| 0.3267, | |
| 0.0774, | |
| 0.02, | |
| 0.343, | |
| 0.8656, | |
| 0.3011, | |
| 0.8589, | |
| 0.3067, | |
| 0.8172, | |
| 0.333, | |
| 0.4381 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 80, | |
| "steps": 40, | |
| "score": 0.6766, | |
| "total_reward": 18.9455, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.946, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9483, | |
| 0.3489, | |
| 0.8644, | |
| 0.3622, | |
| 0.91, | |
| 0.9328, | |
| 0.9056, | |
| 0.8511, | |
| 0.3189, | |
| 0.3467, | |
| 0.3144, | |
| 0.32, | |
| 0.8933, | |
| 0.8811, | |
| 0.8267, | |
| 0.8994, | |
| 0.8722, | |
| 0.8678, | |
| 0.0686, | |
| 0.9061, | |
| 0.0619, | |
| 0.8194, | |
| 0.323, | |
| 0.8456, | |
| 0.5627 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 81, | |
| "steps": 33, | |
| "score": 0.7394, | |
| "total_reward": 17.745, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.704, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8956, | |
| 0.9233, | |
| 0.9211, | |
| 0.9167, | |
| 0.8822, | |
| 0.91, | |
| 0.9078, | |
| 0.3256, | |
| 0.3533, | |
| 0.9261, | |
| 0.8467, | |
| 0.02, | |
| 0.8922, | |
| 0.3078, | |
| 0.8856, | |
| 0.8833, | |
| 0.0841, | |
| 0.8967, | |
| 0.8744, | |
| 0.87, | |
| 0.9073, | |
| 0.8261, | |
| 0.6676 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 82, | |
| "steps": 39, | |
| "score": 0.6307, | |
| "total_reward": 17.661, | |
| "completion_rate": 0.75, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.581, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8956, | |
| 0.9233, | |
| 0.8889, | |
| 0.3667, | |
| 0.9122, | |
| 0.2978, | |
| 0.3586, | |
| 0.9033, | |
| 0.3289, | |
| 0.9294, | |
| 0.89, | |
| 0.8556, | |
| 0.3663, | |
| 0.3311, | |
| 0.3289, | |
| 0.8767, | |
| 0.8994, | |
| 0.3222, | |
| 0.32, | |
| 0.3528, | |
| 0.3683, | |
| 0.8411, | |
| 0.8567, | |
| 0.02, | |
| 0.815, | |
| 0.8478, | |
| 0.7211 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 83, | |
| "steps": 33, | |
| "score": 0.6648, | |
| "total_reward": 15.9564, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.25, | |
| "trust_calibration": 0.917, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 3, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8956, | |
| 0.3663, | |
| 0.9211, | |
| 0.9439, | |
| 0.9144, | |
| 0.3322, | |
| 0.9078, | |
| 0.9306, | |
| 0.8961, | |
| 0.8789, | |
| 0.8967, | |
| 0.8944, | |
| 0.86, | |
| 0.8356, | |
| 0.8811, | |
| 0.9184, | |
| 0.3474, | |
| 0.3552, | |
| 0.073, | |
| 0.0708, | |
| 0.8633, | |
| 0.9039, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 84, | |
| "steps": 39, | |
| "score": 0.7325, | |
| "total_reward": 19.0457, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.851, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8756, | |
| 0.9233, | |
| 0.02, | |
| 0.3067, | |
| 0.9122, | |
| 0.8578, | |
| 0.3256, | |
| 0.8511, | |
| 0.8989, | |
| 0.8444, | |
| 0.8922, | |
| 0.8578, | |
| 0.8833, | |
| 0.8811, | |
| 0.8767, | |
| 0.8744, | |
| 0.3252, | |
| 0.8678, | |
| 0.8656, | |
| 0.3363, | |
| 0.8589, | |
| 0.8961, | |
| 0.85, | |
| 0.2978, | |
| 0.8586 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 85, | |
| "steps": 32, | |
| "score": 0.661, | |
| "total_reward": 15.2038, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.807, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8956, | |
| 0.9483, | |
| 0.9211, | |
| 0.9189, | |
| 0.9144, | |
| 0.9122, | |
| 0.33, | |
| 0.3356, | |
| 0.3533, | |
| 0.3211, | |
| 0.8967, | |
| 0.86, | |
| 0.8533, | |
| 0.8811, | |
| 0.3289, | |
| 0.8767, | |
| 0.8744, | |
| 0.8722, | |
| 0.8856, | |
| 0.0663, | |
| 0.8861, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 86, | |
| "steps": 44, | |
| "score": 0.6056, | |
| "total_reward": 19.3805, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.837, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3478, | |
| 0.8933, | |
| 0.3489, | |
| 0.8844, | |
| 0.9372, | |
| 0.9078, | |
| 0.3556, | |
| 0.8689, | |
| 0.8967, | |
| 0.3222, | |
| 0.89, | |
| 0.3078, | |
| 0.8856, | |
| 0.9083, | |
| 0.3311, | |
| 0.2967, | |
| 0.3274, | |
| 0.88, | |
| 0.8678, | |
| 0.2926, | |
| 0.3133, | |
| 0.8861, | |
| 0.0619, | |
| 0.8544, | |
| 0.3022, | |
| 0.8678, | |
| 0.8456, | |
| 0.8061, | |
| 0.3119, | |
| 0.02, | |
| 0.5089 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 87, | |
| "steps": 37, | |
| "score": 0.6574, | |
| "total_reward": 16.4362, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.807, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8933, | |
| 0.9211, | |
| 0.9439, | |
| 0.8644, | |
| 0.86, | |
| 0.9078, | |
| 0.8733, | |
| 0.9011, | |
| 0.8667, | |
| 0.8922, | |
| 0.8556, | |
| 0.8833, | |
| 0.3311, | |
| 0.8789, | |
| 0.8767, | |
| 0.3422, | |
| 0.8878, | |
| 0.2856, | |
| 0.8261, | |
| 0.0619, | |
| 0.8994, | |
| 0.02, | |
| 0.303, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 88, | |
| "steps": 36, | |
| "score": 0.5911, | |
| "total_reward": 15.369, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.855, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 3, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.02, | |
| 0.8733, | |
| 0.9461, | |
| 0.8667, | |
| 0.9144, | |
| 0.905, | |
| 0.9078, | |
| 0.9056, | |
| 0.3233, | |
| 0.8989, | |
| 0.8967, | |
| 0.8922, | |
| 0.3178, | |
| 0.02, | |
| 0.02, | |
| 0.0841, | |
| 0.8967, | |
| 0.0774, | |
| 0.0752, | |
| 0.8678, | |
| 0.8633, | |
| 0.8789, | |
| 0.8194, | |
| 0.3022, | |
| 0.4961 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 89, | |
| "steps": 34, | |
| "score": 0.7869, | |
| "total_reward": 20.4589, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.809, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8956, | |
| 0.9233, | |
| 0.3481, | |
| 0.9439, | |
| 0.3367, | |
| 0.9144, | |
| 0.9122, | |
| 0.337, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.9239, | |
| 0.8644, | |
| 0.8922, | |
| 0.89, | |
| 0.8878, | |
| 0.925, | |
| 0.3441, | |
| 0.8967, | |
| 0.3474, | |
| 0.87, | |
| 0.8928, | |
| 0.8283, | |
| 0.8789, | |
| 0.8766 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 90, | |
| "steps": 31, | |
| "score": 0.6723, | |
| "total_reward": 14.7899, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.3333, | |
| "trust_calibration": 0.902, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 2, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9206, | |
| 0.8889, | |
| 0.3367, | |
| 0.8822, | |
| 0.8578, | |
| 0.9056, | |
| 0.9033, | |
| 0.9089, | |
| 0.02, | |
| 0.8922, | |
| 0.89, | |
| 0.8878, | |
| 0.8856, | |
| 0.8833, | |
| 0.9061, | |
| 0.8767, | |
| 0.0774, | |
| 0.02, | |
| 0.9095, | |
| 0.8633, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 91, | |
| "steps": 44, | |
| "score": 0.6543, | |
| "total_reward": 18.9747, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.289, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3111, | |
| 0.9189, | |
| 0.3667, | |
| 0.88, | |
| 0.3278, | |
| 0.9056, | |
| 0.8711, | |
| 0.8667, | |
| 0.3222, | |
| 0.34, | |
| 0.9128, | |
| 0.3133, | |
| 0.3311, | |
| 0.3367, | |
| 0.84, | |
| 0.8156, | |
| 0.8633, | |
| 0.8239, | |
| 0.2767, | |
| 0.8544, | |
| 0.85, | |
| 0.8728, | |
| 0.8883, | |
| 0.8039, | |
| 0.2744, | |
| 0.672 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 92, | |
| "steps": 39, | |
| "score": 0.6758, | |
| "total_reward": 16.8941, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.804, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 1, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.3689, | |
| 0.3367, | |
| 0.9144, | |
| 0.3322, | |
| 0.8578, | |
| 0.9033, | |
| 0.8667, | |
| 0.3122, | |
| 0.2856, | |
| 0.8489, | |
| 0.2967, | |
| 0.8994, | |
| 0.8378, | |
| 0.8633, | |
| 0.8239, | |
| 0.8544, | |
| 0.8522, | |
| 0.85, | |
| 0.8106, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 93, | |
| "steps": 38, | |
| "score": 0.7063, | |
| "total_reward": 17.6574, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.542, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.8756, | |
| 0.8711, | |
| 0.3389, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.9178, | |
| 0.9033, | |
| 0.8689, | |
| 0.8644, | |
| 0.3422, | |
| 0.8878, | |
| 0.8833, | |
| 0.8289, | |
| 0.8767, | |
| 0.8372, | |
| 0.02, | |
| 0.8306, | |
| 0.8789, | |
| 0.8567, | |
| 0.3274, | |
| 0.3352, | |
| 0.27, | |
| 0.7487 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 94, | |
| "steps": 35, | |
| "score": 0.6388, | |
| "total_reward": 15.969, | |
| "completion_rate": 0.75, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.941, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9233, | |
| 0.9139, | |
| 0.02, | |
| 0.9144, | |
| 0.02, | |
| 0.935, | |
| 0.9078, | |
| 0.3256, | |
| 0.8711, | |
| 0.3489, | |
| 0.3237, | |
| 0.8422, | |
| 0.8878, | |
| 0.8333, | |
| 0.8789, | |
| 0.3044, | |
| 0.3222, | |
| 0.073, | |
| 0.2878, | |
| 0.8633, | |
| 0.9006, | |
| 0.8567, | |
| 0.6701 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 95, | |
| "steps": 35, | |
| "score": 0.6469, | |
| "total_reward": 17.465, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.834, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8933, | |
| 0.8689, | |
| 0.3667, | |
| 0.3644, | |
| 0.307, | |
| 0.3578, | |
| 0.9056, | |
| 0.9283, | |
| 0.9011, | |
| 0.8967, | |
| 0.8944, | |
| 0.8922, | |
| 0.89, | |
| 0.8856, | |
| 0.9161, | |
| 0.2989, | |
| 0.3267, | |
| 0.3474, | |
| 0.0752, | |
| 0.073, | |
| 0.8306, | |
| 0.8261, | |
| 0.3089, | |
| 0.8194, | |
| 0.5106 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 96, | |
| "steps": 36, | |
| "score": 0.7393, | |
| "total_reward": 17.7436, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.619, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.8733, | |
| 0.3167, | |
| 0.92, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.3189, | |
| 0.8622, | |
| 0.02, | |
| 0.8878, | |
| 0.8933, | |
| 0.8811, | |
| 0.9039, | |
| 0.8767, | |
| 0.8744, | |
| 0.8722, | |
| 0.87, | |
| 0.8306, | |
| 0.02, | |
| 0.3289, | |
| 0.8961, | |
| 0.7897 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 97, | |
| "steps": 36, | |
| "score": 0.7834, | |
| "total_reward": 18.8027, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.813, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.8711, | |
| 0.9439, | |
| 0.8844, | |
| 0.86, | |
| 0.9078, | |
| 0.9306, | |
| 0.8711, | |
| 0.8989, | |
| 0.8967, | |
| 0.3214, | |
| 0.84, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.3244, | |
| 0.835, | |
| 0.2878, | |
| 0.3156, | |
| 0.8611, | |
| 0.8589, | |
| 0.8544, | |
| 0.8557 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 98, | |
| "steps": 37, | |
| "score": 0.7572, | |
| "total_reward": 18.9302, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.605, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.02, | |
| 0.8889, | |
| 0.8967, | |
| 0.8622, | |
| 0.9078, | |
| 0.3256, | |
| 0.8711, | |
| 0.8989, | |
| 0.8944, | |
| 0.3422, | |
| 0.89, | |
| 0.3308, | |
| 0.8333, | |
| 0.3341, | |
| 0.8767, | |
| 0.9172, | |
| 0.8678, | |
| 0.8656, | |
| 0.8811, | |
| 0.8839, | |
| 0.8194, | |
| 0.8014 | |
| ] | |
| }, | |
| { | |
| "policy": "random", | |
| "task_type": "task3", | |
| "seed": 99, | |
| "steps": 37, | |
| "score": 0.6531, | |
| "total_reward": 18.9394, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.623, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 2, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9483, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.333, | |
| 0.9078, | |
| 0.3011, | |
| 0.9239, | |
| 0.3144, | |
| 0.89, | |
| 0.8556, | |
| 0.3033, | |
| 0.8811, | |
| 0.8789, | |
| 0.9017, | |
| 0.0774, | |
| 0.835, | |
| 0.0708, | |
| 0.8656, | |
| 0.02, | |
| 0.9039, | |
| 0.3297, | |
| 0.8172, | |
| 0.3, | |
| 0.4885 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 0, | |
| "steps": 43, | |
| "score": 0.7114, | |
| "total_reward": 18.4969, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.729, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.3344, | |
| 0.91, | |
| 0.9056, | |
| 0.9011, | |
| 0.3267, | |
| 0.8922, | |
| 0.8878, | |
| 0.8833, | |
| 0.8789, | |
| 0.8744, | |
| 0.3, | |
| 0.2956, | |
| 0.8611, | |
| 0.2867, | |
| 0.8522, | |
| 0.8478, | |
| 0.8433, | |
| 0.8389, | |
| 0.7841 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 1, | |
| "steps": 29, | |
| "score": 0.7083, | |
| "total_reward": 17.707, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.721, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6632 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 2, | |
| "steps": 29, | |
| "score": 0.6919, | |
| "total_reward": 17.2983, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.561, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3778, | |
| 0.3456, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6065 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 3, | |
| "steps": 42, | |
| "score": 0.8546, | |
| "total_reward": 18.8008, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.843, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8675 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 4, | |
| "steps": 29, | |
| "score": 0.7165, | |
| "total_reward": 17.9128, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.721, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.3414, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.664 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 5, | |
| "steps": 46, | |
| "score": 0.7558, | |
| "total_reward": 18.1385, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.832, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.32, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.2711, | |
| 0.8367, | |
| 0.8322, | |
| 0.8229 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 6, | |
| "steps": 27, | |
| "score": 0.6991, | |
| "total_reward": 16.778, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.4, | |
| "trust_calibration": 0.725, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 6, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.1019, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.6387 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 7, | |
| "steps": 42, | |
| "score": 0.7756, | |
| "total_reward": 19.3902, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.835, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3763, | |
| 0.9211, | |
| 0.3919, | |
| 0.3997, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.2933, | |
| 0.8589, | |
| 0.8544, | |
| 0.28, | |
| 0.8456, | |
| 0.8411, | |
| 0.8478 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 8, | |
| "steps": 44, | |
| "score": 0.809, | |
| "total_reward": 19.4157, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.853, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3526, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.2756, | |
| 0.8411, | |
| 0.8367, | |
| 0.8654 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 9, | |
| "steps": 40, | |
| "score": 0.782, | |
| "total_reward": 19.5499, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.837, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.3841, | |
| 0.9189, | |
| 0.3997, | |
| 0.3994, | |
| 0.3972, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8528 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 10, | |
| "steps": 31, | |
| "score": 0.712, | |
| "total_reward": 17.8008, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.625, | |
| "trust_calibration": 0.448, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 3, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.3303, | |
| 0.3281, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.9295, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.3356, | |
| 0.6281 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 11, | |
| "steps": 40, | |
| "score": 0.7732, | |
| "total_reward": 18.5566, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.835, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.2978, | |
| 0.2933, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8349 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 12, | |
| "steps": 42, | |
| "score": 0.8546, | |
| "total_reward": 18.8009, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8676 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 13, | |
| "steps": 39, | |
| "score": 0.833, | |
| "total_reward": 18.3252, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.811, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3456, | |
| 0.3433, | |
| 0.9189, | |
| 0.9144, | |
| 0.91, | |
| 0.9056, | |
| 0.9011, | |
| 0.8967, | |
| 0.8922, | |
| 0.8878, | |
| 0.8833, | |
| 0.8789, | |
| 0.8744, | |
| 0.87, | |
| 0.8656, | |
| 0.8611, | |
| 0.8567, | |
| 0.8522, | |
| 0.8478, | |
| 0.8485 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 14, | |
| "steps": 29, | |
| "score": 0.6889, | |
| "total_reward": 17.9127, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.609, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3763, | |
| 0.3841, | |
| 0.3689, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.3237, | |
| 0.8944, | |
| 0.8922, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.0863, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6353 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 15, | |
| "steps": 30, | |
| "score": 0.6847, | |
| "total_reward": 18.4869, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.635, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3608, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.3459, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.3348, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.8922, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.0863, | |
| 0.0841, | |
| 0.9184, | |
| 0.9139, | |
| 0.9095, | |
| 0.6404 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 16, | |
| "steps": 42, | |
| "score": 0.8546, | |
| "total_reward": 18.8009, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8676 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 17, | |
| "steps": 46, | |
| "score": 0.8048, | |
| "total_reward": 19.316, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.842, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.28, | |
| 0.8456, | |
| 0.8411, | |
| 0.8367, | |
| 0.8322, | |
| 0.8605 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 18, | |
| "steps": 26, | |
| "score": 0.6967, | |
| "total_reward": 16.7213, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.3333, | |
| "trust_calibration": 0.701, | |
| "adversarial_detections": 3, | |
| "adversarial_poisonings": 6, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.6149 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 19, | |
| "steps": 20, | |
| "score": 0.6193, | |
| "total_reward": 13.0053, | |
| "completion_rate": 0.65, | |
| "detection_rate": 0.0, | |
| "trust_calibration": 0.576, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 5, | |
| "status": "failed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3478, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.3437, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.01 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 20, | |
| "steps": 46, | |
| "score": 0.7498, | |
| "total_reward": 19.4938, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.835, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3763, | |
| 0.3841, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.3022, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.2844, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8367, | |
| 0.8322, | |
| 0.8412 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 21, | |
| "steps": 42, | |
| "score": 0.8546, | |
| "total_reward": 18.8009, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.2756, | |
| 0.8411, | |
| 0.8676 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 22, | |
| "steps": 36, | |
| "score": 0.7334, | |
| "total_reward": 21.2675, | |
| "completion_rate": 1.0, | |
| "detection_rate": 0.8, | |
| "trust_calibration": 0.747, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3778, | |
| 0.9256, | |
| 0.9233, | |
| 0.3481, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.337, | |
| 0.9078, | |
| 0.3326, | |
| 0.9033, | |
| 0.9011, | |
| 0.3259, | |
| 0.8967, | |
| 0.3214, | |
| 0.8922, | |
| 0.89, | |
| 0.3148, | |
| 0.8856, | |
| 0.0863, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.9073, | |
| 0.8789, | |
| 0.8544, | |
| 0.7968 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 23, | |
| "steps": 28, | |
| "score": 0.6922, | |
| "total_reward": 17.3057, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.645, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3686, | |
| 0.3433, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.6298 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 24, | |
| "steps": 46, | |
| "score": 0.7725, | |
| "total_reward": 20.0838, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.836, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3686, | |
| 0.9483, | |
| 0.3711, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.3022, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.2711, | |
| 0.8367, | |
| 0.8322, | |
| 0.8591 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 25, | |
| "steps": 34, | |
| "score": 0.6755, | |
| "total_reward": 18.9148, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.71, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9144, | |
| 0.3322, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.3189, | |
| 0.3167, | |
| 0.8944, | |
| 0.3122, | |
| 0.31, | |
| 0.3078, | |
| 0.8856, | |
| 0.9011, | |
| 0.8967, | |
| 0.8922, | |
| 0.3078, | |
| 0.3033, | |
| 0.8789, | |
| 0.782 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 26, | |
| "steps": 34, | |
| "score": 0.6561, | |
| "total_reward": 19.0282, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.6667, | |
| "trust_calibration": 0.467, | |
| "adversarial_detections": 2, | |
| "adversarial_poisonings": 1, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.8989, | |
| 0.3667, | |
| 0.3644, | |
| 0.9372, | |
| 0.935, | |
| 0.3578, | |
| 0.3556, | |
| 0.3533, | |
| 0.3289, | |
| 0.8967, | |
| 0.8944, | |
| 0.8922, | |
| 0.317, | |
| 0.8878, | |
| 0.3126, | |
| 0.8833, | |
| 0.0841, | |
| 0.9184, | |
| 0.9139, | |
| 0.8878, | |
| 0.8656, | |
| 0.3163, | |
| 0.3419, | |
| 0.6213 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 27, | |
| "steps": 46, | |
| "score": 0.7256, | |
| "total_reward": 20.3155, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.828, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3763, | |
| 0.9211, | |
| 0.9189, | |
| 0.3997, | |
| 0.3994, | |
| 0.3972, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.2844, | |
| 0.85, | |
| 0.2756, | |
| 0.8411, | |
| 0.8367, | |
| 0.8322, | |
| 0.8395 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 28, | |
| "steps": 28, | |
| "score": 0.7355, | |
| "total_reward": 17.6509, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.725, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.1019, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.9273, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.6841 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 29, | |
| "steps": 28, | |
| "score": 0.7258, | |
| "total_reward": 18.144, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.709, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.3392, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.3281, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.6801 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 30, | |
| "steps": 39, | |
| "score": 0.6238, | |
| "total_reward": 18.7149, | |
| "completion_rate": 0.75, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.712, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.3411, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.3322, | |
| 0.33, | |
| 0.9078, | |
| 0.3256, | |
| 0.3233, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.3144, | |
| 0.8922, | |
| 0.31, | |
| 0.3078, | |
| 0.3233, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3056, | |
| 0.3011, | |
| 0.2967, | |
| 0.8722, | |
| 0.2878, | |
| 0.7538 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 31, | |
| "steps": 37, | |
| "score": 0.6915, | |
| "total_reward": 20.0529, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.911, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9144, | |
| 0.3322, | |
| 0.91, | |
| 0.3278, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.3697, | |
| 0.3774, | |
| 0.3772, | |
| 0.89, | |
| 0.3728, | |
| 0.3706, | |
| 0.9228, | |
| 0.8944, | |
| 0.87, | |
| 0.8656, | |
| 0.2911, | |
| 0.8567, | |
| 0.8522, | |
| 0.843 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 32, | |
| "steps": 30, | |
| "score": 0.689, | |
| "total_reward": 17.9142, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.6, | |
| "trust_calibration": 0.712, | |
| "adversarial_detections": 3, | |
| "adversarial_poisonings": 2, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.3326, | |
| 0.3303, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.0952, | |
| 0.093, | |
| 0.9273, | |
| 0.9228, | |
| 0.9184, | |
| 0.8922, | |
| 0.87, | |
| 0.3208, | |
| 0.6714 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 33, | |
| "steps": 31, | |
| "score": 0.7464, | |
| "total_reward": 18.6604, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.6667, | |
| "trust_calibration": 0.845, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 2, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.3437, | |
| 0.3414, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.3303, | |
| 0.9011, | |
| 0.8989, | |
| 0.3237, | |
| 0.8944, | |
| 0.0952, | |
| 0.093, | |
| 0.9273, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.8856, | |
| 0.7574 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 34, | |
| "steps": 40, | |
| "score": 0.8105, | |
| "total_reward": 18.6408, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.838, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3433, | |
| 0.3411, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.853 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 35, | |
| "steps": 28, | |
| "score": 0.7257, | |
| "total_reward": 18.1436, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.708, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.3437, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.3348, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.6797 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 36, | |
| "steps": 42, | |
| "score": 0.7886, | |
| "total_reward": 18.9271, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.854, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3686, | |
| 0.3433, | |
| 0.3481, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.2978, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8527 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 37, | |
| "steps": 28, | |
| "score": 0.6822, | |
| "total_reward": 17.056, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.4, | |
| "trust_calibration": 0.659, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 6, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3478, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.62 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 38, | |
| "steps": 43, | |
| "score": 0.8297, | |
| "total_reward": 19.0823, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.774, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3478, | |
| 0.9233, | |
| 0.9189, | |
| 0.9144, | |
| 0.91, | |
| 0.9056, | |
| 0.9011, | |
| 0.8967, | |
| 0.8922, | |
| 0.8878, | |
| 0.8833, | |
| 0.8789, | |
| 0.8744, | |
| 0.3, | |
| 0.8656, | |
| 0.8611, | |
| 0.8567, | |
| 0.8522, | |
| 0.8478, | |
| 0.8433, | |
| 0.8389, | |
| 0.8479 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 39, | |
| "steps": 28, | |
| "score": 0.7161, | |
| "total_reward": 17.9036, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.71, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.3303, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.6637 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 40, | |
| "steps": 37, | |
| "score": 0.6368, | |
| "total_reward": 19.7412, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.678, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9144, | |
| 0.9122, | |
| 0.33, | |
| 0.9078, | |
| 0.9056, | |
| 0.3233, | |
| 0.9011, | |
| 0.3189, | |
| 0.3167, | |
| 0.8944, | |
| 0.3122, | |
| 0.89, | |
| 0.3078, | |
| 0.8856, | |
| 0.3033, | |
| 0.8811, | |
| 0.8967, | |
| 0.3122, | |
| 0.3078, | |
| 0.8833, | |
| 0.2989, | |
| 0.2944, | |
| 0.87, | |
| 0.7673 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 41, | |
| "steps": 38, | |
| "score": 0.6416, | |
| "total_reward": 20.5321, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.732, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.3411, | |
| 0.9189, | |
| 0.3367, | |
| 0.9144, | |
| 0.9122, | |
| 0.33, | |
| 0.9078, | |
| 0.3256, | |
| 0.3233, | |
| 0.3211, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.3122, | |
| 0.89, | |
| 0.3078, | |
| 0.3056, | |
| 0.8833, | |
| 0.3189, | |
| 0.8944, | |
| 0.31, | |
| 0.3056, | |
| 0.8811, | |
| 0.8589, | |
| 0.3097, | |
| 0.8722, | |
| 0.323, | |
| 0.7961 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 42, | |
| "steps": 31, | |
| "score": 0.6759, | |
| "total_reward": 17.5723, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.442, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.3414, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.3056, | |
| 0.5724 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 43, | |
| "steps": 32, | |
| "score": 0.6896, | |
| "total_reward": 17.9304, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.681, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3456, | |
| 0.9233, | |
| 0.9211, | |
| 0.3719, | |
| 0.3797, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.3356, | |
| 0.6467 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 44, | |
| "steps": 38, | |
| "score": 0.6652, | |
| "total_reward": 19.2895, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.721, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.8967, | |
| 0.9394, | |
| 0.9372, | |
| 0.36, | |
| 0.3578, | |
| 0.3556, | |
| 0.9283, | |
| 0.3511, | |
| 0.3489, | |
| 0.3167, | |
| 0.8944, | |
| 0.8922, | |
| 0.343, | |
| 0.3508, | |
| 0.9011, | |
| 0.8767, | |
| 0.8722, | |
| 0.2978, | |
| 0.8633, | |
| 0.8589, | |
| 0.2844, | |
| 0.85, | |
| 0.7757 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 45, | |
| "steps": 40, | |
| "score": 0.8827, | |
| "total_reward": 18.5376, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.839, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8709 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 46, | |
| "steps": 40, | |
| "score": 0.6245, | |
| "total_reward": 18.7359, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.429, | |
| "adversarial_detections": 3, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3478, | |
| 0.9506, | |
| 0.9483, | |
| 0.9461, | |
| 0.3689, | |
| 0.3667, | |
| 0.3644, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.3763, | |
| 0.3841, | |
| 0.3839, | |
| 0.3817, | |
| 0.8944, | |
| 0.3192, | |
| 0.3178, | |
| 0.8856, | |
| 0.9228, | |
| 0.9184, | |
| 0.3422, | |
| 0.3728, | |
| 0.3683, | |
| 0.9006, | |
| 0.8994, | |
| 0.85, | |
| 0.3156, | |
| 0.6634 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 47, | |
| "steps": 29, | |
| "score": 0.7165, | |
| "total_reward": 17.9132, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.722, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.3303, | |
| 0.9011, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6644 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 48, | |
| "steps": 28, | |
| "score": 0.7257, | |
| "total_reward": 18.1437, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.708, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.3459, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.337, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.6798 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 49, | |
| "steps": 28, | |
| "score": 0.7355, | |
| "total_reward": 17.6512, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.726, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.1019, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.9273, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.6844 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 50, | |
| "steps": 42, | |
| "score": 0.717, | |
| "total_reward": 18.643, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.82, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.3919, | |
| 0.3997, | |
| 0.3994, | |
| 0.3972, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.32, | |
| 0.8856, | |
| 0.8811, | |
| 0.3067, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.2844, | |
| 0.28, | |
| 0.8456, | |
| 0.8411, | |
| 0.8092 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 51, | |
| "steps": 34, | |
| "score": 0.7274, | |
| "total_reward": 19.6391, | |
| "completion_rate": 0.95, | |
| "detection_rate": 0.625, | |
| "trust_calibration": 0.682, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 3, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.3414, | |
| 0.9122, | |
| 0.337, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.3237, | |
| 0.8944, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.9073, | |
| 0.3311, | |
| 0.715 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 52, | |
| "steps": 46, | |
| "score": 0.5965, | |
| "total_reward": 19.0884, | |
| "completion_rate": 0.7, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.686, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3548, | |
| 0.9256, | |
| 0.3433, | |
| 0.9211, | |
| 0.3619, | |
| 0.9144, | |
| 0.34, | |
| 0.3578, | |
| 0.3333, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.3774, | |
| 0.8922, | |
| 0.375, | |
| 0.8878, | |
| 0.3706, | |
| 0.3683, | |
| 0.3661, | |
| 0.3639, | |
| 0.2967, | |
| 0.3594, | |
| 0.3572, | |
| 0.355, | |
| 0.8656, | |
| 0.3089, | |
| 0.8722, | |
| 0.8656, | |
| 0.8589, | |
| 0.8522, | |
| 0.7165 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 53, | |
| "steps": 44, | |
| "score": 0.8286, | |
| "total_reward": 19.0588, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.3067, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8367, | |
| 0.8632 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 54, | |
| "steps": 35, | |
| "score": 0.6607, | |
| "total_reward": 18.4987, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.75, | |
| "trust_calibration": 0.358, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 2, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.3797, | |
| 0.3874, | |
| 0.3952, | |
| 0.36, | |
| 0.9078, | |
| 0.3326, | |
| 0.3233, | |
| 0.9011, | |
| 0.8989, | |
| 0.3237, | |
| 0.8944, | |
| 0.8922, | |
| 0.89, | |
| 0.8878, | |
| 0.0886, | |
| 0.0863, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.9073, | |
| 0.3661, | |
| 0.8984, | |
| 0.6167 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 55, | |
| "steps": 29, | |
| "score": 0.728, | |
| "total_reward": 18.1995, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.713, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.3756, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.3348, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6787 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 56, | |
| "steps": 37, | |
| "score": 0.6768, | |
| "total_reward": 20.9793, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.796, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3548, | |
| 0.9506, | |
| 0.9483, | |
| 0.9461, | |
| 0.3689, | |
| 0.3667, | |
| 0.3644, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.3256, | |
| 0.3233, | |
| 0.9011, | |
| 0.3189, | |
| 0.8967, | |
| 0.3144, | |
| 0.8922, | |
| 0.89, | |
| 0.8878, | |
| 0.8856, | |
| 0.3033, | |
| 0.8989, | |
| 0.2967, | |
| 0.8922, | |
| 0.8878, | |
| 0.2856, | |
| 0.8811, | |
| 0.2967, | |
| 0.8722, | |
| 0.8317 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 57, | |
| "steps": 38, | |
| "score": 0.6683, | |
| "total_reward": 20.0487, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.771, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.3389, | |
| 0.9167, | |
| 0.3344, | |
| 0.3322, | |
| 0.33, | |
| 0.9078, | |
| 0.3256, | |
| 0.9033, | |
| 0.3211, | |
| 0.8989, | |
| 0.3167, | |
| 0.3144, | |
| 0.8922, | |
| 0.89, | |
| 0.8878, | |
| 0.3486, | |
| 0.3563, | |
| 0.3819, | |
| 0.9161, | |
| 0.34, | |
| 0.8833, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8058 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 58, | |
| "steps": 29, | |
| "score": 0.7278, | |
| "total_reward": 18.1944, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5556, | |
| "trust_calibration": 0.731, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.3481, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.3348, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6999 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 59, | |
| "steps": 43, | |
| "score": 0.7547, | |
| "total_reward": 19.6215, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.735, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3526, | |
| 0.9233, | |
| 0.9211, | |
| 0.3719, | |
| 0.3797, | |
| 0.3644, | |
| 0.91, | |
| 0.9056, | |
| 0.9011, | |
| 0.8967, | |
| 0.8922, | |
| 0.8878, | |
| 0.8833, | |
| 0.8789, | |
| 0.8744, | |
| 0.3, | |
| 0.8656, | |
| 0.2911, | |
| 0.8567, | |
| 0.8522, | |
| 0.8478, | |
| 0.8433, | |
| 0.8389, | |
| 0.8208 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 60, | |
| "steps": 36, | |
| "score": 0.7716, | |
| "total_reward": 18.5184, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.836, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.3411, | |
| 0.9189, | |
| 0.9167, | |
| 0.3344, | |
| 0.3322, | |
| 0.33, | |
| 0.3278, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.8439 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 61, | |
| "steps": 42, | |
| "score": 0.7616, | |
| "total_reward": 19.8021, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.836, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3763, | |
| 0.9211, | |
| 0.9189, | |
| 0.3997, | |
| 0.3994, | |
| 0.3972, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.28, | |
| 0.8456, | |
| 0.2711, | |
| 0.8483 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 62, | |
| "steps": 43, | |
| "score": 0.7892, | |
| "total_reward": 18.9402, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.925, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3548, | |
| 0.9506, | |
| 0.3733, | |
| 0.9189, | |
| 0.9144, | |
| 0.91, | |
| 0.9056, | |
| 0.9011, | |
| 0.8967, | |
| 0.3222, | |
| 0.3178, | |
| 0.8833, | |
| 0.8789, | |
| 0.8744, | |
| 0.87, | |
| 0.8656, | |
| 0.8611, | |
| 0.8567, | |
| 0.8522, | |
| 0.8478, | |
| 0.8433, | |
| 0.8389, | |
| 0.8682 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 63, | |
| "steps": 44, | |
| "score": 0.8031, | |
| "total_reward": 18.4705, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.841, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.2756, | |
| 0.2711, | |
| 0.8367, | |
| 0.8449 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 64, | |
| "steps": 37, | |
| "score": 0.7634, | |
| "total_reward": 19.0848, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.76, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.3841, | |
| 0.9189, | |
| 0.3997, | |
| 0.3994, | |
| 0.3972, | |
| 0.33, | |
| 0.9328, | |
| 0.3556, | |
| 0.9011, | |
| 0.8967, | |
| 0.8922, | |
| 0.8878, | |
| 0.8833, | |
| 0.8789, | |
| 0.8744, | |
| 0.87, | |
| 0.8656, | |
| 0.8611, | |
| 0.8567, | |
| 0.8522, | |
| 0.8227 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 65, | |
| "steps": 40, | |
| "score": 0.797, | |
| "total_reward": 19.1287, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.838, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3763, | |
| 0.9211, | |
| 0.3919, | |
| 0.3997, | |
| 0.9122, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.853 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 66, | |
| "steps": 29, | |
| "score": 0.7165, | |
| "total_reward": 17.9135, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.723, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.337, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6647 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 67, | |
| "steps": 32, | |
| "score": 0.6707, | |
| "total_reward": 18.1095, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.44, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3503, | |
| 0.9211, | |
| 0.3459, | |
| 0.9167, | |
| 0.9144, | |
| 0.3392, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.9095, | |
| 0.3333, | |
| 0.5864 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 68, | |
| "steps": 42, | |
| "score": 0.7575, | |
| "total_reward": 19.6962, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.641, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3478, | |
| 0.9256, | |
| 0.9233, | |
| 0.3741, | |
| 0.9189, | |
| 0.3897, | |
| 0.3974, | |
| 0.3622, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.7994 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 69, | |
| "steps": 30, | |
| "score": 0.6917, | |
| "total_reward": 17.9836, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.658, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3478, | |
| 0.9506, | |
| 0.9483, | |
| 0.9461, | |
| 0.3689, | |
| 0.3667, | |
| 0.3644, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.9095, | |
| 0.6454 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 70, | |
| "steps": 27, | |
| "score": 0.7, | |
| "total_reward": 19.5996, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.716, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.3919, | |
| 0.9167, | |
| 0.3994, | |
| 0.3972, | |
| 0.395, | |
| 0.3928, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.3144, | |
| 0.8922, | |
| 0.89, | |
| 0.8878, | |
| 0.8856, | |
| 0.3033, | |
| 0.8811, | |
| 0.8789, | |
| 0.2967, | |
| 0.8744, | |
| 0.2922, | |
| 0.8166 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 71, | |
| "steps": 29, | |
| "score": 0.6808, | |
| "total_reward": 17.7021, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.375, | |
| "trust_calibration": 0.687, | |
| "adversarial_detections": 3, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.3841, | |
| 0.3919, | |
| 0.3997, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.0863, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6174 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 72, | |
| "steps": 34, | |
| "score": 0.6937, | |
| "total_reward": 19.4234, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5714, | |
| "trust_calibration": 0.723, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 3, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3763, | |
| 0.9211, | |
| 0.3919, | |
| 0.3997, | |
| 0.3644, | |
| 0.9122, | |
| 0.91, | |
| 0.3348, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.8922, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.9095, | |
| 0.8833, | |
| 0.2811, | |
| 0.6916 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 73, | |
| "steps": 26, | |
| "score": 0.6967, | |
| "total_reward": 16.7204, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.3333, | |
| "trust_calibration": 0.697, | |
| "adversarial_detections": 3, | |
| "adversarial_poisonings": 6, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.614 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 74, | |
| "steps": 28, | |
| "score": 0.675, | |
| "total_reward": 18.8991, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.806, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.3414, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.3256, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.8922, | |
| 0.31, | |
| 0.8878, | |
| 0.3056, | |
| 0.8833, | |
| 0.8811, | |
| 0.2989, | |
| 0.2967, | |
| 0.8744, | |
| 0.2922, | |
| 0.8193 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 75, | |
| "steps": 40, | |
| "score": 0.7977, | |
| "total_reward": 19.1445, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.837, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.2756, | |
| 0.8529 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 76, | |
| "steps": 42, | |
| "score": 0.8335, | |
| "total_reward": 19.1712, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.842, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3686, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.2844, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8671 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 77, | |
| "steps": 29, | |
| "score": 0.6992, | |
| "total_reward": 18.1794, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.645, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3478, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.3259, | |
| 0.8967, | |
| 0.8944, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.0863, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6451 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 78, | |
| "steps": 28, | |
| "score": 0.7161, | |
| "total_reward": 17.9032, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.709, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3503, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.6633 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 79, | |
| "steps": 44, | |
| "score": 0.7563, | |
| "total_reward": 19.6626, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.829, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9122, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.2933, | |
| 0.8589, | |
| 0.2844, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8367, | |
| 0.8421 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 80, | |
| "steps": 46, | |
| "score": 0.7065, | |
| "total_reward": 16.9564, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.803, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.3111, | |
| 0.3067, | |
| 0.8722, | |
| 0.8678, | |
| 0.2933, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.2756, | |
| 0.8411, | |
| 0.8367, | |
| 0.8322, | |
| 0.7808 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 81, | |
| "steps": 44, | |
| "score": 0.8031, | |
| "total_reward": 18.4703, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.84, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.2756, | |
| 0.2711, | |
| 0.8367, | |
| 0.8447 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 82, | |
| "steps": 44, | |
| "score": 0.8286, | |
| "total_reward": 19.0588, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.844, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.32, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8456, | |
| 0.8411, | |
| 0.8367, | |
| 0.8632 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 83, | |
| "steps": 33, | |
| "score": 0.7082, | |
| "total_reward": 18.4133, | |
| "completion_rate": 0.8, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.826, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3456, | |
| 0.9483, | |
| 0.9461, | |
| 0.8989, | |
| 0.3667, | |
| 0.3644, | |
| 0.9372, | |
| 0.935, | |
| 0.8878, | |
| 0.9306, | |
| 0.3533, | |
| 0.3511, | |
| 0.3489, | |
| 0.3467, | |
| 0.9194, | |
| 0.3422, | |
| 0.34, | |
| 0.9033, | |
| 0.8789, | |
| 0.8744, | |
| 0.87, | |
| 0.8656, | |
| 0.8611, | |
| 0.8133 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 84, | |
| "steps": 29, | |
| "score": 0.71, | |
| "total_reward": 18.4609, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.716, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.3686, | |
| 0.9233, | |
| 0.9211, | |
| 0.3459, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.3214, | |
| 0.8922, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.0863, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6795 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 85, | |
| "steps": 29, | |
| "score": 0.6923, | |
| "total_reward": 17.999, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.718, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9144, | |
| 0.3392, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.8922, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.0863, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6627 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 86, | |
| "steps": 33, | |
| "score": 0.6856, | |
| "total_reward": 18.5122, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.605, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3478, | |
| 0.9256, | |
| 0.3663, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.9095, | |
| 0.3333, | |
| 0.6263 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 87, | |
| "steps": 27, | |
| "score": 0.6991, | |
| "total_reward": 16.7786, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.4, | |
| "trust_calibration": 0.727, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 6, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.1019, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.6393 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 88, | |
| "steps": 42, | |
| "score": 0.7762, | |
| "total_reward": 19.4062, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.835, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.28, | |
| 0.8456, | |
| 0.8411, | |
| 0.8479 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 89, | |
| "steps": 29, | |
| "score": 0.7259, | |
| "total_reward": 18.1473, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.696, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.3503, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.3392, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6745 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 90, | |
| "steps": 28, | |
| "score": 0.7161, | |
| "total_reward": 17.9035, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.71, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.3392, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.9228, | |
| 0.9184, | |
| 0.9139, | |
| 0.6635 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 91, | |
| "steps": 29, | |
| "score": 0.7262, | |
| "total_reward": 18.1541, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.723, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.3459, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.3259, | |
| 0.8967, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6813 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 92, | |
| "steps": 38, | |
| "score": 0.7692, | |
| "total_reward": 18.4611, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.842, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.3389, | |
| 0.3367, | |
| 0.3344, | |
| 0.3322, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.3244, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8411 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 93, | |
| "steps": 45, | |
| "score": 0.6795, | |
| "total_reward": 19.7049, | |
| "completion_rate": 0.85, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.749, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3478, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.3897, | |
| 0.9144, | |
| 0.3972, | |
| 0.395, | |
| 0.9078, | |
| 0.3906, | |
| 0.3883, | |
| 0.3861, | |
| 0.8967, | |
| 0.3222, | |
| 0.8878, | |
| 0.8833, | |
| 0.8789, | |
| 0.8744, | |
| 0.3, | |
| 0.2956, | |
| 0.8789, | |
| 0.8722, | |
| 0.8478, | |
| 0.8433, | |
| 0.8389, | |
| 0.2644, | |
| 0.7847 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 94, | |
| "steps": 32, | |
| "score": 0.6725, | |
| "total_reward": 18.1568, | |
| "completion_rate": 0.8, | |
| "detection_rate": 0.3333, | |
| "trust_calibration": 0.85, | |
| "adversarial_detections": 1, | |
| "adversarial_poisonings": 2, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.8989, | |
| 0.3667, | |
| 0.3644, | |
| 0.9372, | |
| 0.935, | |
| 0.3578, | |
| 0.8856, | |
| 0.9283, | |
| 0.3511, | |
| 0.3489, | |
| 0.3467, | |
| 0.3444, | |
| 0.8922, | |
| 0.89, | |
| 0.8878, | |
| 0.0886, | |
| 0.0863, | |
| 0.9206, | |
| 0.8922, | |
| 0.8678, | |
| 0.8633, | |
| 0.6213 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 95, | |
| "steps": 40, | |
| "score": 0.7977, | |
| "total_reward": 19.1447, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.838, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.3733, | |
| 0.9461, | |
| 0.3689, | |
| 0.3667, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.2756, | |
| 0.853 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 96, | |
| "steps": 31, | |
| "score": 0.6978, | |
| "total_reward": 17.4444, | |
| "completion_rate": 0.85, | |
| "detection_rate": 0.5, | |
| "trust_calibration": 0.726, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 4, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.3511, | |
| 0.9189, | |
| 0.9167, | |
| 0.3344, | |
| 0.3322, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.9033, | |
| 0.9011, | |
| 0.8989, | |
| 0.8967, | |
| 0.8944, | |
| 0.8922, | |
| 0.093, | |
| 0.0908, | |
| 0.0886, | |
| 0.0863, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.9073, | |
| 0.66 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 97, | |
| "steps": 38, | |
| "score": 0.8207, | |
| "total_reward": 18.8758, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.829, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9528, | |
| 0.9506, | |
| 0.9483, | |
| 0.3711, | |
| 0.3689, | |
| 0.3667, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8552 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 98, | |
| "steps": 29, | |
| "score": 0.7165, | |
| "total_reward": 17.9132, | |
| "completion_rate": 0.9, | |
| "detection_rate": 0.4444, | |
| "trust_calibration": 0.722, | |
| "adversarial_detections": 4, | |
| "adversarial_poisonings": 5, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.9189, | |
| 0.9167, | |
| 0.9144, | |
| 0.9122, | |
| 0.91, | |
| 0.9078, | |
| 0.9056, | |
| 0.3303, | |
| 0.9011, | |
| 0.8989, | |
| 0.0997, | |
| 0.0974, | |
| 0.0952, | |
| 0.093, | |
| 0.0908, | |
| 0.925, | |
| 0.9206, | |
| 0.9161, | |
| 0.9117, | |
| 0.6644 | |
| ] | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "task_type": "task3", | |
| "seed": 99, | |
| "steps": 38, | |
| "score": 0.7037, | |
| "total_reward": 21.1121, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.723, | |
| "adversarial_detections": 0, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9278, | |
| 0.9256, | |
| 0.9233, | |
| 0.9211, | |
| 0.3389, | |
| 0.9167, | |
| 0.9144, | |
| 0.3322, | |
| 0.91, | |
| 0.3278, | |
| 0.9056, | |
| 0.9033, | |
| 0.3211, | |
| 0.8989, | |
| 0.3167, | |
| 0.8944, | |
| 0.8922, | |
| 0.31, | |
| 0.8878, | |
| 0.3056, | |
| 0.3211, | |
| 0.8967, | |
| 0.3122, | |
| 0.8878, | |
| 0.3033, | |
| 0.8789, | |
| 0.8744, | |
| 0.87, | |
| 0.8287 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 0, | |
| "steps": 42, | |
| "score": 0.867, | |
| "total_reward": 19.0739, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.935, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8904 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 1, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 2, | |
| "steps": 42, | |
| "score": 0.8422, | |
| "total_reward": 18.5276, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.933, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.3511, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8724 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 3, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 4, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1154, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 5, | |
| "steps": 46, | |
| "score": 0.7916, | |
| "total_reward": 18.9976, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.917, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.32, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8618 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 6, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8523, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 7, | |
| "steps": 40, | |
| "score": 0.8958, | |
| "total_reward": 18.8108, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.894 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 8, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3315, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.3511, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8857 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 9, | |
| "steps": 40, | |
| "score": 0.8958, | |
| "total_reward": 18.8108, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.894 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 10, | |
| "steps": 42, | |
| "score": 0.8421, | |
| "total_reward": 18.5263, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.928, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.3067, | |
| 0.3022, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.871 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 11, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 12, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 13, | |
| "steps": 40, | |
| "score": 0.8958, | |
| "total_reward": 18.8108, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.894 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 14, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3311, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.932, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.2933, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8853 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 15, | |
| "steps": 46, | |
| "score": 0.8162, | |
| "total_reward": 19.5883, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.93, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8825 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 16, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8523, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 17, | |
| "steps": 42, | |
| "score": 0.867, | |
| "total_reward": 19.0739, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.935, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8903 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 18, | |
| "steps": 40, | |
| "score": 0.8958, | |
| "total_reward": 18.8108, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.894 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 19, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 20, | |
| "steps": 46, | |
| "score": 0.7653, | |
| "total_reward": 18.3663, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.909, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.3067, | |
| 0.8722, | |
| 0.8678, | |
| 0.2933, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8423 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 21, | |
| "steps": 40, | |
| "score": 0.8958, | |
| "total_reward": 18.8109, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.932, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.894 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 22, | |
| "steps": 46, | |
| "score": 0.7652, | |
| "total_reward": 18.3659, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.908, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.2978, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.85, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8419 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 23, | |
| "steps": 42, | |
| "score": 0.8403, | |
| "total_reward": 18.4862, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.3511, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8727 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 24, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3311, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.933, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.2933, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8854 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 25, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3314, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.3511, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.3244, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8857 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 26, | |
| "steps": 42, | |
| "score": 0.8403, | |
| "total_reward": 18.4855, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.3067, | |
| 0.3022, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.872 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 27, | |
| "steps": 46, | |
| "score": 0.8179, | |
| "total_reward": 19.6285, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.924, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.2978, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.881 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 28, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 29, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3311, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.933, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.3244, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.3022, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8854 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 30, | |
| "steps": 44, | |
| "score": 0.8148, | |
| "total_reward": 18.741, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.922, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.32, | |
| 0.8856, | |
| 0.3111, | |
| 0.3067, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8653 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 31, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 32, | |
| "steps": 44, | |
| "score": 0.8423, | |
| "total_reward": 19.3725, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8851 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 33, | |
| "steps": 46, | |
| "score": 0.8178, | |
| "total_reward": 19.6279, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.922, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.3067, | |
| 0.8722, | |
| 0.8678, | |
| 0.2933, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8804 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 34, | |
| "steps": 42, | |
| "score": 0.867, | |
| "total_reward": 19.0739, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.935, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8904 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 35, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3313, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.933, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8855 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 36, | |
| "steps": 44, | |
| "score": 0.8167, | |
| "total_reward": 18.7838, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.927, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.3511, | |
| 0.3467, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8664 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 37, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 38, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 39, | |
| "steps": 40, | |
| "score": 0.8958, | |
| "total_reward": 18.8109, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.932, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.894 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 40, | |
| "steps": 42, | |
| "score": 0.867, | |
| "total_reward": 19.0739, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.935, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8904 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 41, | |
| "steps": 42, | |
| "score": 0.867, | |
| "total_reward": 19.0739, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.935, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8904 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 42, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 43, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 44, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 45, | |
| "steps": 40, | |
| "score": 0.8958, | |
| "total_reward": 18.8109, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.932, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.894 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 46, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1154, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.3244, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 47, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.3067, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 48, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3312, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.933, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.32, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8855 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 49, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 50, | |
| "steps": 44, | |
| "score": 0.8423, | |
| "total_reward": 19.3725, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.3022, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.885 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 51, | |
| "steps": 46, | |
| "score": 0.8178, | |
| "total_reward": 19.6279, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.922, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.32, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.2933, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8804 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 52, | |
| "steps": 46, | |
| "score": 0.7899, | |
| "total_reward": 18.9582, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.926, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.32, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8641 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 53, | |
| "steps": 44, | |
| "score": 0.8423, | |
| "total_reward": 19.3723, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.93, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.3067, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8848 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 54, | |
| "steps": 46, | |
| "score": 0.7916, | |
| "total_reward": 18.9976, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.918, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.2978, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8619 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 55, | |
| "steps": 44, | |
| "score": 0.8423, | |
| "total_reward": 19.3725, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.3511, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8851 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 56, | |
| "steps": 46, | |
| "score": 0.7899, | |
| "total_reward": 18.957, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.922, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.3067, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.2889, | |
| 0.8544, | |
| 0.85, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8629 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 57, | |
| "steps": 46, | |
| "score": 0.8179, | |
| "total_reward": 19.6284, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.924, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.2933, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.881 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 58, | |
| "steps": 44, | |
| "score": 0.8423, | |
| "total_reward": 19.3725, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.885 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 59, | |
| "steps": 42, | |
| "score": 0.8403, | |
| "total_reward": 18.4861, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.3511, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8726 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 60, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 61, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 62, | |
| "steps": 46, | |
| "score": 0.7933, | |
| "total_reward": 19.0389, | |
| "completion_rate": 0.95, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.916, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8615 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 63, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 64, | |
| "steps": 44, | |
| "score": 0.8423, | |
| "total_reward": 19.3725, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8851 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 65, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.32, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 66, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.32, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 67, | |
| "steps": 46, | |
| "score": 0.8179, | |
| "total_reward": 19.6293, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.927, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.3244, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8818 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 68, | |
| "steps": 46, | |
| "score": 0.8162, | |
| "total_reward": 19.5881, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.929, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.3511, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8823 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 69, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 70, | |
| "steps": 44, | |
| "score": 0.8423, | |
| "total_reward": 19.3728, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.933, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8854 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 71, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3313, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.933, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8856 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 72, | |
| "steps": 44, | |
| "score": 0.8423, | |
| "total_reward": 19.3724, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8849 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 73, | |
| "steps": 40, | |
| "score": 0.8958, | |
| "total_reward": 18.8109, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.932, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.894 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 74, | |
| "steps": 44, | |
| "score": 0.8423, | |
| "total_reward": 19.3724, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.3111, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8849 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 75, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 76, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.3511, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 77, | |
| "steps": 42, | |
| "score": 0.867, | |
| "total_reward": 19.0739, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.935, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.2978, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8904 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 78, | |
| "steps": 42, | |
| "score": 0.867, | |
| "total_reward": 19.0739, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.935, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8903 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 79, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.32, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 80, | |
| "steps": 46, | |
| "score": 0.7652, | |
| "total_reward": 18.3653, | |
| "completion_rate": 0.9, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.905, | |
| "adversarial_detections": 5, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.3111, | |
| 0.3067, | |
| 0.8722, | |
| 0.8678, | |
| 0.2933, | |
| 0.8589, | |
| 0.8544, | |
| 0.85, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8412 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 81, | |
| "steps": 42, | |
| "score": 0.867, | |
| "total_reward": 19.0739, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.935, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8904 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 82, | |
| "steps": 44, | |
| "score": 0.8423, | |
| "total_reward": 19.3725, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.32, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.885 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 83, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8524, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 84, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3315, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.3511, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8857 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 85, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3313, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.933, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.3422, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.3244, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8856 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 86, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8523, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 87, | |
| "steps": 40, | |
| "score": 0.8977, | |
| "total_reward": 18.8523, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.9095, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8938 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 88, | |
| "steps": 44, | |
| "score": 0.8423, | |
| "total_reward": 19.3725, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.885 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 89, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 90, | |
| "steps": 40, | |
| "score": 0.8958, | |
| "total_reward": 18.8109, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.932, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.894 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 91, | |
| "steps": 44, | |
| "score": 0.8423, | |
| "total_reward": 19.3721, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.93, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.2978, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8846 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 92, | |
| "steps": 44, | |
| "score": 0.8423, | |
| "total_reward": 19.3725, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.931, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.3333, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.3067, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.885 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 93, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.3556, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 94, | |
| "steps": 46, | |
| "score": 0.8161, | |
| "total_reward": 19.5872, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.926, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.3289, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.2978, | |
| 0.8633, | |
| 0.8589, | |
| 0.8544, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8739, | |
| 0.8814 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 95, | |
| "steps": 44, | |
| "score": 0.8405, | |
| "total_reward": 19.3315, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.3467, | |
| 0.9122, | |
| 0.3378, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.8633, | |
| 0.8589, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8784, | |
| 0.8857 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 96, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.3511, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 97, | |
| "steps": 40, | |
| "score": 0.8958, | |
| "total_reward": 18.8109, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.932, | |
| "adversarial_detections": 6, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.894 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 98, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1153, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.8856, | |
| 0.8811, | |
| 0.3067, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| }, | |
| { | |
| "policy": "oracle_lite", | |
| "task_type": "task3", | |
| "seed": 99, | |
| "steps": 42, | |
| "score": 0.8689, | |
| "total_reward": 19.1154, | |
| "completion_rate": 1.0, | |
| "detection_rate": 1.0, | |
| "trust_calibration": 0.934, | |
| "adversarial_detections": 7, | |
| "adversarial_poisonings": 0, | |
| "status": "completed", | |
| "difficulty_profile": { | |
| "adaptive": false, | |
| "episodes_seen": 0, | |
| "rolling_detection_rate": 0.0, | |
| "adversarial_threshold": 0.7, | |
| "high_stakes_ratio": 0.35, | |
| "verify_budget_penalty": 0, | |
| "adversary_benign_confidence": 0.88, | |
| "adversary_poison_confidence": 0.92 | |
| }, | |
| "rewards": [ | |
| 0.9256, | |
| 0.9211, | |
| 0.9167, | |
| 0.9122, | |
| 0.9078, | |
| 0.9033, | |
| 0.8989, | |
| 0.8944, | |
| 0.89, | |
| 0.3156, | |
| 0.8811, | |
| 0.8767, | |
| 0.8722, | |
| 0.8678, | |
| 0.905, | |
| 0.9006, | |
| 0.8961, | |
| 0.8917, | |
| 0.8873, | |
| 0.8828, | |
| 0.8901 | |
| ] | |
| } | |
| ], | |
| "chart": "outputs/baseline_comparison.png" | |
| } | |