sentinel-env / outputs /evaluation_results.json
XcodeAddy's picture
Add GPU trust environment and GRPO replay pipeline
a36db1b
raw
history blame
326 kB
{
"task": "task3",
"tasks": [
"task3"
],
"episodes_per_policy": 100,
"adaptive": false,
"difficulty_controller": {
"adaptive": true,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"difficulty_controller_by_task_policy": {
"task3": {
"random": {},
"heuristic": {},
"oracle_lite": {}
}
},
"summary": {
"random": {
"episodes": 100,
"avg_score": 0.6601,
"avg_completion_rate": 0.8165,
"avg_detection_rate": 0.375,
"avg_trust_calibration": 0.7349,
"avg_steps": 36.13
},
"heuristic": {
"episodes": 100,
"avg_score": 0.7314,
"avg_completion_rate": 0.8935,
"avg_detection_rate": 0.7621,
"avg_trust_calibration": 0.74,
"avg_steps": 35.54
},
"oracle_lite": {
"episodes": 100,
"avg_score": 0.8562,
"avg_completion_rate": 0.991,
"avg_detection_rate": 1.0,
"avg_trust_calibration": 0.9304,
"avg_steps": 42.62
}
},
"by_task": {
"task3": {
"random": {
"episodes": 100,
"avg_score": 0.6601,
"avg_completion_rate": 0.8165,
"avg_detection_rate": 0.375,
"avg_trust_calibration": 0.7349,
"avg_steps": 36.13
},
"heuristic": {
"episodes": 100,
"avg_score": 0.7314,
"avg_completion_rate": 0.8935,
"avg_detection_rate": 0.7621,
"avg_trust_calibration": 0.74,
"avg_steps": 35.54
},
"oracle_lite": {
"episodes": 100,
"avg_score": 0.8562,
"avg_completion_rate": 0.991,
"avg_detection_rate": 1.0,
"avg_trust_calibration": 0.9304,
"avg_steps": 42.62
}
}
},
"episodes": [
{
"policy": "random",
"task_type": "task3",
"seed": 0,
"steps": 36,
"score": 0.6105,
"total_reward": 15.2622,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.884,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8956,
0.9233,
0.9211,
0.02,
0.9144,
0.9122,
0.9078,
0.3556,
0.02,
0.02,
0.8467,
0.8922,
0.8378,
0.8833,
0.9061,
0.3067,
0.02,
0.8722,
0.073,
0.8306,
0.9061,
0.3397,
0.3044,
0.5035
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 1,
"steps": 40,
"score": 0.7205,
"total_reward": 18.0135,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.662,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9506,
0.8889,
0.8822,
0.91,
0.8556,
0.9011,
0.3167,
0.8622,
0.89,
0.8356,
0.8633,
0.3011,
0.8444,
0.8378,
0.8656,
0.8261,
0.8589,
0.8961,
0.02,
0.3,
0.8106,
0.2933,
0.7916
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 2,
"steps": 37,
"score": 0.7627,
"total_reward": 17.5411,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.752,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3778,
0.3203,
0.9211,
0.9189,
0.8822,
0.91,
0.8556,
0.8511,
0.8967,
0.8422,
0.89,
0.8356,
0.8311,
0.8767,
0.2944,
0.8972,
0.87,
0.8656,
0.8789,
0.02,
0.8722,
0.8207
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 3,
"steps": 36,
"score": 0.6303,
"total_reward": 16.3887,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.798,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9233,
0.3411,
0.8667,
0.9144,
0.9372,
0.91,
0.3278,
0.3556,
0.8511,
0.8989,
0.2922,
0.8878,
0.3056,
0.8489,
0.8767,
0.8922,
0.333,
0.2878,
0.8283,
0.8589,
0.3297,
0.3552,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 4,
"steps": 41,
"score": 0.6894,
"total_reward": 18.6138,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.405,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9506,
0.9233,
0.3711,
0.8844,
0.9122,
0.33,
0.3186,
0.9011,
0.8667,
0.86,
0.8878,
0.8833,
0.8789,
0.8767,
0.8994,
0.3352,
0.8678,
0.3033,
0.8239,
0.8744,
0.8678,
0.2656,
0.2933,
0.2911,
0.7076
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 5,
"steps": 31,
"score": 0.6062,
"total_reward": 15.1538,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.816,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8933,
0.9211,
0.3367,
0.3344,
0.3322,
0.8578,
0.9306,
0.9033,
0.3211,
0.8467,
0.9194,
0.32,
0.8878,
0.8856,
0.8811,
0.02,
0.8394,
0.0752,
0.87,
0.8678,
0.02,
0.8883,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 6,
"steps": 39,
"score": 0.6337,
"total_reward": 15.8429,
"completion_rate": 0.8,
"detection_rate": 0.0,
"trust_calibration": 0.872,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8956,
0.8889,
0.9167,
0.9144,
0.9122,
0.91,
0.9006,
0.8511,
0.3197,
0.9194,
0.28,
0.8556,
0.8811,
0.8789,
0.8422,
0.8856,
0.3311,
0.8589,
0.0597,
0.3222,
0.27,
0.8728,
0.02,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 7,
"steps": 32,
"score": 0.7179,
"total_reward": 15.793,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.869,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8756,
0.8911,
0.8867,
0.9144,
0.86,
0.9056,
0.9033,
0.8989,
0.8967,
0.3444,
0.89,
0.8356,
0.9083,
0.02,
0.8789,
0.8744,
0.87,
0.8928,
0.8633,
0.3111,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 8,
"steps": 31,
"score": 0.7087,
"total_reward": 16.3004,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.701,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.3733,
0.9211,
0.8844,
0.9122,
0.91,
0.3578,
0.9056,
0.9283,
0.8667,
0.8944,
0.84,
0.3456,
0.8833,
0.8811,
0.8589,
0.8767,
0.0774,
0.835,
0.8856,
0.8633,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 9,
"steps": 37,
"score": 0.7151,
"total_reward": 17.877,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.558,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9256,
0.9233,
0.8689,
0.3597,
0.8822,
0.935,
0.02,
0.3556,
0.9011,
0.8967,
0.8944,
0.84,
0.8356,
0.8489,
0.8244,
0.835,
0.3178,
0.8656,
0.8261,
0.8217,
0.3044,
0.85,
0.7724
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 10,
"steps": 38,
"score": 0.6037,
"total_reward": 17.5072,
"completion_rate": 0.75,
"detection_rate": 1.0,
"trust_calibration": 0.772,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9233,
0.9461,
0.8844,
0.3622,
0.9078,
0.02,
0.3233,
0.9261,
0.8967,
0.8944,
0.86,
0.3378,
0.02,
0.3263,
0.8811,
0.3289,
0.2967,
0.8994,
0.8722,
0.8678,
0.3386,
0.3463,
0.02,
0.3089,
0.8544,
0.355,
0.7709
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 11,
"steps": 32,
"score": 0.5881,
"total_reward": 14.703,
"completion_rate": 0.7,
"detection_rate": 0.3333,
"trust_calibration": 0.743,
"adversarial_detections": 1,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8756,
0.9233,
0.9211,
0.3459,
0.8844,
0.3622,
0.9078,
0.9306,
0.02,
0.8689,
0.8967,
0.8944,
0.8722,
0.8878,
0.8856,
0.9228,
0.0819,
0.2967,
0.3244,
0.8722,
0.02,
0.3356,
0.3011,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 12,
"steps": 36,
"score": 0.6249,
"total_reward": 14.9974,
"completion_rate": 0.75,
"detection_rate": 0.3333,
"trust_calibration": 0.716,
"adversarial_detections": 1,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.8933,
0.3411,
0.9189,
0.8644,
0.9122,
0.8756,
0.02,
0.3441,
0.3189,
0.3167,
0.8622,
0.8878,
0.8511,
0.8789,
0.8244,
0.8878,
0.2856,
0.0663,
0.8589,
0.8961,
0.8772,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 13,
"steps": 38,
"score": 0.7872,
"total_reward": 18.1053,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.64,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8911,
0.8667,
0.9122,
0.3278,
0.9056,
0.8689,
0.02,
0.8622,
0.8378,
0.8533,
0.8289,
0.8767,
0.8722,
0.895,
0.8856,
0.8633,
0.8611,
0.9017,
0.8939,
0.3,
0.8081
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 14,
"steps": 35,
"score": 0.6988,
"total_reward": 18.1679,
"completion_rate": 0.95,
"detection_rate": 0.0,
"trust_calibration": 0.661,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8733,
0.8889,
0.8844,
0.3322,
0.91,
0.9078,
0.9056,
0.9033,
0.8989,
0.8622,
0.89,
0.3408,
0.8856,
0.8833,
0.3011,
0.8267,
0.8372,
0.87,
0.0708,
0.8833,
0.2811,
0.8839,
0.8544,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 15,
"steps": 38,
"score": 0.68,
"total_reward": 19.0388,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.774,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3548,
0.9256,
0.9233,
0.8689,
0.3667,
0.3644,
0.8922,
0.935,
0.8556,
0.3533,
0.3211,
0.3189,
0.8944,
0.89,
0.3378,
0.8333,
0.8789,
0.8767,
0.8744,
0.2922,
0.333,
0.8656,
0.8261,
0.8567,
0.3,
0.8478,
0.8066
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 16,
"steps": 33,
"score": 0.5966,
"total_reward": 15.511,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.691,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.3044,
0.02,
0.935,
0.02,
0.3256,
0.8711,
0.8967,
0.3444,
0.02,
0.89,
0.8556,
0.8833,
0.3311,
0.3289,
0.8744,
0.8878,
0.8633,
0.3141,
0.8589,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 17,
"steps": 35,
"score": 0.5891,
"total_reward": 16.4939,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.795,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.02,
0.3433,
0.3319,
0.8644,
0.3622,
0.91,
0.3286,
0.9033,
0.9011,
0.8789,
0.3697,
0.9194,
0.84,
0.8856,
0.8811,
0.8789,
0.3597,
0.2692,
0.87,
0.2878,
0.8656,
0.0663,
0.8239,
0.8817,
0.02,
0.4835
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 18,
"steps": 35,
"score": 0.6548,
"total_reward": 16.3705,
"completion_rate": 0.75,
"detection_rate": 1.0,
"trust_calibration": 0.573,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9233,
0.02,
0.9167,
0.3644,
0.3622,
0.91,
0.8556,
0.9033,
0.8689,
0.8967,
0.8922,
0.915,
0.02,
0.8533,
0.8789,
0.2967,
0.3422,
0.3078,
0.8656,
0.8611,
0.2789,
0.3297,
0.7281
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 19,
"steps": 38,
"score": 0.6912,
"total_reward": 17.2799,
"completion_rate": 0.9,
"detection_rate": 0.0,
"trust_calibration": 0.834,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3156,
0.9211,
0.9189,
0.9144,
0.9122,
0.33,
0.02,
0.9133,
0.9261,
0.8967,
0.8422,
0.89,
0.8356,
0.8511,
0.8789,
0.8922,
0.87,
0.3178,
0.8811,
0.8589,
0.8544,
0.87,
0.3108,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 20,
"steps": 44,
"score": 0.6149,
"total_reward": 19.0606,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.859,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.8711,
0.8667,
0.9144,
0.9122,
0.337,
0.9078,
0.9033,
0.3281,
0.3519,
0.3467,
0.8922,
0.89,
0.3608,
0.8856,
0.8833,
0.8289,
0.2714,
0.87,
0.3508,
0.8656,
0.3089,
0.0597,
0.8172,
0.053,
0.8728,
0.8083,
0.3439,
0.2567,
0.5146
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 21,
"steps": 30,
"score": 0.7401,
"total_reward": 15.543,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.709,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.8911,
0.8867,
0.9144,
0.3322,
0.9078,
0.9056,
0.8711,
0.8467,
0.8922,
0.89,
0.02,
0.8856,
0.8811,
0.8589,
0.8744,
0.87,
0.8856,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 22,
"steps": 45,
"score": 0.5529,
"total_reward": 16.5871,
"completion_rate": 0.7,
"detection_rate": 0.0,
"trust_calibration": 0.709,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3548,
0.02,
0.9311,
0.8844,
0.3622,
0.36,
0.9078,
0.2933,
0.8689,
0.9217,
0.3444,
0.32,
0.8556,
0.3033,
0.3011,
0.8767,
0.3022,
0.87,
0.8678,
0.8811,
0.0619,
0.8544,
0.3022,
0.8478,
0.8083,
0.3119,
0.0397,
0.8322,
0.4222
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 23,
"steps": 31,
"score": 0.7921,
"total_reward": 18.2193,
"completion_rate": 0.95,
"detection_rate": 0.5,
"trust_calibration": 0.847,
"adversarial_detections": 1,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8756,
0.9233,
0.3481,
0.9439,
0.9167,
0.9144,
0.86,
0.9078,
0.9033,
0.9261,
0.8967,
0.8944,
0.8922,
0.89,
0.8878,
0.02,
0.9228,
0.0819,
0.8744,
0.87,
0.8833,
0.7254
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 24,
"steps": 45,
"score": 0.6293,
"total_reward": 16.3622,
"completion_rate": 0.75,
"detection_rate": 1.0,
"trust_calibration": 0.813,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3156,
0.9483,
0.9461,
0.3197,
0.9072,
0.8756,
0.9033,
0.3541,
0.02,
0.9044,
0.02,
0.3608,
0.8511,
0.2967,
0.8722,
0.8356,
0.8111,
0.2867,
0.8544,
0.845,
0.3156,
0.8061,
0.8367,
0.7972,
0.7658
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 25,
"steps": 34,
"score": 0.6745,
"total_reward": 16.8613,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.809,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8933,
0.9461,
0.9189,
0.8644,
0.9122,
0.91,
0.8556,
0.8711,
0.8989,
0.3237,
0.8944,
0.8922,
0.8378,
0.3356,
0.8833,
0.8489,
0.0797,
0.3244,
0.3222,
0.8878,
0.8906,
0.9061,
0.2967,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 26,
"steps": 43,
"score": 0.5813,
"total_reward": 17.4397,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.815,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8933,
0.9211,
0.9439,
0.9167,
0.9144,
0.02,
0.3308,
0.8711,
0.8467,
0.8922,
0.8878,
0.3486,
0.3033,
0.2759,
0.8767,
0.3244,
0.3452,
0.29,
0.8156,
0.8633,
0.2889,
0.0597,
0.8544,
0.3372,
0.8478,
0.2956,
0.2811,
0.2889,
0.4707
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 27,
"steps": 40,
"score": 0.5674,
"total_reward": 15.3205,
"completion_rate": 0.7,
"detection_rate": 0.25,
"trust_calibration": 0.816,
"adversarial_detections": 1,
"adversarial_poisonings": 3,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9206,
0.3433,
0.3541,
0.9189,
0.9244,
0.3622,
0.9078,
0.9133,
0.8689,
0.8644,
0.885,
0.02,
0.8533,
0.9061,
0.8267,
0.9139,
0.073,
0.3356,
0.3441,
0.0619,
0.8994,
0.2722,
0.323,
0.3308,
0.8433,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 28,
"steps": 32,
"score": 0.6685,
"total_reward": 16.0443,
"completion_rate": 0.8,
"detection_rate": 0.0,
"trust_calibration": 0.793,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8956,
0.9211,
0.9189,
0.8844,
0.9122,
0.91,
0.9078,
0.9056,
0.3763,
0.02,
0.3167,
0.8944,
0.89,
0.9128,
0.8856,
0.8461,
0.8589,
0.3267,
0.8372,
0.9128,
0.2933,
0.3461,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 29,
"steps": 40,
"score": 0.6868,
"total_reward": 17.8577,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.907,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8933,
0.3411,
0.9189,
0.02,
0.8822,
0.9028,
0.02,
0.8511,
0.8989,
0.3467,
0.3444,
0.8578,
0.8856,
0.8511,
0.2989,
0.9094,
0.87,
0.8856,
0.3641,
0.8567,
0.3572,
0.8895,
0.8083,
0.8353
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 30,
"steps": 28,
"score": 0.4767,
"total_reward": 10.4876,
"completion_rate": 0.5,
"detection_rate": 0.0,
"trust_calibration": 0.745,
"adversarial_detections": 0,
"adversarial_poisonings": 3,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8956,
0.9233,
0.02,
0.9189,
0.02,
0.9394,
0.02,
0.9078,
0.8533,
0.9011,
0.02,
0.8967,
0.3144,
0.2878,
0.8533,
0.3441,
0.0819,
0.2967,
0.0774,
0.835,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 31,
"steps": 33,
"score": 0.7077,
"total_reward": 19.8143,
"completion_rate": 0.95,
"detection_rate": 0.5,
"trust_calibration": 0.68,
"adversarial_detections": 1,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9256,
0.9483,
0.3711,
0.9189,
0.3344,
0.9122,
0.91,
0.02,
0.3556,
0.9033,
0.3211,
0.8989,
0.8967,
0.8944,
0.89,
0.3456,
0.8833,
0.8989,
0.0797,
0.8372,
0.29,
0.9073,
0.8633,
0.8611,
0.8589,
0.679
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 32,
"steps": 40,
"score": 0.5915,
"total_reward": 18.3369,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.392,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8756,
0.9233,
0.9211,
0.3689,
0.3444,
0.3622,
0.33,
0.9328,
0.9056,
0.3311,
0.8667,
0.3214,
0.8922,
0.8878,
0.8856,
0.3033,
0.9061,
0.8789,
0.8944,
0.2922,
0.323,
0.2956,
0.3133,
0.0641,
0.8839,
0.8744,
0.3152,
0.3178,
0.3133,
0.3715
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 33,
"steps": 41,
"score": 0.59,
"total_reward": 15.3412,
"completion_rate": 0.7,
"detection_rate": 0.6667,
"trust_calibration": 0.72,
"adversarial_detections": 2,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8911,
0.9439,
0.8644,
0.02,
0.8578,
0.02,
0.9033,
0.8489,
0.8967,
0.3444,
0.287,
0.8556,
0.8311,
0.3497,
0.8722,
0.073,
0.02,
0.905,
0.9006,
0.8544,
0.3022,
0.3308,
0.3186,
0.6338
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 34,
"steps": 35,
"score": 0.6414,
"total_reward": 16.0344,
"completion_rate": 0.8,
"detection_rate": 0.0,
"trust_calibration": 0.865,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8933,
0.8689,
0.02,
0.3344,
0.3392,
0.353,
0.9056,
0.3303,
0.8667,
0.8622,
0.89,
0.8556,
0.8511,
0.8767,
0.8744,
0.8722,
0.32,
0.8678,
0.8456,
0.8611,
0.8589,
0.2944,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 35,
"steps": 39,
"score": 0.5508,
"total_reward": 15.9746,
"completion_rate": 0.7,
"detection_rate": 0.5,
"trust_calibration": 0.645,
"adversarial_detections": 1,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3133,
0.3411,
0.9167,
0.8822,
0.3056,
0.8511,
0.8989,
0.3497,
0.9194,
0.3192,
0.34,
0.02,
0.8856,
0.3033,
0.2989,
0.8767,
0.8744,
0.2922,
0.02,
0.8478,
0.3386,
0.8789,
0.8817,
0.8172,
0.8895,
0.3286,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 36,
"steps": 31,
"score": 0.5216,
"total_reward": 12.5188,
"completion_rate": 0.55,
"detection_rate": 0.5,
"trust_calibration": 0.506,
"adversarial_detections": 1,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.02,
0.8711,
0.3519,
0.3367,
0.3422,
0.91,
0.9328,
0.02,
0.8711,
0.8467,
0.8922,
0.34,
0.3078,
0.8856,
0.3333,
0.3311,
0.02,
0.8744,
0.2922,
0.9095,
0.8283,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 37,
"steps": 38,
"score": 0.5875,
"total_reward": 16.449,
"completion_rate": 0.7,
"detection_rate": 0.5,
"trust_calibration": 0.866,
"adversarial_detections": 1,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8933,
0.9211,
0.3689,
0.02,
0.9122,
0.8578,
0.9033,
0.3211,
0.8989,
0.9217,
0.86,
0.3508,
0.02,
0.3563,
0.3641,
0.8789,
0.8394,
0.31,
0.0708,
0.3156,
0.3011,
0.8984,
0.3222,
0.335,
0.8278,
0.627
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 38,
"steps": 35,
"score": 0.7022,
"total_reward": 18.2564,
"completion_rate": 0.9,
"detection_rate": 0.0,
"trust_calibration": 0.441,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.8889,
0.9167,
0.9072,
0.8778,
0.02,
0.9033,
0.9011,
0.8989,
0.8967,
0.3574,
0.84,
0.0908,
0.8833,
0.3311,
0.8417,
0.8744,
0.8722,
0.8678,
0.3486,
0.3133,
0.3289,
0.4474
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 39,
"steps": 33,
"score": 0.7547,
"total_reward": 18.1121,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.604,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9233,
0.9139,
0.9167,
0.8822,
0.02,
0.9078,
0.9056,
0.9033,
0.3189,
0.8444,
0.3122,
0.8578,
0.9106,
0.8833,
0.8489,
0.9161,
0.835,
0.8928,
0.9083,
0.3141,
0.3219,
0.7927
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 40,
"steps": 37,
"score": 0.5943,
"total_reward": 15.4518,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.636,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9233,
0.8889,
0.9167,
0.3574,
0.8778,
0.8533,
0.9011,
0.8989,
0.8622,
0.34,
0.8878,
0.02,
0.2711,
0.8267,
0.3244,
0.33,
0.8356,
0.8633,
0.2811,
0.0619,
0.8567,
0.8172,
0.27,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 41,
"steps": 38,
"score": 0.7027,
"total_reward": 18.9728,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.857,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.8733,
0.9461,
0.9167,
0.8622,
0.337,
0.8756,
0.3233,
0.02,
0.8967,
0.8944,
0.32,
0.8878,
0.8511,
0.8789,
0.8422,
0.87,
0.8678,
0.8833,
0.3341,
0.8567,
0.2744,
0.3352,
0.85,
0.3328,
0.8448
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 42,
"steps": 34,
"score": 0.7826,
"total_reward": 18.7831,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.768,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9506,
0.8911,
0.9167,
0.9144,
0.9122,
0.9078,
0.9033,
0.8989,
0.8967,
0.8622,
0.8578,
0.8656,
0.02,
0.3311,
0.8789,
0.3267,
0.9139,
0.8678,
0.3286,
0.9061,
0.8217,
0.849
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 43,
"steps": 34,
"score": 0.6248,
"total_reward": 16.2447,
"completion_rate": 0.8,
"detection_rate": 0.0,
"trust_calibration": 0.572,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.02,
0.8911,
0.9189,
0.3667,
0.9122,
0.91,
0.8556,
0.3533,
0.8489,
0.8967,
0.8944,
0.3422,
0.34,
0.8856,
0.8833,
0.3011,
0.8417,
0.8744,
0.8722,
0.02,
0.8833,
0.2811,
0.8567,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 44,
"steps": 41,
"score": 0.6806,
"total_reward": 19.7386,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.81,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9483,
0.3411,
0.9189,
0.3667,
0.8822,
0.8778,
0.9033,
0.9011,
0.8667,
0.3214,
0.8922,
0.34,
0.8878,
0.3586,
0.8811,
0.8267,
0.8744,
0.31,
0.9106,
0.8789,
0.8567,
0.8522,
0.8895,
0.2656,
0.02,
0.3261,
0.8264
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 45,
"steps": 35,
"score": 0.8133,
"total_reward": 19.519,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.742,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.8867,
0.8622,
0.8578,
0.9033,
0.9011,
0.8989,
0.8967,
0.8422,
0.89,
0.8556,
0.3033,
0.8789,
0.2967,
0.9139,
0.9128,
0.9083,
0.2811,
0.8567,
0.8578
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 46,
"steps": 36,
"score": 0.7646,
"total_reward": 19.8787,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.813,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3478,
0.9233,
0.9461,
0.9439,
0.9167,
0.9144,
0.88,
0.9328,
0.9056,
0.9033,
0.9011,
0.8989,
0.3167,
0.8922,
0.317,
0.8856,
0.3363,
0.3089,
0.8767,
0.8372,
0.32,
0.8856,
0.9061,
0.8567,
0.8559
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 47,
"steps": 31,
"score": 0.7026,
"total_reward": 16.8629,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.89,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9506,
0.9233,
0.9189,
0.8844,
0.02,
0.91,
0.8733,
0.8489,
0.8967,
0.8944,
0.9172,
0.89,
0.3256,
0.8833,
0.3311,
0.3319,
0.0797,
0.0774,
0.87,
0.8678,
0.8283,
0.5511
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 48,
"steps": 40,
"score": 0.6251,
"total_reward": 17.5018,
"completion_rate": 0.8,
"detection_rate": 0.0,
"trust_calibration": 0.739,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8933,
0.8867,
0.9144,
0.3622,
0.91,
0.3278,
0.9056,
0.3763,
0.3211,
0.3489,
0.3544,
0.8922,
0.373,
0.8533,
0.8489,
0.8744,
0.82,
0.8156,
0.8633,
0.8411,
0.9017,
0.3394,
0.85,
0.3328,
0.3133,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 49,
"steps": 34,
"score": 0.7592,
"total_reward": 18.2216,
"completion_rate": 0.95,
"detection_rate": 0.0,
"trust_calibration": 0.658,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9211,
0.8867,
0.9144,
0.9122,
0.9078,
0.9033,
0.9011,
0.8667,
0.8944,
0.9172,
0.31,
0.02,
0.8833,
0.3489,
0.8744,
0.0752,
0.8678,
0.8656,
0.8633,
0.8589,
0.5214
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 50,
"steps": 31,
"score": 0.5546,
"total_reward": 13.3116,
"completion_rate": 0.65,
"detection_rate": 0.0,
"trust_calibration": 0.768,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8756,
0.9233,
0.8867,
0.3344,
0.9122,
0.91,
0.3356,
0.02,
0.9011,
0.02,
0.02,
0.8944,
0.84,
0.8556,
0.8511,
0.9039,
0.2967,
0.3422,
0.87,
0.8928,
0.0686,
0.2833,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 51,
"steps": 35,
"score": 0.6911,
"total_reward": 16.5857,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.634,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9206,
0.9233,
0.9139,
0.8644,
0.9122,
0.8778,
0.9056,
0.3311,
0.8967,
0.3222,
0.8378,
0.8856,
0.8833,
0.9061,
0.8767,
0.9172,
0.073,
0.3178,
0.02,
0.8633,
0.02,
0.8217,
0.4783
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 52,
"steps": 39,
"score": 0.6835,
"total_reward": 19.1391,
"completion_rate": 0.95,
"detection_rate": 0.5,
"trust_calibration": 0.96,
"adversarial_detections": 1,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3608,
0.9506,
0.9233,
0.9211,
0.3144,
0.9122,
0.337,
0.9056,
0.3233,
0.9011,
0.8989,
0.8944,
0.84,
0.8356,
0.3333,
0.8811,
0.8789,
0.8644,
0.8722,
0.8878,
0.8811,
0.8567,
0.2744,
0.8772,
0.27,
0.8873,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 53,
"steps": 38,
"score": 0.636,
"total_reward": 16.5363,
"completion_rate": 0.8,
"detection_rate": 0.0,
"trust_calibration": 0.643,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8756,
0.9483,
0.8689,
0.9167,
0.8822,
0.8578,
0.8733,
0.8489,
0.3244,
0.8922,
0.34,
0.3378,
0.8856,
0.02,
0.8789,
0.9017,
0.3174,
0.02,
0.3378,
0.8656,
0.2833,
0.0641,
0.8217,
0.8522,
0.4564
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 54,
"steps": 36,
"score": 0.6308,
"total_reward": 16.4,
"completion_rate": 0.8,
"detection_rate": 0.0,
"trust_calibration": 0.696,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9506,
0.9233,
0.9211,
0.8667,
0.3252,
0.9078,
0.9056,
0.9011,
0.8989,
0.3237,
0.3474,
0.84,
0.8878,
0.8833,
0.3089,
0.8744,
0.87,
0.3308,
0.8811,
0.0619,
0.02,
0.3274,
0.8522,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 55,
"steps": 32,
"score": 0.5079,
"total_reward": 13.2064,
"completion_rate": 0.6,
"detection_rate": 0.0,
"trust_calibration": 0.674,
"adversarial_detections": 0,
"adversarial_poisonings": 3,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.02,
0.9256,
0.3241,
0.9189,
0.8644,
0.3322,
0.33,
0.9156,
0.02,
0.3511,
0.02,
0.8622,
0.8878,
0.3126,
0.8833,
0.8811,
0.8789,
0.8767,
0.3244,
0.8722,
0.333,
0.0708,
0.0686,
0.8611,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 56,
"steps": 32,
"score": 0.5675,
"total_reward": 14.1867,
"completion_rate": 0.65,
"detection_rate": 0.0,
"trust_calibration": 0.73,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3608,
0.9256,
0.02,
0.8867,
0.9144,
0.9122,
0.02,
0.3578,
0.9056,
0.9033,
0.8667,
0.8744,
0.3422,
0.363,
0.9128,
0.8833,
0.8489,
0.3774,
0.3222,
0.355,
0.8678,
0.0686,
0.8261,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 57,
"steps": 37,
"score": 0.5298,
"total_reward": 13.7744,
"completion_rate": 0.6,
"detection_rate": 0.0,
"trust_calibration": 0.716,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9233,
0.9189,
0.3667,
0.3644,
0.3622,
0.343,
0.9056,
0.02,
0.8667,
0.8922,
0.8878,
0.8856,
0.02,
0.8289,
0.8922,
0.8678,
0.2933,
0.3141,
0.0619,
0.3197,
0.2744,
0.8772,
0.3,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 58,
"steps": 37,
"score": 0.6112,
"total_reward": 17.1148,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.808,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9506,
0.3433,
0.3167,
0.02,
0.9122,
0.91,
0.9078,
0.9056,
0.2911,
0.3259,
0.3467,
0.8422,
0.8878,
0.3163,
0.8811,
0.8789,
0.9194,
0.8522,
0.8856,
0.0663,
0.3341,
0.8567,
0.3044,
0.3022,
0.3,
0.4822
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 59,
"steps": 39,
"score": 0.6007,
"total_reward": 16.2199,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.55,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3533,
0.9139,
0.9167,
0.9122,
0.33,
0.8756,
0.02,
0.02,
0.8467,
0.8944,
0.86,
0.3378,
0.3063,
0.8811,
0.8789,
0.8444,
0.8722,
0.32,
0.2956,
0.0663,
0.2811,
0.8839,
0.8722,
0.2878,
0.4134
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 60,
"steps": 39,
"score": 0.7149,
"total_reward": 17.1579,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.717,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9233,
0.9211,
0.02,
0.3044,
0.92,
0.9056,
0.3233,
0.3211,
0.8967,
0.9194,
0.89,
0.3056,
0.8833,
0.02,
0.8767,
0.8994,
0.87,
0.8306,
0.8789,
0.8194,
0.8678,
0.7902
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 61,
"steps": 34,
"score": 0.6745,
"total_reward": 17.5378,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.937,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8911,
0.9439,
0.9167,
0.3644,
0.3322,
0.9078,
0.8533,
0.9011,
0.9239,
0.8944,
0.8922,
0.8828,
0.3356,
0.3333,
0.8967,
0.0774,
0.8722,
0.8678,
0.8906,
0.0663,
0.02,
0.2789,
0.8567,
0.5562
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 62,
"steps": 38,
"score": 0.5955,
"total_reward": 16.078,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.583,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8733,
0.8689,
0.9144,
0.3622,
0.02,
0.8556,
0.9033,
0.9011,
0.02,
0.3544,
0.8922,
0.89,
0.8878,
0.8333,
0.3089,
0.0797,
0.3022,
0.3378,
0.8633,
0.8789,
0.3297,
0.8794,
0.3022,
0.8478,
0.4237
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 63,
"steps": 35,
"score": 0.6184,
"total_reward": 16.0796,
"completion_rate": 0.75,
"detection_rate": 0.5,
"trust_calibration": 0.677,
"adversarial_detections": 1,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3511,
0.9167,
0.8944,
0.3,
0.9078,
0.9306,
0.9033,
0.02,
0.8667,
0.3122,
0.89,
0.8878,
0.9106,
0.8811,
0.8789,
0.8744,
0.3222,
0.3308,
0.2933,
0.2811,
0.3119,
0.8961,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 64,
"steps": 43,
"score": 0.6578,
"total_reward": 19.7347,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.572,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9233,
0.9211,
0.3367,
0.8822,
0.33,
0.9056,
0.3463,
0.8489,
0.3167,
0.02,
0.3422,
0.31,
0.3086,
0.8311,
0.3367,
0.8744,
0.8722,
0.9028,
0.8656,
0.3263,
0.8611,
0.8589,
0.8744,
0.87,
0.8106,
0.8061,
0.2889,
0.7624
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 65,
"steps": 34,
"score": 0.6769,
"total_reward": 16.2452,
"completion_rate": 0.8,
"detection_rate": 0.0,
"trust_calibration": 0.879,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.8933,
0.9461,
0.3689,
0.8844,
0.86,
0.8556,
0.9283,
0.3511,
0.8989,
0.3244,
0.8978,
0.8856,
0.8833,
0.3011,
0.8767,
0.8544,
0.8878,
0.8656,
0.3011,
0.2789,
0.8817,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 66,
"steps": 34,
"score": 0.5832,
"total_reward": 15.7473,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.662,
"adversarial_detections": 0,
"adversarial_poisonings": 3,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8933,
0.9211,
0.9167,
0.3644,
0.91,
0.8756,
0.9033,
0.3511,
0.8989,
0.02,
0.8944,
0.86,
0.02,
0.3063,
0.8789,
0.8767,
0.0774,
0.02,
0.8878,
0.8906,
0.0663,
0.0641,
0.8589,
0.3067,
0.4526
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 67,
"steps": 34,
"score": 0.5986,
"total_reward": 14.3671,
"completion_rate": 0.7,
"detection_rate": 0.0,
"trust_calibration": 0.891,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.02,
0.9233,
0.3489,
0.9144,
0.3622,
0.36,
0.9328,
0.3003,
0.9011,
0.8667,
0.8922,
0.8378,
0.02,
0.8833,
0.8489,
0.9194,
0.89,
0.8678,
0.8811,
0.0619,
0.3397,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 68,
"steps": 33,
"score": 0.7306,
"total_reward": 18.2654,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.684,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8756,
0.9233,
0.3481,
0.9167,
0.3644,
0.9122,
0.91,
0.9078,
0.9033,
0.3511,
0.8989,
0.3297,
0.8922,
0.8878,
0.02,
0.02,
0.8811,
0.9039,
0.8767,
0.8722,
0.9095,
0.8656,
0.8611,
0.8126
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 69,
"steps": 40,
"score": 0.7144,
"total_reward": 18.574,
"completion_rate": 0.95,
"detection_rate": 0.0,
"trust_calibration": 0.723,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3478,
0.9506,
0.9233,
0.9189,
0.9167,
0.3422,
0.8756,
0.9283,
0.9011,
0.8989,
0.02,
0.3122,
0.8556,
0.8833,
0.8467,
0.8222,
0.8678,
0.8656,
0.3133,
0.8239,
0.0597,
0.8544,
0.87,
0.8106,
0.5244
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 70,
"steps": 34,
"score": 0.6867,
"total_reward": 17.1674,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.786,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9506,
0.02,
0.8889,
0.9144,
0.33,
0.9078,
0.9056,
0.9033,
0.8489,
0.3497,
0.8944,
0.8922,
0.8828,
0.8856,
0.8833,
0.8789,
0.3774,
0.87,
0.3706,
0.0663,
0.8239,
0.8567,
0.5184
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 71,
"steps": 29,
"score": 0.5734,
"total_reward": 14.3348,
"completion_rate": 0.65,
"detection_rate": 0.0,
"trust_calibration": 0.702,
"adversarial_detections": 0,
"adversarial_poisonings": 3,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.8911,
0.3459,
0.9167,
0.3644,
0.88,
0.9328,
0.8711,
0.8989,
0.8967,
0.3574,
0.3422,
0.89,
0.8856,
0.02,
0.8811,
0.0819,
0.3267,
0.3244,
0.8722,
0.353,
0.0708,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 72,
"steps": 30,
"score": 0.7073,
"total_reward": 16.2683,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.584,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8956,
0.9483,
0.3489,
0.8844,
0.91,
0.9078,
0.9306,
0.9283,
0.9011,
0.3259,
0.8644,
0.84,
0.8878,
0.9106,
0.3033,
0.8811,
0.9039,
0.8394,
0.87,
0.02,
0.8906,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 73,
"steps": 34,
"score": 0.7782,
"total_reward": 18.6761,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.803,
"adversarial_detections": 2,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9233,
0.9189,
0.9167,
0.3644,
0.91,
0.9078,
0.9056,
0.3533,
0.02,
0.8467,
0.8944,
0.28,
0.8878,
0.8333,
0.8811,
0.8267,
0.8372,
0.8328,
0.8656,
0.8883,
0.9006,
0.8577
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 74,
"steps": 33,
"score": 0.7164,
"total_reward": 17.1935,
"completion_rate": 0.9,
"detection_rate": 0.3333,
"trust_calibration": 0.957,
"adversarial_detections": 1,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9506,
0.9233,
0.9211,
0.8844,
0.86,
0.9056,
0.3233,
0.3189,
0.9217,
0.8944,
0.3352,
0.89,
0.8856,
0.8311,
0.0819,
0.9161,
0.8722,
0.8678,
0.8906,
0.8633,
0.8589,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 75,
"steps": 40,
"score": 0.6588,
"total_reward": 18.4472,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.642,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.8911,
0.8667,
0.3344,
0.86,
0.3278,
0.3256,
0.9033,
0.3511,
0.3259,
0.8444,
0.89,
0.8856,
0.3103,
0.8811,
0.8867,
0.8994,
0.8722,
0.3378,
0.3333,
0.2811,
0.9017,
0.8544,
0.8917,
0.2756,
0.7515
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 76,
"steps": 28,
"score": 0.7202,
"total_reward": 15.8447,
"completion_rate": 0.8,
"detection_rate": 0.0,
"trust_calibration": 0.793,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3526,
0.9233,
0.9189,
0.9167,
0.9144,
0.9122,
0.935,
0.9078,
0.8983,
0.8689,
0.8967,
0.8944,
0.8922,
0.8878,
0.9033,
0.3189,
0.8394,
0.3222,
0.333,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 77,
"steps": 34,
"score": 0.6731,
"total_reward": 16.8286,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.682,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9233,
0.9211,
0.02,
0.9167,
0.9394,
0.91,
0.9078,
0.9056,
0.9033,
0.8989,
0.8894,
0.3422,
0.89,
0.2756,
0.8311,
0.3467,
0.2944,
0.9117,
0.8678,
0.3363,
0.02,
0.8567,
0.775
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 78,
"steps": 44,
"score": 0.627,
"total_reward": 18.1844,
"completion_rate": 0.8,
"detection_rate": 0.5,
"trust_calibration": 0.849,
"adversarial_detections": 1,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.8933,
0.3159,
0.8822,
0.8578,
0.9033,
0.9011,
0.8917,
0.8944,
0.8578,
0.3356,
0.8833,
0.2989,
0.3267,
0.3374,
0.2922,
0.32,
0.8156,
0.8711,
0.8589,
0.3022,
0.3,
0.0508,
0.8883,
0.8661,
0.3119,
0.2567,
0.6445
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 79,
"steps": 37,
"score": 0.6279,
"total_reward": 16.9527,
"completion_rate": 0.8,
"detection_rate": 0.0,
"trust_calibration": 0.561,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9233,
0.8867,
0.9144,
0.9122,
0.02,
0.8756,
0.9011,
0.8989,
0.3467,
0.8622,
0.89,
0.8556,
0.8511,
0.9039,
0.3267,
0.0774,
0.02,
0.343,
0.8656,
0.3011,
0.8589,
0.3067,
0.8172,
0.333,
0.4381
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 80,
"steps": 40,
"score": 0.6766,
"total_reward": 18.9455,
"completion_rate": 0.9,
"detection_rate": 0.0,
"trust_calibration": 0.946,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9483,
0.3489,
0.8644,
0.3622,
0.91,
0.9328,
0.9056,
0.8511,
0.3189,
0.3467,
0.3144,
0.32,
0.8933,
0.8811,
0.8267,
0.8994,
0.8722,
0.8678,
0.0686,
0.9061,
0.0619,
0.8194,
0.323,
0.8456,
0.5627
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 81,
"steps": 33,
"score": 0.7394,
"total_reward": 17.745,
"completion_rate": 0.9,
"detection_rate": 0.5,
"trust_calibration": 0.704,
"adversarial_detections": 1,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8956,
0.9233,
0.9211,
0.9167,
0.8822,
0.91,
0.9078,
0.3256,
0.3533,
0.9261,
0.8467,
0.02,
0.8922,
0.3078,
0.8856,
0.8833,
0.0841,
0.8967,
0.8744,
0.87,
0.9073,
0.8261,
0.6676
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 82,
"steps": 39,
"score": 0.6307,
"total_reward": 17.661,
"completion_rate": 0.75,
"detection_rate": 1.0,
"trust_calibration": 0.581,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8956,
0.9233,
0.8889,
0.3667,
0.9122,
0.2978,
0.3586,
0.9033,
0.3289,
0.9294,
0.89,
0.8556,
0.3663,
0.3311,
0.3289,
0.8767,
0.8994,
0.3222,
0.32,
0.3528,
0.3683,
0.8411,
0.8567,
0.02,
0.815,
0.8478,
0.7211
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 83,
"steps": 33,
"score": 0.6648,
"total_reward": 15.9564,
"completion_rate": 0.8,
"detection_rate": 0.25,
"trust_calibration": 0.917,
"adversarial_detections": 1,
"adversarial_poisonings": 3,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8956,
0.3663,
0.9211,
0.9439,
0.9144,
0.3322,
0.9078,
0.9306,
0.8961,
0.8789,
0.8967,
0.8944,
0.86,
0.8356,
0.8811,
0.9184,
0.3474,
0.3552,
0.073,
0.0708,
0.8633,
0.9039,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 84,
"steps": 39,
"score": 0.7325,
"total_reward": 19.0457,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.851,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8756,
0.9233,
0.02,
0.3067,
0.9122,
0.8578,
0.3256,
0.8511,
0.8989,
0.8444,
0.8922,
0.8578,
0.8833,
0.8811,
0.8767,
0.8744,
0.3252,
0.8678,
0.8656,
0.3363,
0.8589,
0.8961,
0.85,
0.2978,
0.8586
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 85,
"steps": 32,
"score": 0.661,
"total_reward": 15.2038,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.807,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8956,
0.9483,
0.9211,
0.9189,
0.9144,
0.9122,
0.33,
0.3356,
0.3533,
0.3211,
0.8967,
0.86,
0.8533,
0.8811,
0.3289,
0.8767,
0.8744,
0.8722,
0.8856,
0.0663,
0.8861,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 86,
"steps": 44,
"score": 0.6056,
"total_reward": 19.3805,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.837,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3478,
0.8933,
0.3489,
0.8844,
0.9372,
0.9078,
0.3556,
0.8689,
0.8967,
0.3222,
0.89,
0.3078,
0.8856,
0.9083,
0.3311,
0.2967,
0.3274,
0.88,
0.8678,
0.2926,
0.3133,
0.8861,
0.0619,
0.8544,
0.3022,
0.8678,
0.8456,
0.8061,
0.3119,
0.02,
0.5089
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 87,
"steps": 37,
"score": 0.6574,
"total_reward": 16.4362,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.807,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8933,
0.9211,
0.9439,
0.8644,
0.86,
0.9078,
0.8733,
0.9011,
0.8667,
0.8922,
0.8556,
0.8833,
0.3311,
0.8789,
0.8767,
0.3422,
0.8878,
0.2856,
0.8261,
0.0619,
0.8994,
0.02,
0.303,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 88,
"steps": 36,
"score": 0.5911,
"total_reward": 15.369,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.855,
"adversarial_detections": 0,
"adversarial_poisonings": 3,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.02,
0.8733,
0.9461,
0.8667,
0.9144,
0.905,
0.9078,
0.9056,
0.3233,
0.8989,
0.8967,
0.8922,
0.3178,
0.02,
0.02,
0.0841,
0.8967,
0.0774,
0.0752,
0.8678,
0.8633,
0.8789,
0.8194,
0.3022,
0.4961
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 89,
"steps": 34,
"score": 0.7869,
"total_reward": 20.4589,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.809,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8956,
0.9233,
0.3481,
0.9439,
0.3367,
0.9144,
0.9122,
0.337,
0.9056,
0.9033,
0.9011,
0.9239,
0.8644,
0.8922,
0.89,
0.8878,
0.925,
0.3441,
0.8967,
0.3474,
0.87,
0.8928,
0.8283,
0.8789,
0.8766
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 90,
"steps": 31,
"score": 0.6723,
"total_reward": 14.7899,
"completion_rate": 0.8,
"detection_rate": 0.3333,
"trust_calibration": 0.902,
"adversarial_detections": 1,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9206,
0.8889,
0.3367,
0.8822,
0.8578,
0.9056,
0.9033,
0.9089,
0.02,
0.8922,
0.89,
0.8878,
0.8856,
0.8833,
0.9061,
0.8767,
0.0774,
0.02,
0.9095,
0.8633,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 91,
"steps": 44,
"score": 0.6543,
"total_reward": 18.9747,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.289,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3111,
0.9189,
0.3667,
0.88,
0.3278,
0.9056,
0.8711,
0.8667,
0.3222,
0.34,
0.9128,
0.3133,
0.3311,
0.3367,
0.84,
0.8156,
0.8633,
0.8239,
0.2767,
0.8544,
0.85,
0.8728,
0.8883,
0.8039,
0.2744,
0.672
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 92,
"steps": 39,
"score": 0.6758,
"total_reward": 16.8941,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.804,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9256,
0.9233,
0.9211,
0.3689,
0.3367,
0.9144,
0.3322,
0.8578,
0.9033,
0.8667,
0.3122,
0.2856,
0.8489,
0.2967,
0.8994,
0.8378,
0.8633,
0.8239,
0.8544,
0.8522,
0.85,
0.8106,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 93,
"steps": 38,
"score": 0.7063,
"total_reward": 17.6574,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.542,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8756,
0.8711,
0.3389,
0.9167,
0.9144,
0.9122,
0.9178,
0.9033,
0.8689,
0.8644,
0.3422,
0.8878,
0.8833,
0.8289,
0.8767,
0.8372,
0.02,
0.8306,
0.8789,
0.8567,
0.3274,
0.3352,
0.27,
0.7487
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 94,
"steps": 35,
"score": 0.6388,
"total_reward": 15.969,
"completion_rate": 0.75,
"detection_rate": 0.5,
"trust_calibration": 0.941,
"adversarial_detections": 1,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9233,
0.9139,
0.02,
0.9144,
0.02,
0.935,
0.9078,
0.3256,
0.8711,
0.3489,
0.3237,
0.8422,
0.8878,
0.8333,
0.8789,
0.3044,
0.3222,
0.073,
0.2878,
0.8633,
0.9006,
0.8567,
0.6701
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 95,
"steps": 35,
"score": 0.6469,
"total_reward": 17.465,
"completion_rate": 0.8,
"detection_rate": 0.0,
"trust_calibration": 0.834,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8933,
0.8689,
0.3667,
0.3644,
0.307,
0.3578,
0.9056,
0.9283,
0.9011,
0.8967,
0.8944,
0.8922,
0.89,
0.8856,
0.9161,
0.2989,
0.3267,
0.3474,
0.0752,
0.073,
0.8306,
0.8261,
0.3089,
0.8194,
0.5106
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 96,
"steps": 36,
"score": 0.7393,
"total_reward": 17.7436,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.619,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8733,
0.3167,
0.92,
0.9056,
0.9033,
0.9011,
0.3189,
0.8622,
0.02,
0.8878,
0.8933,
0.8811,
0.9039,
0.8767,
0.8744,
0.8722,
0.87,
0.8306,
0.02,
0.3289,
0.8961,
0.7897
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 97,
"steps": 36,
"score": 0.7834,
"total_reward": 18.8027,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.813,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.8711,
0.9439,
0.8844,
0.86,
0.9078,
0.9306,
0.8711,
0.8989,
0.8967,
0.3214,
0.84,
0.8856,
0.8811,
0.8767,
0.3244,
0.835,
0.2878,
0.3156,
0.8611,
0.8589,
0.8544,
0.8557
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 98,
"steps": 37,
"score": 0.7572,
"total_reward": 18.9302,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.605,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.02,
0.8889,
0.8967,
0.8622,
0.9078,
0.3256,
0.8711,
0.8989,
0.8944,
0.3422,
0.89,
0.3308,
0.8333,
0.3341,
0.8767,
0.9172,
0.8678,
0.8656,
0.8811,
0.8839,
0.8194,
0.8014
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 99,
"steps": 37,
"score": 0.6531,
"total_reward": 18.9394,
"completion_rate": 0.9,
"detection_rate": 0.0,
"trust_calibration": 0.623,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9483,
0.9211,
0.9189,
0.9167,
0.9144,
0.333,
0.9078,
0.3011,
0.9239,
0.3144,
0.89,
0.8556,
0.3033,
0.8811,
0.8789,
0.9017,
0.0774,
0.835,
0.0708,
0.8656,
0.02,
0.9039,
0.3297,
0.8172,
0.3,
0.4885
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 0,
"steps": 43,
"score": 0.7114,
"total_reward": 18.4969,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.729,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.3344,
0.91,
0.9056,
0.9011,
0.3267,
0.8922,
0.8878,
0.8833,
0.8789,
0.8744,
0.3,
0.2956,
0.8611,
0.2867,
0.8522,
0.8478,
0.8433,
0.8389,
0.7841
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 1,
"steps": 29,
"score": 0.7083,
"total_reward": 17.707,
"completion_rate": 0.85,
"detection_rate": 0.5,
"trust_calibration": 0.721,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.6632
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 2,
"steps": 29,
"score": 0.6919,
"total_reward": 17.2983,
"completion_rate": 0.85,
"detection_rate": 0.4444,
"trust_calibration": 0.561,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3778,
0.3456,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.6065
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 3,
"steps": 42,
"score": 0.8546,
"total_reward": 18.8008,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.843,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.8456,
0.8411,
0.8675
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 4,
"steps": 29,
"score": 0.7165,
"total_reward": 17.9128,
"completion_rate": 0.9,
"detection_rate": 0.4444,
"trust_calibration": 0.721,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.3414,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.664
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 5,
"steps": 46,
"score": 0.7558,
"total_reward": 18.1385,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.832,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.3333,
0.8989,
0.8944,
0.32,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.8456,
0.2711,
0.8367,
0.8322,
0.8229
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 6,
"steps": 27,
"score": 0.6991,
"total_reward": 16.778,
"completion_rate": 0.85,
"detection_rate": 0.4,
"trust_calibration": 0.725,
"adversarial_detections": 4,
"adversarial_poisonings": 6,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.1019,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.6387
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 7,
"steps": 42,
"score": 0.7756,
"total_reward": 19.3902,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.835,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3763,
0.9211,
0.3919,
0.3997,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.2933,
0.8589,
0.8544,
0.28,
0.8456,
0.8411,
0.8478
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 8,
"steps": 44,
"score": 0.809,
"total_reward": 19.4157,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.853,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3526,
0.9211,
0.9167,
0.9122,
0.9078,
0.3333,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.2756,
0.8411,
0.8367,
0.8654
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 9,
"steps": 40,
"score": 0.782,
"total_reward": 19.5499,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.837,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.3841,
0.9189,
0.3997,
0.3994,
0.3972,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.2889,
0.8544,
0.85,
0.8456,
0.8528
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 10,
"steps": 31,
"score": 0.712,
"total_reward": 17.8008,
"completion_rate": 0.85,
"detection_rate": 0.625,
"trust_calibration": 0.448,
"adversarial_detections": 5,
"adversarial_poisonings": 3,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.3303,
0.3281,
0.8989,
0.0997,
0.0974,
0.0952,
0.9295,
0.925,
0.9206,
0.9161,
0.9117,
0.3356,
0.6281
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 11,
"steps": 40,
"score": 0.7732,
"total_reward": 18.5566,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.835,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.2978,
0.2933,
0.8589,
0.8544,
0.85,
0.8456,
0.8349
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 12,
"steps": 42,
"score": 0.8546,
"total_reward": 18.8009,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.2889,
0.8544,
0.85,
0.8456,
0.8411,
0.8676
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 13,
"steps": 39,
"score": 0.833,
"total_reward": 18.3252,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.811,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3456,
0.3433,
0.9189,
0.9144,
0.91,
0.9056,
0.9011,
0.8967,
0.8922,
0.8878,
0.8833,
0.8789,
0.8744,
0.87,
0.8656,
0.8611,
0.8567,
0.8522,
0.8478,
0.8485
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 14,
"steps": 29,
"score": 0.6889,
"total_reward": 17.9127,
"completion_rate": 0.85,
"detection_rate": 0.5,
"trust_calibration": 0.609,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3763,
0.3841,
0.3689,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.3237,
0.8944,
0.8922,
0.093,
0.0908,
0.0886,
0.0863,
0.9206,
0.9161,
0.9117,
0.6353
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 15,
"steps": 30,
"score": 0.6847,
"total_reward": 18.4869,
"completion_rate": 0.9,
"detection_rate": 0.4444,
"trust_calibration": 0.635,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3608,
0.9256,
0.9233,
0.9211,
0.3459,
0.9167,
0.9144,
0.9122,
0.91,
0.3348,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.8944,
0.8922,
0.093,
0.0908,
0.0886,
0.0863,
0.0841,
0.9184,
0.9139,
0.9095,
0.6404
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 16,
"steps": 42,
"score": 0.8546,
"total_reward": 18.8009,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.2889,
0.8544,
0.85,
0.8456,
0.8411,
0.8676
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 17,
"steps": 46,
"score": 0.8048,
"total_reward": 19.316,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.842,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.2889,
0.8544,
0.28,
0.8456,
0.8411,
0.8367,
0.8322,
0.8605
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 18,
"steps": 26,
"score": 0.6967,
"total_reward": 16.7213,
"completion_rate": 0.85,
"detection_rate": 0.3333,
"trust_calibration": 0.701,
"adversarial_detections": 3,
"adversarial_poisonings": 6,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.6149
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 19,
"steps": 20,
"score": 0.6193,
"total_reward": 13.0053,
"completion_rate": 0.65,
"detection_rate": 0.0,
"trust_calibration": 0.576,
"adversarial_detections": 0,
"adversarial_poisonings": 5,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3478,
0.9256,
0.9233,
0.9211,
0.9189,
0.3437,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.01
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 20,
"steps": 46,
"score": 0.7498,
"total_reward": 19.4938,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.835,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3763,
0.3841,
0.9167,
0.9122,
0.3378,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.3022,
0.8678,
0.8633,
0.8589,
0.2844,
0.85,
0.8456,
0.8411,
0.8367,
0.8322,
0.8412
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 21,
"steps": 42,
"score": 0.8546,
"total_reward": 18.8009,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.2756,
0.8411,
0.8676
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 22,
"steps": 36,
"score": 0.7334,
"total_reward": 21.2675,
"completion_rate": 1.0,
"detection_rate": 0.8,
"trust_calibration": 0.747,
"adversarial_detections": 4,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3778,
0.9256,
0.9233,
0.3481,
0.9189,
0.9167,
0.9144,
0.9122,
0.337,
0.9078,
0.3326,
0.9033,
0.9011,
0.3259,
0.8967,
0.3214,
0.8922,
0.89,
0.3148,
0.8856,
0.0863,
0.9206,
0.9161,
0.9117,
0.9073,
0.8789,
0.8544,
0.7968
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 23,
"steps": 28,
"score": 0.6922,
"total_reward": 17.3057,
"completion_rate": 0.85,
"detection_rate": 0.4444,
"trust_calibration": 0.645,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3686,
0.3433,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.6298
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 24,
"steps": 46,
"score": 0.7725,
"total_reward": 20.0838,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.836,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3686,
0.9483,
0.3711,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.3022,
0.8678,
0.8633,
0.2889,
0.8544,
0.85,
0.8456,
0.2711,
0.8367,
0.8322,
0.8591
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 25,
"steps": 34,
"score": 0.6755,
"total_reward": 18.9148,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.71,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9144,
0.3322,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.3189,
0.3167,
0.8944,
0.3122,
0.31,
0.3078,
0.8856,
0.9011,
0.8967,
0.8922,
0.3078,
0.3033,
0.8789,
0.782
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 26,
"steps": 34,
"score": 0.6561,
"total_reward": 19.0282,
"completion_rate": 0.8,
"detection_rate": 0.6667,
"trust_calibration": 0.467,
"adversarial_detections": 2,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.8989,
0.3667,
0.3644,
0.9372,
0.935,
0.3578,
0.3556,
0.3533,
0.3289,
0.8967,
0.8944,
0.8922,
0.317,
0.8878,
0.3126,
0.8833,
0.0841,
0.9184,
0.9139,
0.8878,
0.8656,
0.3163,
0.3419,
0.6213
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 27,
"steps": 46,
"score": 0.7256,
"total_reward": 20.3155,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.828,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3763,
0.9211,
0.9189,
0.3997,
0.3994,
0.3972,
0.9078,
0.9033,
0.3289,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.2844,
0.85,
0.2756,
0.8411,
0.8367,
0.8322,
0.8395
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 28,
"steps": 28,
"score": 0.7355,
"total_reward": 17.6509,
"completion_rate": 0.9,
"detection_rate": 0.5,
"trust_calibration": 0.725,
"adversarial_detections": 5,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.1019,
0.0997,
0.0974,
0.0952,
0.093,
0.9273,
0.9228,
0.9184,
0.9139,
0.6841
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 29,
"steps": 28,
"score": 0.7258,
"total_reward": 18.144,
"completion_rate": 0.9,
"detection_rate": 0.5,
"trust_calibration": 0.709,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.3392,
0.91,
0.9078,
0.9056,
0.9033,
0.3281,
0.8989,
0.8967,
0.8944,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.6801
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 30,
"steps": 39,
"score": 0.6238,
"total_reward": 18.7149,
"completion_rate": 0.75,
"detection_rate": 1.0,
"trust_calibration": 0.712,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.3411,
0.9189,
0.9167,
0.9144,
0.3322,
0.33,
0.9078,
0.3256,
0.3233,
0.9011,
0.8989,
0.8967,
0.3144,
0.8922,
0.31,
0.3078,
0.3233,
0.8989,
0.8944,
0.89,
0.3056,
0.3011,
0.2967,
0.8722,
0.2878,
0.7538
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 31,
"steps": 37,
"score": 0.6915,
"total_reward": 20.0529,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.911,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9144,
0.3322,
0.91,
0.3278,
0.9056,
0.9033,
0.9011,
0.8989,
0.3697,
0.3774,
0.3772,
0.89,
0.3728,
0.3706,
0.9228,
0.8944,
0.87,
0.8656,
0.2911,
0.8567,
0.8522,
0.843
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 32,
"steps": 30,
"score": 0.689,
"total_reward": 17.9142,
"completion_rate": 0.8,
"detection_rate": 0.6,
"trust_calibration": 0.712,
"adversarial_detections": 3,
"adversarial_poisonings": 2,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9144,
0.9122,
0.91,
0.9078,
0.3326,
0.3303,
0.9011,
0.8989,
0.8967,
0.8944,
0.0952,
0.093,
0.9273,
0.9228,
0.9184,
0.8922,
0.87,
0.3208,
0.6714
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 33,
"steps": 31,
"score": 0.7464,
"total_reward": 18.6604,
"completion_rate": 0.9,
"detection_rate": 0.6667,
"trust_calibration": 0.845,
"adversarial_detections": 4,
"adversarial_poisonings": 2,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.3437,
0.3414,
0.9122,
0.91,
0.9078,
0.9056,
0.3303,
0.9011,
0.8989,
0.3237,
0.8944,
0.0952,
0.093,
0.9273,
0.9228,
0.9184,
0.9139,
0.8856,
0.7574
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 34,
"steps": 40,
"score": 0.8105,
"total_reward": 18.6408,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.838,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3433,
0.3411,
0.9167,
0.9122,
0.3378,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.8456,
0.853
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 35,
"steps": 28,
"score": 0.7257,
"total_reward": 18.1436,
"completion_rate": 0.9,
"detection_rate": 0.5,
"trust_calibration": 0.708,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.3437,
0.9144,
0.9122,
0.91,
0.3348,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.8944,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.6797
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 36,
"steps": 42,
"score": 0.7886,
"total_reward": 18.9271,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.854,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3686,
0.3433,
0.3481,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.2978,
0.8633,
0.8589,
0.8544,
0.85,
0.8456,
0.8411,
0.8527
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 37,
"steps": 28,
"score": 0.6822,
"total_reward": 17.056,
"completion_rate": 0.85,
"detection_rate": 0.4,
"trust_calibration": 0.659,
"adversarial_detections": 4,
"adversarial_poisonings": 6,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3478,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.62
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 38,
"steps": 43,
"score": 0.8297,
"total_reward": 19.0823,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.774,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3478,
0.9233,
0.9189,
0.9144,
0.91,
0.9056,
0.9011,
0.8967,
0.8922,
0.8878,
0.8833,
0.8789,
0.8744,
0.3,
0.8656,
0.8611,
0.8567,
0.8522,
0.8478,
0.8433,
0.8389,
0.8479
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 39,
"steps": 28,
"score": 0.7161,
"total_reward": 17.9036,
"completion_rate": 0.9,
"detection_rate": 0.4444,
"trust_calibration": 0.71,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.3303,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.6637
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 40,
"steps": 37,
"score": 0.6368,
"total_reward": 19.7412,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.678,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9144,
0.9122,
0.33,
0.9078,
0.9056,
0.3233,
0.9011,
0.3189,
0.3167,
0.8944,
0.3122,
0.89,
0.3078,
0.8856,
0.3033,
0.8811,
0.8967,
0.3122,
0.3078,
0.8833,
0.2989,
0.2944,
0.87,
0.7673
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 41,
"steps": 38,
"score": 0.6416,
"total_reward": 20.5321,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.732,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.3411,
0.9189,
0.3367,
0.9144,
0.9122,
0.33,
0.9078,
0.3256,
0.3233,
0.3211,
0.8989,
0.8967,
0.8944,
0.3122,
0.89,
0.3078,
0.3056,
0.8833,
0.3189,
0.8944,
0.31,
0.3056,
0.8811,
0.8589,
0.3097,
0.8722,
0.323,
0.7961
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 42,
"steps": 31,
"score": 0.6759,
"total_reward": 17.5723,
"completion_rate": 0.85,
"detection_rate": 0.4444,
"trust_calibration": 0.442,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.3414,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.3056,
0.5724
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 43,
"steps": 32,
"score": 0.6896,
"total_reward": 17.9304,
"completion_rate": 0.85,
"detection_rate": 0.5,
"trust_calibration": 0.681,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3456,
0.9233,
0.9211,
0.3719,
0.3797,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.3356,
0.6467
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 44,
"steps": 38,
"score": 0.6652,
"total_reward": 19.2895,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.721,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.8967,
0.9394,
0.9372,
0.36,
0.3578,
0.3556,
0.9283,
0.3511,
0.3489,
0.3167,
0.8944,
0.8922,
0.343,
0.3508,
0.9011,
0.8767,
0.8722,
0.2978,
0.8633,
0.8589,
0.2844,
0.85,
0.7757
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 45,
"steps": 40,
"score": 0.8827,
"total_reward": 18.5376,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.839,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.8456,
0.8709
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 46,
"steps": 40,
"score": 0.6245,
"total_reward": 18.7359,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.429,
"adversarial_detections": 3,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3478,
0.9506,
0.9483,
0.9461,
0.3689,
0.3667,
0.3644,
0.9122,
0.91,
0.9078,
0.9056,
0.3763,
0.3841,
0.3839,
0.3817,
0.8944,
0.3192,
0.3178,
0.8856,
0.9228,
0.9184,
0.3422,
0.3728,
0.3683,
0.9006,
0.8994,
0.85,
0.3156,
0.6634
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 47,
"steps": 29,
"score": 0.7165,
"total_reward": 17.9132,
"completion_rate": 0.9,
"detection_rate": 0.4444,
"trust_calibration": 0.722,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.3303,
0.9011,
0.8989,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.6644
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 48,
"steps": 28,
"score": 0.7257,
"total_reward": 18.1437,
"completion_rate": 0.9,
"detection_rate": 0.5,
"trust_calibration": 0.708,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.3459,
0.9167,
0.9144,
0.9122,
0.337,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.8944,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.6798
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 49,
"steps": 28,
"score": 0.7355,
"total_reward": 17.6512,
"completion_rate": 0.9,
"detection_rate": 0.5,
"trust_calibration": 0.726,
"adversarial_detections": 5,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.1019,
0.0997,
0.0974,
0.0952,
0.093,
0.9273,
0.9228,
0.9184,
0.9139,
0.6844
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 50,
"steps": 42,
"score": 0.717,
"total_reward": 18.643,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.82,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.3919,
0.3997,
0.3994,
0.3972,
0.9078,
0.9033,
0.8989,
0.8944,
0.32,
0.8856,
0.8811,
0.3067,
0.8722,
0.8678,
0.8633,
0.8589,
0.2844,
0.28,
0.8456,
0.8411,
0.8092
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 51,
"steps": 34,
"score": 0.7274,
"total_reward": 19.6391,
"completion_rate": 0.95,
"detection_rate": 0.625,
"trust_calibration": 0.682,
"adversarial_detections": 5,
"adversarial_poisonings": 3,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.3414,
0.9122,
0.337,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.3237,
0.8944,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.9073,
0.3311,
0.715
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 52,
"steps": 46,
"score": 0.5965,
"total_reward": 19.0884,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.686,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3548,
0.9256,
0.3433,
0.9211,
0.3619,
0.9144,
0.34,
0.3578,
0.3333,
0.9011,
0.8989,
0.8967,
0.3774,
0.8922,
0.375,
0.8878,
0.3706,
0.3683,
0.3661,
0.3639,
0.2967,
0.3594,
0.3572,
0.355,
0.8656,
0.3089,
0.8722,
0.8656,
0.8589,
0.8522,
0.7165
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 53,
"steps": 44,
"score": 0.8286,
"total_reward": 19.0588,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.3378,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.3067,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.8456,
0.8411,
0.8367,
0.8632
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 54,
"steps": 35,
"score": 0.6607,
"total_reward": 18.4987,
"completion_rate": 0.8,
"detection_rate": 0.75,
"trust_calibration": 0.358,
"adversarial_detections": 6,
"adversarial_poisonings": 2,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.9233,
0.9211,
0.9189,
0.3797,
0.3874,
0.3952,
0.36,
0.9078,
0.3326,
0.3233,
0.9011,
0.8989,
0.3237,
0.8944,
0.8922,
0.89,
0.8878,
0.0886,
0.0863,
0.9206,
0.9161,
0.9117,
0.9073,
0.3661,
0.8984,
0.6167
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 55,
"steps": 29,
"score": 0.728,
"total_reward": 18.1995,
"completion_rate": 0.9,
"detection_rate": 0.5,
"trust_calibration": 0.713,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.3756,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.3348,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.6787
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 56,
"steps": 37,
"score": 0.6768,
"total_reward": 20.9793,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.796,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3548,
0.9506,
0.9483,
0.9461,
0.3689,
0.3667,
0.3644,
0.9122,
0.91,
0.9078,
0.3256,
0.3233,
0.9011,
0.3189,
0.8967,
0.3144,
0.8922,
0.89,
0.8878,
0.8856,
0.3033,
0.8989,
0.2967,
0.8922,
0.8878,
0.2856,
0.8811,
0.2967,
0.8722,
0.8317
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 57,
"steps": 38,
"score": 0.6683,
"total_reward": 20.0487,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.771,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.3389,
0.9167,
0.3344,
0.3322,
0.33,
0.9078,
0.3256,
0.9033,
0.3211,
0.8989,
0.3167,
0.3144,
0.8922,
0.89,
0.8878,
0.3486,
0.3563,
0.3819,
0.9161,
0.34,
0.8833,
0.8589,
0.8544,
0.85,
0.8058
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 58,
"steps": 29,
"score": 0.7278,
"total_reward": 18.1944,
"completion_rate": 0.9,
"detection_rate": 0.5556,
"trust_calibration": 0.731,
"adversarial_detections": 5,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.3481,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.3348,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.6999
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 59,
"steps": 43,
"score": 0.7547,
"total_reward": 19.6215,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.735,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3526,
0.9233,
0.9211,
0.3719,
0.3797,
0.3644,
0.91,
0.9056,
0.9011,
0.8967,
0.8922,
0.8878,
0.8833,
0.8789,
0.8744,
0.3,
0.8656,
0.2911,
0.8567,
0.8522,
0.8478,
0.8433,
0.8389,
0.8208
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 60,
"steps": 36,
"score": 0.7716,
"total_reward": 18.5184,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.836,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.3411,
0.9189,
0.9167,
0.3344,
0.3322,
0.33,
0.3278,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.8439
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 61,
"steps": 42,
"score": 0.7616,
"total_reward": 19.8021,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.836,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3763,
0.9211,
0.9189,
0.3997,
0.3994,
0.3972,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.28,
0.8456,
0.2711,
0.8483
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 62,
"steps": 43,
"score": 0.7892,
"total_reward": 18.9402,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.925,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3548,
0.9506,
0.3733,
0.9189,
0.9144,
0.91,
0.9056,
0.9011,
0.8967,
0.3222,
0.3178,
0.8833,
0.8789,
0.8744,
0.87,
0.8656,
0.8611,
0.8567,
0.8522,
0.8478,
0.8433,
0.8389,
0.8682
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 63,
"steps": 44,
"score": 0.8031,
"total_reward": 18.4705,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.841,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.2756,
0.2711,
0.8367,
0.8449
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 64,
"steps": 37,
"score": 0.7634,
"total_reward": 19.0848,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.76,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.3841,
0.9189,
0.3997,
0.3994,
0.3972,
0.33,
0.9328,
0.3556,
0.9011,
0.8967,
0.8922,
0.8878,
0.8833,
0.8789,
0.8744,
0.87,
0.8656,
0.8611,
0.8567,
0.8522,
0.8227
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 65,
"steps": 40,
"score": 0.797,
"total_reward": 19.1287,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.838,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3763,
0.9211,
0.3919,
0.3997,
0.9122,
0.9078,
0.3333,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.8456,
0.853
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 66,
"steps": 29,
"score": 0.7165,
"total_reward": 17.9135,
"completion_rate": 0.9,
"detection_rate": 0.4444,
"trust_calibration": 0.723,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.337,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.6647
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 67,
"steps": 32,
"score": 0.6707,
"total_reward": 18.1095,
"completion_rate": 0.85,
"detection_rate": 0.5,
"trust_calibration": 0.44,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3503,
0.9211,
0.3459,
0.9167,
0.9144,
0.3392,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.8944,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.9095,
0.3333,
0.5864
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 68,
"steps": 42,
"score": 0.7575,
"total_reward": 19.6962,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.641,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3478,
0.9256,
0.9233,
0.3741,
0.9189,
0.3897,
0.3974,
0.3622,
0.9078,
0.9033,
0.3289,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.8456,
0.8411,
0.7994
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 69,
"steps": 30,
"score": 0.6917,
"total_reward": 17.9836,
"completion_rate": 0.85,
"detection_rate": 0.5,
"trust_calibration": 0.658,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3478,
0.9506,
0.9483,
0.9461,
0.3689,
0.3667,
0.3644,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.8944,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.9095,
0.6454
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 70,
"steps": 27,
"score": 0.7,
"total_reward": 19.5996,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.716,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.3919,
0.9167,
0.3994,
0.3972,
0.395,
0.3928,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.3144,
0.8922,
0.89,
0.8878,
0.8856,
0.3033,
0.8811,
0.8789,
0.2967,
0.8744,
0.2922,
0.8166
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 71,
"steps": 29,
"score": 0.6808,
"total_reward": 17.7021,
"completion_rate": 0.85,
"detection_rate": 0.375,
"trust_calibration": 0.687,
"adversarial_detections": 3,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.3841,
0.3919,
0.3997,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.8944,
0.0952,
0.093,
0.0908,
0.0886,
0.0863,
0.9206,
0.9161,
0.9117,
0.6174
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 72,
"steps": 34,
"score": 0.6937,
"total_reward": 19.4234,
"completion_rate": 0.9,
"detection_rate": 0.5714,
"trust_calibration": 0.723,
"adversarial_detections": 4,
"adversarial_poisonings": 3,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3763,
0.9211,
0.3919,
0.3997,
0.3644,
0.9122,
0.91,
0.3348,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.8944,
0.8922,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.9095,
0.8833,
0.2811,
0.6916
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 73,
"steps": 26,
"score": 0.6967,
"total_reward": 16.7204,
"completion_rate": 0.85,
"detection_rate": 0.3333,
"trust_calibration": 0.697,
"adversarial_detections": 3,
"adversarial_poisonings": 6,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.614
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 74,
"steps": 28,
"score": 0.675,
"total_reward": 18.8991,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.806,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.3414,
0.9122,
0.91,
0.9078,
0.3256,
0.9033,
0.9011,
0.8989,
0.8967,
0.8944,
0.8922,
0.31,
0.8878,
0.3056,
0.8833,
0.8811,
0.2989,
0.2967,
0.8744,
0.2922,
0.8193
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 75,
"steps": 40,
"score": 0.7977,
"total_reward": 19.1445,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.837,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.2756,
0.8529
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 76,
"steps": 42,
"score": 0.8335,
"total_reward": 19.1712,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.842,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3686,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.2844,
0.85,
0.8456,
0.8411,
0.8671
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 77,
"steps": 29,
"score": 0.6992,
"total_reward": 18.1794,
"completion_rate": 0.9,
"detection_rate": 0.4444,
"trust_calibration": 0.645,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3478,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.3259,
0.8967,
0.8944,
0.0952,
0.093,
0.0908,
0.0886,
0.0863,
0.9206,
0.9161,
0.9117,
0.6451
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 78,
"steps": 28,
"score": 0.7161,
"total_reward": 17.9032,
"completion_rate": 0.9,
"detection_rate": 0.4444,
"trust_calibration": 0.709,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3503,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.6633
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 79,
"steps": 44,
"score": 0.7563,
"total_reward": 19.6626,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.829,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9122,
0.9078,
0.3333,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.2933,
0.8589,
0.2844,
0.85,
0.8456,
0.8411,
0.8367,
0.8421
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 80,
"steps": 46,
"score": 0.7065,
"total_reward": 16.9564,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.803,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.3111,
0.3067,
0.8722,
0.8678,
0.2933,
0.8589,
0.8544,
0.85,
0.2756,
0.8411,
0.8367,
0.8322,
0.7808
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 81,
"steps": 44,
"score": 0.8031,
"total_reward": 18.4703,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.2756,
0.2711,
0.8367,
0.8447
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 82,
"steps": 44,
"score": 0.8286,
"total_reward": 19.0588,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.8989,
0.8944,
0.32,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.8456,
0.8411,
0.8367,
0.8632
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 83,
"steps": 33,
"score": 0.7082,
"total_reward": 18.4133,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.826,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3456,
0.9483,
0.9461,
0.8989,
0.3667,
0.3644,
0.9372,
0.935,
0.8878,
0.9306,
0.3533,
0.3511,
0.3489,
0.3467,
0.9194,
0.3422,
0.34,
0.9033,
0.8789,
0.8744,
0.87,
0.8656,
0.8611,
0.8133
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 84,
"steps": 29,
"score": 0.71,
"total_reward": 18.4609,
"completion_rate": 0.9,
"detection_rate": 0.5,
"trust_calibration": 0.716,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3686,
0.9233,
0.9211,
0.3459,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.3214,
0.8922,
0.093,
0.0908,
0.0886,
0.0863,
0.9206,
0.9161,
0.9117,
0.6795
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 85,
"steps": 29,
"score": 0.6923,
"total_reward": 17.999,
"completion_rate": 0.85,
"detection_rate": 0.5,
"trust_calibration": 0.718,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9144,
0.3392,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.8944,
0.8922,
0.093,
0.0908,
0.0886,
0.0863,
0.9206,
0.9161,
0.9117,
0.6627
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 86,
"steps": 33,
"score": 0.6856,
"total_reward": 18.5122,
"completion_rate": 0.9,
"detection_rate": 0.4444,
"trust_calibration": 0.605,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3478,
0.9256,
0.3663,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.9095,
0.3333,
0.6263
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 87,
"steps": 27,
"score": 0.6991,
"total_reward": 16.7786,
"completion_rate": 0.85,
"detection_rate": 0.4,
"trust_calibration": 0.727,
"adversarial_detections": 4,
"adversarial_poisonings": 6,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.1019,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.6393
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 88,
"steps": 42,
"score": 0.7762,
"total_reward": 19.4062,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.835,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9122,
0.9078,
0.9033,
0.3289,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.28,
0.8456,
0.8411,
0.8479
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 89,
"steps": 29,
"score": 0.7259,
"total_reward": 18.1473,
"completion_rate": 0.9,
"detection_rate": 0.5,
"trust_calibration": 0.696,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3503,
0.9211,
0.9189,
0.9167,
0.9144,
0.3392,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.6745
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 90,
"steps": 28,
"score": 0.7161,
"total_reward": 17.9035,
"completion_rate": 0.9,
"detection_rate": 0.4444,
"trust_calibration": 0.71,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.3392,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.6635
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 91,
"steps": 29,
"score": 0.7262,
"total_reward": 18.1541,
"completion_rate": 0.9,
"detection_rate": 0.5,
"trust_calibration": 0.723,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.3459,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.3259,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.6813
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 92,
"steps": 38,
"score": 0.7692,
"total_reward": 18.4611,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.842,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.3389,
0.3367,
0.3344,
0.3322,
0.9078,
0.9033,
0.8989,
0.3244,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.8411
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 93,
"steps": 45,
"score": 0.6795,
"total_reward": 19.7049,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.749,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3478,
0.9256,
0.9233,
0.9211,
0.9189,
0.3897,
0.9144,
0.3972,
0.395,
0.9078,
0.3906,
0.3883,
0.3861,
0.8967,
0.3222,
0.8878,
0.8833,
0.8789,
0.8744,
0.3,
0.2956,
0.8789,
0.8722,
0.8478,
0.8433,
0.8389,
0.2644,
0.7847
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 94,
"steps": 32,
"score": 0.6725,
"total_reward": 18.1568,
"completion_rate": 0.8,
"detection_rate": 0.3333,
"trust_calibration": 0.85,
"adversarial_detections": 1,
"adversarial_poisonings": 2,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.8989,
0.3667,
0.3644,
0.9372,
0.935,
0.3578,
0.8856,
0.9283,
0.3511,
0.3489,
0.3467,
0.3444,
0.8922,
0.89,
0.8878,
0.0886,
0.0863,
0.9206,
0.8922,
0.8678,
0.8633,
0.6213
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 95,
"steps": 40,
"score": 0.7977,
"total_reward": 19.1447,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.838,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.3733,
0.9461,
0.3689,
0.3667,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.2756,
0.853
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 96,
"steps": 31,
"score": 0.6978,
"total_reward": 17.4444,
"completion_rate": 0.85,
"detection_rate": 0.5,
"trust_calibration": 0.726,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.3511,
0.9189,
0.9167,
0.3344,
0.3322,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.8944,
0.8922,
0.093,
0.0908,
0.0886,
0.0863,
0.9206,
0.9161,
0.9117,
0.9073,
0.66
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 97,
"steps": 38,
"score": 0.8207,
"total_reward": 18.8758,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.829,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.8552
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 98,
"steps": 29,
"score": 0.7165,
"total_reward": 17.9132,
"completion_rate": 0.9,
"detection_rate": 0.4444,
"trust_calibration": 0.722,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.3303,
0.9011,
0.8989,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.6644
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 99,
"steps": 38,
"score": 0.7037,
"total_reward": 21.1121,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.723,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.3389,
0.9167,
0.9144,
0.3322,
0.91,
0.3278,
0.9056,
0.9033,
0.3211,
0.8989,
0.3167,
0.8944,
0.8922,
0.31,
0.8878,
0.3056,
0.3211,
0.8967,
0.3122,
0.8878,
0.3033,
0.8789,
0.8744,
0.87,
0.8287
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 0,
"steps": 42,
"score": 0.867,
"total_reward": 19.0739,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.935,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8904
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 1,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 2,
"steps": 42,
"score": 0.8422,
"total_reward": 18.5276,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.933,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.3511,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8724
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 3,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 4,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1154,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.3289,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 5,
"steps": 46,
"score": 0.7916,
"total_reward": 18.9976,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.917,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.3333,
0.8989,
0.8944,
0.32,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.8917,
0.8873,
0.8828,
0.8784,
0.8739,
0.8618
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 6,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8523,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 7,
"steps": 40,
"score": 0.8958,
"total_reward": 18.8108,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.894
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 8,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3315,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.3511,
0.9167,
0.9122,
0.9078,
0.9033,
0.3289,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8857
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 9,
"steps": 40,
"score": 0.8958,
"total_reward": 18.8108,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.894
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 10,
"steps": 42,
"score": 0.8421,
"total_reward": 18.5263,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.928,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.3067,
0.3022,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.871
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 11,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 12,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 13,
"steps": 40,
"score": 0.8958,
"total_reward": 18.8108,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.894
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 14,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3311,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.932,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.2933,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8853
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 15,
"steps": 46,
"score": 0.8162,
"total_reward": 19.5883,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.93,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.9211,
0.9167,
0.9122,
0.3378,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.8917,
0.8873,
0.8828,
0.8784,
0.8739,
0.8825
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 16,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8523,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 17,
"steps": 42,
"score": 0.867,
"total_reward": 19.0739,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.935,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8903
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 18,
"steps": 40,
"score": 0.8958,
"total_reward": 18.8108,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.894
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 19,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.3333,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 20,
"steps": 46,
"score": 0.7653,
"total_reward": 18.3663,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.909,
"adversarial_detections": 5,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.3289,
0.8944,
0.89,
0.8856,
0.8811,
0.3067,
0.8722,
0.8678,
0.2933,
0.8589,
0.8544,
0.85,
0.8873,
0.8828,
0.8784,
0.8739,
0.8423
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 21,
"steps": 40,
"score": 0.8958,
"total_reward": 18.8109,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.932,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.894
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 22,
"steps": 46,
"score": 0.7652,
"total_reward": 18.3659,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.908,
"adversarial_detections": 5,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.3111,
0.8767,
0.8722,
0.2978,
0.8633,
0.2889,
0.8544,
0.85,
0.8873,
0.8828,
0.8784,
0.8739,
0.8419
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 23,
"steps": 42,
"score": 0.8403,
"total_reward": 18.4862,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.3511,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8727
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 24,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3311,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.933,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.2933,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8854
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 25,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3314,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.3511,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.3244,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8857
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 26,
"steps": 42,
"score": 0.8403,
"total_reward": 18.4855,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.3067,
0.3022,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.872
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 27,
"steps": 46,
"score": 0.8179,
"total_reward": 19.6285,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.924,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.3111,
0.8767,
0.8722,
0.2978,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8739,
0.881
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 28,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 29,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3311,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.933,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.3244,
0.89,
0.8856,
0.8811,
0.8767,
0.3022,
0.8678,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8854
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 30,
"steps": 44,
"score": 0.8148,
"total_reward": 18.741,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.922,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.32,
0.8856,
0.3111,
0.3067,
0.8722,
0.8678,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8653
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 31,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 32,
"steps": 44,
"score": 0.8423,
"total_reward": 19.3725,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8851
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 33,
"steps": 46,
"score": 0.8178,
"total_reward": 19.6279,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.922,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.3289,
0.8944,
0.89,
0.8856,
0.8811,
0.3067,
0.8722,
0.8678,
0.2933,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8739,
0.8804
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 34,
"steps": 42,
"score": 0.867,
"total_reward": 19.0739,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.935,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.3289,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8904
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 35,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3313,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.933,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.3333,
0.8989,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8855
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 36,
"steps": 44,
"score": 0.8167,
"total_reward": 18.7838,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.927,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.3511,
0.3467,
0.3422,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8664
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 37,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 38,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 39,
"steps": 40,
"score": 0.8958,
"total_reward": 18.8109,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.932,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.894
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 40,
"steps": 42,
"score": 0.867,
"total_reward": 19.0739,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.935,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.3378,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8904
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 41,
"steps": 42,
"score": 0.867,
"total_reward": 19.0739,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.935,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8904
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 42,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 43,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 44,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 45,
"steps": 40,
"score": 0.8958,
"total_reward": 18.8109,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.932,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.894
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 46,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1154,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.3244,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 47,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.3067,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 48,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3312,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.933,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.3378,
0.9033,
0.8989,
0.8944,
0.32,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8855
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 49,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 50,
"steps": 44,
"score": 0.8423,
"total_reward": 19.3725,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.3333,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.3022,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.885
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 51,
"steps": 46,
"score": 0.8178,
"total_reward": 19.6279,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.922,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.3289,
0.8944,
0.32,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.2933,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8739,
0.8804
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 52,
"steps": 46,
"score": 0.7899,
"total_reward": 18.9582,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.926,
"adversarial_detections": 5,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.9211,
0.9167,
0.9122,
0.3378,
0.9033,
0.3289,
0.8944,
0.32,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.8873,
0.8828,
0.8784,
0.8739,
0.8641
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 53,
"steps": 44,
"score": 0.8423,
"total_reward": 19.3723,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.93,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.3378,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.3067,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8848
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 54,
"steps": 46,
"score": 0.7916,
"total_reward": 18.9976,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.918,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.9211,
0.9167,
0.9122,
0.3378,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.8722,
0.2978,
0.8633,
0.8589,
0.8544,
0.8917,
0.8873,
0.8828,
0.8784,
0.8739,
0.8619
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 55,
"steps": 44,
"score": 0.8423,
"total_reward": 19.3725,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.3511,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8851
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 56,
"steps": 46,
"score": 0.7899,
"total_reward": 18.957,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.922,
"adversarial_detections": 5,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.9211,
0.9167,
0.9122,
0.9078,
0.3333,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.3067,
0.8722,
0.8678,
0.8633,
0.2889,
0.8544,
0.85,
0.8873,
0.8828,
0.8784,
0.8739,
0.8629
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 57,
"steps": 46,
"score": 0.8179,
"total_reward": 19.6284,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.924,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.3378,
0.9033,
0.3289,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.2933,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8739,
0.881
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 58,
"steps": 44,
"score": 0.8423,
"total_reward": 19.3725,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.885
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 59,
"steps": 42,
"score": 0.8403,
"total_reward": 18.4861,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.3511,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8726
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 60,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 61,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 62,
"steps": 46,
"score": 0.7933,
"total_reward": 19.0389,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.916,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.3111,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8739,
0.8615
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 63,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 64,
"steps": 44,
"score": 0.8423,
"total_reward": 19.3725,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.3333,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8851
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 65,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.32,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 66,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.32,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 67,
"steps": 46,
"score": 0.8179,
"total_reward": 19.6293,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.927,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.3378,
0.9033,
0.8989,
0.3244,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8739,
0.8818
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 68,
"steps": 46,
"score": 0.8162,
"total_reward": 19.5881,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.929,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.3511,
0.9167,
0.3422,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.8917,
0.8873,
0.8828,
0.8784,
0.8739,
0.8823
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 69,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.3333,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 70,
"steps": 44,
"score": 0.8423,
"total_reward": 19.3728,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.933,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.9211,
0.9167,
0.9122,
0.3378,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8854
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 71,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3313,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.933,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.3333,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8856
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 72,
"steps": 44,
"score": 0.8423,
"total_reward": 19.3724,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8849
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 73,
"steps": 40,
"score": 0.8958,
"total_reward": 18.8109,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.932,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.894
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 74,
"steps": 44,
"score": 0.8423,
"total_reward": 19.3724,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.3289,
0.8944,
0.89,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8849
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 75,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 76,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.3511,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 77,
"steps": 42,
"score": 0.867,
"total_reward": 19.0739,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.935,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.2978,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8904
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 78,
"steps": 42,
"score": 0.867,
"total_reward": 19.0739,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.935,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8903
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 79,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.32,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 80,
"steps": 46,
"score": 0.7652,
"total_reward": 18.3653,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.905,
"adversarial_detections": 5,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.3111,
0.3067,
0.8722,
0.8678,
0.2933,
0.8589,
0.8544,
0.85,
0.8873,
0.8828,
0.8784,
0.8739,
0.8412
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 81,
"steps": 42,
"score": 0.867,
"total_reward": 19.0739,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.935,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8904
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 82,
"steps": 44,
"score": 0.8423,
"total_reward": 19.3725,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.8989,
0.8944,
0.32,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.885
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 83,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 84,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3315,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.3511,
0.9167,
0.9122,
0.3378,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8857
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 85,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3313,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.933,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.8989,
0.3244,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8856
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 86,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8523,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 87,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8523,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 88,
"steps": 44,
"score": 0.8423,
"total_reward": 19.3725,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.885
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 89,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 90,
"steps": 40,
"score": 0.8958,
"total_reward": 18.8109,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.932,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.894
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 91,
"steps": 44,
"score": 0.8423,
"total_reward": 19.3721,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.93,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.3378,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.2978,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8846
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 92,
"steps": 44,
"score": 0.8423,
"total_reward": 19.3725,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.3333,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.3067,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.885
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 93,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 94,
"steps": 46,
"score": 0.8161,
"total_reward": 19.5872,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.926,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.3289,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.8722,
0.2978,
0.8633,
0.8589,
0.8544,
0.8917,
0.8873,
0.8828,
0.8784,
0.8739,
0.8814
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 95,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3315,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.3378,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8857
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 96,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.3511,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 97,
"steps": 40,
"score": 0.8958,
"total_reward": 18.8109,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.932,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.894
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 98,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.3067,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 99,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1154,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
}
],
"chart": "outputs/baseline_comparison.png"
}