Task_0013_decision_planner / decision_threshold_sweep.json
Dongkkka's picture
Upload folder using huggingface_hub
71d153a verified
{
"model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt",
"test_episodes": [
4,
5,
14,
19
],
"require_next_class": true,
"thresholds": [
0.2,
0.25,
0.3,
0.35,
0.4,
0.45,
0.5,
0.55,
0.6,
0.65,
0.7
],
"summaries": [
{
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118,
"threshold": 0.2,
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8983067854007274,
"per_switch_f1": [
0.8403361344537814,
0.9142857142857143,
0.9402985074626866
],
"per_switch_recall": [
0.78125,
1.0,
0.984375
],
"episode_switch_delays": {
"4": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"5": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"14": {
"0->1": null,
"1->2": null,
"2->3": null
},
"19": {
"0->1": 8,
"1->2": 10,
"2->3": 5
}
}
},
{
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118,
"threshold": 0.25,
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8983067854007274,
"per_switch_f1": [
0.8403361344537814,
0.9142857142857143,
0.9402985074626866
],
"per_switch_recall": [
0.78125,
1.0,
0.984375
],
"episode_switch_delays": {
"4": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"5": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"14": {
"0->1": null,
"1->2": null,
"2->3": null
},
"19": {
"0->1": 8,
"1->2": 10,
"2->3": 5
}
}
},
{
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118,
"threshold": 0.3,
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.9006806162890149,
"per_switch_f1": [
0.847457627118644,
0.9142857142857143,
0.9402985074626866
],
"per_switch_recall": [
0.78125,
1.0,
0.984375
],
"episode_switch_delays": {
"4": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"5": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"14": {
"0->1": null,
"1->2": null,
"2->3": null
},
"19": {
"0->1": 8,
"1->2": 10,
"2->3": 5
}
}
},
{
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118,
"threshold": 0.35,
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8973970197850795,
"per_switch_f1": [
0.8376068376068376,
0.9142857142857143,
0.9402985074626866
],
"per_switch_recall": [
0.765625,
1.0,
0.984375
],
"episode_switch_delays": {
"4": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"5": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"14": {
"0->1": null,
"1->2": null,
"2->3": null
},
"19": {
"0->1": 8,
"1->2": 10,
"2->3": 5
}
}
},
{
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118,
"threshold": 0.4,
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8940568095483176,
"per_switch_f1": [
0.8275862068965517,
0.9142857142857143,
0.9402985074626866
],
"per_switch_recall": [
0.75,
1.0,
0.984375
],
"episode_switch_delays": {
"4": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"5": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"14": {
"0->1": null,
"1->2": null,
"2->3": null
},
"19": {
"0->1": 8,
"1->2": 10,
"2->3": 5
}
}
},
{
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118,
"threshold": 0.45,
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8940568095483176,
"per_switch_f1": [
0.8275862068965517,
0.9142857142857143,
0.9402985074626866
],
"per_switch_recall": [
0.75,
1.0,
0.984375
],
"episode_switch_delays": {
"4": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"5": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"14": {
"0->1": null,
"1->2": null,
"2->3": null
},
"19": {
"0->1": 8,
"1->2": 10,
"2->3": 5
}
}
},
{
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118,
"threshold": 0.5,
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.9010890266576999,
"per_switch_f1": [
0.8421052631578947,
0.9208633093525179,
0.9402985074626866
],
"per_switch_recall": [
0.75,
1.0,
0.984375
],
"episode_switch_delays": {
"4": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"5": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"14": {
"0->1": null,
"1->2": null,
"2->3": null
},
"19": {
"0->1": 8,
"1->2": 10,
"2->3": 5
}
}
},
{
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118,
"threshold": 0.55,
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8941967960812587,
"per_switch_f1": [
0.8214285714285714,
0.9208633093525179,
0.9402985074626866
],
"per_switch_recall": [
0.71875,
1.0,
0.984375
],
"episode_switch_delays": {
"4": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"5": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"14": {
"0->1": null,
"1->2": null,
"2->3": null
},
"19": {
"0->1": 8,
"1->2": 10,
"2->3": 5
}
}
},
{
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118,
"threshold": 0.6,
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8990201864113261,
"per_switch_f1": [
0.8288288288288289,
0.9208633093525179,
0.9473684210526315
],
"per_switch_recall": [
0.71875,
1.0,
0.984375
],
"episode_switch_delays": {
"4": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"5": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"14": {
"0->1": null,
"1->2": null,
"2->3": null
},
"19": {
"0->1": 8,
"1->2": 10,
"2->3": 5
}
}
},
{
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118,
"threshold": 0.65,
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.9014125309089338,
"per_switch_f1": [
0.8288288288288289,
0.9208633093525179,
0.9545454545454545
],
"per_switch_recall": [
0.71875,
1.0,
0.984375
],
"episode_switch_delays": {
"4": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"5": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"14": {
"0->1": null,
"1->2": null,
"2->3": null
},
"19": {
"0->1": 8,
"1->2": 10,
"2->3": 5
}
}
},
{
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118,
"threshold": 0.7,
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8987523412258435,
"per_switch_f1": [
0.8288288288288289,
0.9208633093525179,
0.9465648854961832
],
"per_switch_recall": [
0.71875,
1.0,
0.96875
],
"episode_switch_delays": {
"4": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"5": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"14": {
"0->1": null,
"1->2": null,
"2->3": null
},
"19": {
"0->1": 8,
"1->2": 10,
"2->3": 5
}
}
}
],
"reports": [
{
"model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt",
"test_episodes": [
4,
5,
14,
19
],
"threshold": 0.2,
"require_next_class": true,
"frame_metrics": {
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8983067854007274,
"per_switch": [
{
"transition": "0->1",
"tp": 50,
"fp": 5,
"fn": 14,
"tn": 4201,
"precision": 0.9090909090909091,
"recall": 0.78125,
"f1": 0.8403361344537814
},
{
"transition": "1->2",
"tp": 64,
"fp": 12,
"fn": 0,
"tn": 4194,
"precision": 0.8421052631578947,
"recall": 1.0,
"f1": 0.9142857142857143
},
{
"transition": "2->3",
"tp": 63,
"fp": 7,
"fn": 1,
"tn": 4199,
"precision": 0.9,
"recall": 0.984375,
"f1": 0.9402985074626866
}
]
},
"episode_metrics": [
{
"episode_index": 4,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9742647058823529,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 512,
"2->3": 688
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 502,
"from": 1,
"to": 2
},
{
"frame": 678,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 5,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9937722419928826,
"early_switch_count": 0,
"switch_delays": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 544,
"2->3": 720
},
"boundaries": [
{
"frame": 43,
"from": 0,
"to": 1
},
{
"frame": 543,
"from": 1,
"to": 2
},
{
"frame": 719,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 14,
"chunk_language_accuracy": 0.046875,
"frame_language_accuracy": 0.03823529411764706,
"early_switch_count": 0,
"switch_delays": {
"0->1": null,
"1->2": null,
"2->3": null
},
"sequence_completed": false,
"switch_frames": {},
"boundaries": [
{
"frame": 39,
"from": 0,
"to": 1
},
{
"frame": 484,
"from": 1,
"to": 2
},
{
"frame": 663,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 19,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9778420038535646,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 5
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 496,
"2->3": 672
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 486,
"from": 1,
"to": 2
},
{
"frame": 667,
"from": 2,
"to": 3
}
]
}
],
"summary": {
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118
}
},
{
"model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt",
"test_episodes": [
4,
5,
14,
19
],
"threshold": 0.25,
"require_next_class": true,
"frame_metrics": {
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8983067854007274,
"per_switch": [
{
"transition": "0->1",
"tp": 50,
"fp": 5,
"fn": 14,
"tn": 4201,
"precision": 0.9090909090909091,
"recall": 0.78125,
"f1": 0.8403361344537814
},
{
"transition": "1->2",
"tp": 64,
"fp": 12,
"fn": 0,
"tn": 4194,
"precision": 0.8421052631578947,
"recall": 1.0,
"f1": 0.9142857142857143
},
{
"transition": "2->3",
"tp": 63,
"fp": 7,
"fn": 1,
"tn": 4199,
"precision": 0.9,
"recall": 0.984375,
"f1": 0.9402985074626866
}
]
},
"episode_metrics": [
{
"episode_index": 4,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9742647058823529,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 512,
"2->3": 688
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 502,
"from": 1,
"to": 2
},
{
"frame": 678,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 5,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9937722419928826,
"early_switch_count": 0,
"switch_delays": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 544,
"2->3": 720
},
"boundaries": [
{
"frame": 43,
"from": 0,
"to": 1
},
{
"frame": 543,
"from": 1,
"to": 2
},
{
"frame": 719,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 14,
"chunk_language_accuracy": 0.046875,
"frame_language_accuracy": 0.03823529411764706,
"early_switch_count": 0,
"switch_delays": {
"0->1": null,
"1->2": null,
"2->3": null
},
"sequence_completed": false,
"switch_frames": {},
"boundaries": [
{
"frame": 39,
"from": 0,
"to": 1
},
{
"frame": 484,
"from": 1,
"to": 2
},
{
"frame": 663,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 19,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9778420038535646,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 5
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 496,
"2->3": 672
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 486,
"from": 1,
"to": 2
},
{
"frame": 667,
"from": 2,
"to": 3
}
]
}
],
"summary": {
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118
}
},
{
"model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt",
"test_episodes": [
4,
5,
14,
19
],
"threshold": 0.3,
"require_next_class": true,
"frame_metrics": {
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.9006806162890149,
"per_switch": [
{
"transition": "0->1",
"tp": 50,
"fp": 4,
"fn": 14,
"tn": 4202,
"precision": 0.9259259259259259,
"recall": 0.78125,
"f1": 0.847457627118644
},
{
"transition": "1->2",
"tp": 64,
"fp": 12,
"fn": 0,
"tn": 4194,
"precision": 0.8421052631578947,
"recall": 1.0,
"f1": 0.9142857142857143
},
{
"transition": "2->3",
"tp": 63,
"fp": 7,
"fn": 1,
"tn": 4199,
"precision": 0.9,
"recall": 0.984375,
"f1": 0.9402985074626866
}
]
},
"episode_metrics": [
{
"episode_index": 4,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9742647058823529,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 512,
"2->3": 688
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 502,
"from": 1,
"to": 2
},
{
"frame": 678,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 5,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9937722419928826,
"early_switch_count": 0,
"switch_delays": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 544,
"2->3": 720
},
"boundaries": [
{
"frame": 43,
"from": 0,
"to": 1
},
{
"frame": 543,
"from": 1,
"to": 2
},
{
"frame": 719,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 14,
"chunk_language_accuracy": 0.046875,
"frame_language_accuracy": 0.03823529411764706,
"early_switch_count": 0,
"switch_delays": {
"0->1": null,
"1->2": null,
"2->3": null
},
"sequence_completed": false,
"switch_frames": {},
"boundaries": [
{
"frame": 39,
"from": 0,
"to": 1
},
{
"frame": 484,
"from": 1,
"to": 2
},
{
"frame": 663,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 19,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9778420038535646,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 5
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 496,
"2->3": 672
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 486,
"from": 1,
"to": 2
},
{
"frame": 667,
"from": 2,
"to": 3
}
]
}
],
"summary": {
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118
}
},
{
"model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt",
"test_episodes": [
4,
5,
14,
19
],
"threshold": 0.35,
"require_next_class": true,
"frame_metrics": {
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8973970197850795,
"per_switch": [
{
"transition": "0->1",
"tp": 49,
"fp": 4,
"fn": 15,
"tn": 4202,
"precision": 0.9245283018867925,
"recall": 0.765625,
"f1": 0.8376068376068376
},
{
"transition": "1->2",
"tp": 64,
"fp": 12,
"fn": 0,
"tn": 4194,
"precision": 0.8421052631578947,
"recall": 1.0,
"f1": 0.9142857142857143
},
{
"transition": "2->3",
"tp": 63,
"fp": 7,
"fn": 1,
"tn": 4199,
"precision": 0.9,
"recall": 0.984375,
"f1": 0.9402985074626866
}
]
},
"episode_metrics": [
{
"episode_index": 4,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9742647058823529,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 512,
"2->3": 688
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 502,
"from": 1,
"to": 2
},
{
"frame": 678,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 5,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9937722419928826,
"early_switch_count": 0,
"switch_delays": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 544,
"2->3": 720
},
"boundaries": [
{
"frame": 43,
"from": 0,
"to": 1
},
{
"frame": 543,
"from": 1,
"to": 2
},
{
"frame": 719,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 14,
"chunk_language_accuracy": 0.046875,
"frame_language_accuracy": 0.03823529411764706,
"early_switch_count": 0,
"switch_delays": {
"0->1": null,
"1->2": null,
"2->3": null
},
"sequence_completed": false,
"switch_frames": {},
"boundaries": [
{
"frame": 39,
"from": 0,
"to": 1
},
{
"frame": 484,
"from": 1,
"to": 2
},
{
"frame": 663,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 19,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9778420038535646,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 5
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 496,
"2->3": 672
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 486,
"from": 1,
"to": 2
},
{
"frame": 667,
"from": 2,
"to": 3
}
]
}
],
"summary": {
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118
}
},
{
"model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt",
"test_episodes": [
4,
5,
14,
19
],
"threshold": 0.4,
"require_next_class": true,
"frame_metrics": {
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8940568095483176,
"per_switch": [
{
"transition": "0->1",
"tp": 48,
"fp": 4,
"fn": 16,
"tn": 4202,
"precision": 0.9230769230769231,
"recall": 0.75,
"f1": 0.8275862068965517
},
{
"transition": "1->2",
"tp": 64,
"fp": 12,
"fn": 0,
"tn": 4194,
"precision": 0.8421052631578947,
"recall": 1.0,
"f1": 0.9142857142857143
},
{
"transition": "2->3",
"tp": 63,
"fp": 7,
"fn": 1,
"tn": 4199,
"precision": 0.9,
"recall": 0.984375,
"f1": 0.9402985074626866
}
]
},
"episode_metrics": [
{
"episode_index": 4,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9742647058823529,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 512,
"2->3": 688
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 502,
"from": 1,
"to": 2
},
{
"frame": 678,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 5,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9937722419928826,
"early_switch_count": 0,
"switch_delays": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 544,
"2->3": 720
},
"boundaries": [
{
"frame": 43,
"from": 0,
"to": 1
},
{
"frame": 543,
"from": 1,
"to": 2
},
{
"frame": 719,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 14,
"chunk_language_accuracy": 0.046875,
"frame_language_accuracy": 0.03823529411764706,
"early_switch_count": 0,
"switch_delays": {
"0->1": null,
"1->2": null,
"2->3": null
},
"sequence_completed": false,
"switch_frames": {},
"boundaries": [
{
"frame": 39,
"from": 0,
"to": 1
},
{
"frame": 484,
"from": 1,
"to": 2
},
{
"frame": 663,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 19,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9778420038535646,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 5
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 496,
"2->3": 672
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 486,
"from": 1,
"to": 2
},
{
"frame": 667,
"from": 2,
"to": 3
}
]
}
],
"summary": {
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118
}
},
{
"model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt",
"test_episodes": [
4,
5,
14,
19
],
"threshold": 0.45,
"require_next_class": true,
"frame_metrics": {
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8940568095483176,
"per_switch": [
{
"transition": "0->1",
"tp": 48,
"fp": 4,
"fn": 16,
"tn": 4202,
"precision": 0.9230769230769231,
"recall": 0.75,
"f1": 0.8275862068965517
},
{
"transition": "1->2",
"tp": 64,
"fp": 12,
"fn": 0,
"tn": 4194,
"precision": 0.8421052631578947,
"recall": 1.0,
"f1": 0.9142857142857143
},
{
"transition": "2->3",
"tp": 63,
"fp": 7,
"fn": 1,
"tn": 4199,
"precision": 0.9,
"recall": 0.984375,
"f1": 0.9402985074626866
}
]
},
"episode_metrics": [
{
"episode_index": 4,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9742647058823529,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 512,
"2->3": 688
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 502,
"from": 1,
"to": 2
},
{
"frame": 678,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 5,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9937722419928826,
"early_switch_count": 0,
"switch_delays": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 544,
"2->3": 720
},
"boundaries": [
{
"frame": 43,
"from": 0,
"to": 1
},
{
"frame": 543,
"from": 1,
"to": 2
},
{
"frame": 719,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 14,
"chunk_language_accuracy": 0.046875,
"frame_language_accuracy": 0.03823529411764706,
"early_switch_count": 0,
"switch_delays": {
"0->1": null,
"1->2": null,
"2->3": null
},
"sequence_completed": false,
"switch_frames": {},
"boundaries": [
{
"frame": 39,
"from": 0,
"to": 1
},
{
"frame": 484,
"from": 1,
"to": 2
},
{
"frame": 663,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 19,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9778420038535646,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 5
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 496,
"2->3": 672
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 486,
"from": 1,
"to": 2
},
{
"frame": 667,
"from": 2,
"to": 3
}
]
}
],
"summary": {
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118
}
},
{
"model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt",
"test_episodes": [
4,
5,
14,
19
],
"threshold": 0.5,
"require_next_class": true,
"frame_metrics": {
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.9010890266576999,
"per_switch": [
{
"transition": "0->1",
"tp": 48,
"fp": 2,
"fn": 16,
"tn": 4204,
"precision": 0.96,
"recall": 0.75,
"f1": 0.8421052631578947
},
{
"transition": "1->2",
"tp": 64,
"fp": 11,
"fn": 0,
"tn": 4195,
"precision": 0.8533333333333334,
"recall": 1.0,
"f1": 0.9208633093525179
},
{
"transition": "2->3",
"tp": 63,
"fp": 7,
"fn": 1,
"tn": 4199,
"precision": 0.9,
"recall": 0.984375,
"f1": 0.9402985074626866
}
]
},
"episode_metrics": [
{
"episode_index": 4,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9742647058823529,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 512,
"2->3": 688
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 502,
"from": 1,
"to": 2
},
{
"frame": 678,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 5,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9937722419928826,
"early_switch_count": 0,
"switch_delays": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 544,
"2->3": 720
},
"boundaries": [
{
"frame": 43,
"from": 0,
"to": 1
},
{
"frame": 543,
"from": 1,
"to": 2
},
{
"frame": 719,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 14,
"chunk_language_accuracy": 0.046875,
"frame_language_accuracy": 0.03823529411764706,
"early_switch_count": 0,
"switch_delays": {
"0->1": null,
"1->2": null,
"2->3": null
},
"sequence_completed": false,
"switch_frames": {},
"boundaries": [
{
"frame": 39,
"from": 0,
"to": 1
},
{
"frame": 484,
"from": 1,
"to": 2
},
{
"frame": 663,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 19,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9778420038535646,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 5
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 496,
"2->3": 672
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 486,
"from": 1,
"to": 2
},
{
"frame": 667,
"from": 2,
"to": 3
}
]
}
],
"summary": {
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118
}
},
{
"model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt",
"test_episodes": [
4,
5,
14,
19
],
"threshold": 0.55,
"require_next_class": true,
"frame_metrics": {
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8941967960812587,
"per_switch": [
{
"transition": "0->1",
"tp": 46,
"fp": 2,
"fn": 18,
"tn": 4204,
"precision": 0.9583333333333334,
"recall": 0.71875,
"f1": 0.8214285714285714
},
{
"transition": "1->2",
"tp": 64,
"fp": 11,
"fn": 0,
"tn": 4195,
"precision": 0.8533333333333334,
"recall": 1.0,
"f1": 0.9208633093525179
},
{
"transition": "2->3",
"tp": 63,
"fp": 7,
"fn": 1,
"tn": 4199,
"precision": 0.9,
"recall": 0.984375,
"f1": 0.9402985074626866
}
]
},
"episode_metrics": [
{
"episode_index": 4,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9742647058823529,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 512,
"2->3": 688
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 502,
"from": 1,
"to": 2
},
{
"frame": 678,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 5,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9937722419928826,
"early_switch_count": 0,
"switch_delays": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 544,
"2->3": 720
},
"boundaries": [
{
"frame": 43,
"from": 0,
"to": 1
},
{
"frame": 543,
"from": 1,
"to": 2
},
{
"frame": 719,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 14,
"chunk_language_accuracy": 0.046875,
"frame_language_accuracy": 0.03823529411764706,
"early_switch_count": 0,
"switch_delays": {
"0->1": null,
"1->2": null,
"2->3": null
},
"sequence_completed": false,
"switch_frames": {},
"boundaries": [
{
"frame": 39,
"from": 0,
"to": 1
},
{
"frame": 484,
"from": 1,
"to": 2
},
{
"frame": 663,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 19,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9778420038535646,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 5
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 496,
"2->3": 672
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 486,
"from": 1,
"to": 2
},
{
"frame": 667,
"from": 2,
"to": 3
}
]
}
],
"summary": {
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118
}
},
{
"model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt",
"test_episodes": [
4,
5,
14,
19
],
"threshold": 0.6,
"require_next_class": true,
"frame_metrics": {
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8990201864113261,
"per_switch": [
{
"transition": "0->1",
"tp": 46,
"fp": 1,
"fn": 18,
"tn": 4205,
"precision": 0.9787234042553191,
"recall": 0.71875,
"f1": 0.8288288288288289
},
{
"transition": "1->2",
"tp": 64,
"fp": 11,
"fn": 0,
"tn": 4195,
"precision": 0.8533333333333334,
"recall": 1.0,
"f1": 0.9208633093525179
},
{
"transition": "2->3",
"tp": 63,
"fp": 6,
"fn": 1,
"tn": 4200,
"precision": 0.9130434782608695,
"recall": 0.984375,
"f1": 0.9473684210526315
}
]
},
"episode_metrics": [
{
"episode_index": 4,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9742647058823529,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 512,
"2->3": 688
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 502,
"from": 1,
"to": 2
},
{
"frame": 678,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 5,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9937722419928826,
"early_switch_count": 0,
"switch_delays": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 544,
"2->3": 720
},
"boundaries": [
{
"frame": 43,
"from": 0,
"to": 1
},
{
"frame": 543,
"from": 1,
"to": 2
},
{
"frame": 719,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 14,
"chunk_language_accuracy": 0.046875,
"frame_language_accuracy": 0.03823529411764706,
"early_switch_count": 0,
"switch_delays": {
"0->1": null,
"1->2": null,
"2->3": null
},
"sequence_completed": false,
"switch_frames": {},
"boundaries": [
{
"frame": 39,
"from": 0,
"to": 1
},
{
"frame": 484,
"from": 1,
"to": 2
},
{
"frame": 663,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 19,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9778420038535646,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 5
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 496,
"2->3": 672
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 486,
"from": 1,
"to": 2
},
{
"frame": 667,
"from": 2,
"to": 3
}
]
}
],
"summary": {
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118
}
},
{
"model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt",
"test_episodes": [
4,
5,
14,
19
],
"threshold": 0.65,
"require_next_class": true,
"frame_metrics": {
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.9014125309089338,
"per_switch": [
{
"transition": "0->1",
"tp": 46,
"fp": 1,
"fn": 18,
"tn": 4205,
"precision": 0.9787234042553191,
"recall": 0.71875,
"f1": 0.8288288288288289
},
{
"transition": "1->2",
"tp": 64,
"fp": 11,
"fn": 0,
"tn": 4195,
"precision": 0.8533333333333334,
"recall": 1.0,
"f1": 0.9208633093525179
},
{
"transition": "2->3",
"tp": 63,
"fp": 5,
"fn": 1,
"tn": 4201,
"precision": 0.9264705882352942,
"recall": 0.984375,
"f1": 0.9545454545454545
}
]
},
"episode_metrics": [
{
"episode_index": 4,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9742647058823529,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 512,
"2->3": 688
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 502,
"from": 1,
"to": 2
},
{
"frame": 678,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 5,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9937722419928826,
"early_switch_count": 0,
"switch_delays": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 544,
"2->3": 720
},
"boundaries": [
{
"frame": 43,
"from": 0,
"to": 1
},
{
"frame": 543,
"from": 1,
"to": 2
},
{
"frame": 719,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 14,
"chunk_language_accuracy": 0.046875,
"frame_language_accuracy": 0.03823529411764706,
"early_switch_count": 0,
"switch_delays": {
"0->1": null,
"1->2": null,
"2->3": null
},
"sequence_completed": false,
"switch_frames": {},
"boundaries": [
{
"frame": 39,
"from": 0,
"to": 1
},
{
"frame": 484,
"from": 1,
"to": 2
},
{
"frame": 663,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 19,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9778420038535646,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 5
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 496,
"2->3": 672
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 486,
"from": 1,
"to": 2
},
{
"frame": 667,
"from": 2,
"to": 3
}
]
}
],
"summary": {
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118
}
},
{
"model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt",
"test_episodes": [
4,
5,
14,
19
],
"threshold": 0.7,
"require_next_class": true,
"frame_metrics": {
"class_accuracy": 0.9955503512880562,
"switch_macro_f1": 0.8987523412258435,
"per_switch": [
{
"transition": "0->1",
"tp": 46,
"fp": 1,
"fn": 18,
"tn": 4205,
"precision": 0.9787234042553191,
"recall": 0.71875,
"f1": 0.8288288288288289
},
{
"transition": "1->2",
"tp": 64,
"fp": 11,
"fn": 0,
"tn": 4195,
"precision": 0.8533333333333334,
"recall": 1.0,
"f1": 0.9208633093525179
},
{
"transition": "2->3",
"tp": 62,
"fp": 5,
"fn": 2,
"tn": 4201,
"precision": 0.9253731343283582,
"recall": 0.96875,
"f1": 0.9465648854961832
}
]
},
"episode_metrics": [
{
"episode_index": 4,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9742647058823529,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 10
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 512,
"2->3": 688
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 502,
"from": 1,
"to": 2
},
{
"frame": 678,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 5,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9937722419928826,
"early_switch_count": 0,
"switch_delays": {
"0->1": 5,
"1->2": 1,
"2->3": 1
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 544,
"2->3": 720
},
"boundaries": [
{
"frame": 43,
"from": 0,
"to": 1
},
{
"frame": 543,
"from": 1,
"to": 2
},
{
"frame": 719,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 14,
"chunk_language_accuracy": 0.046875,
"frame_language_accuracy": 0.03823529411764706,
"early_switch_count": 0,
"switch_delays": {
"0->1": null,
"1->2": null,
"2->3": null
},
"sequence_completed": false,
"switch_frames": {},
"boundaries": [
{
"frame": 39,
"from": 0,
"to": 1
},
{
"frame": 484,
"from": 1,
"to": 2
},
{
"frame": 663,
"from": 2,
"to": 3
}
]
},
{
"episode_index": 19,
"chunk_language_accuracy": 1.0,
"frame_language_accuracy": 0.9778420038535646,
"early_switch_count": 0,
"switch_delays": {
"0->1": 8,
"1->2": 10,
"2->3": 5
},
"sequence_completed": true,
"switch_frames": {
"0->1": 48,
"1->2": 496,
"2->3": 672
},
"boundaries": [
{
"frame": 40,
"from": 0,
"to": 1
},
{
"frame": 486,
"from": 1,
"to": 2
},
{
"frame": 667,
"from": 2,
"to": 3
}
]
}
],
"summary": {
"early_switch_count": 0,
"sequence_completion_rate": 0.75,
"chunk_language_accuracy": 0.76171875,
"frame_language_accuracy": 0.7460285614616118
}
}
]
}