{ "model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt", "test_episodes": [ 4, 5, 14, 19 ], "require_next_class": true, "thresholds": [ 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7 ], "summaries": [ { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118, "threshold": 0.2, "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8983067854007274, "per_switch_f1": [ 0.8403361344537814, 0.9142857142857143, 0.9402985074626866 ], "per_switch_recall": [ 0.78125, 1.0, 0.984375 ], "episode_switch_delays": { "4": { "0->1": 8, "1->2": 10, "2->3": 10 }, "5": { "0->1": 5, "1->2": 1, "2->3": 1 }, "14": { "0->1": null, "1->2": null, "2->3": null }, "19": { "0->1": 8, "1->2": 10, "2->3": 5 } } }, { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118, "threshold": 0.25, "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8983067854007274, "per_switch_f1": [ 0.8403361344537814, 0.9142857142857143, 0.9402985074626866 ], "per_switch_recall": [ 0.78125, 1.0, 0.984375 ], "episode_switch_delays": { "4": { "0->1": 8, "1->2": 10, "2->3": 10 }, "5": { "0->1": 5, "1->2": 1, "2->3": 1 }, "14": { "0->1": null, "1->2": null, "2->3": null }, "19": { "0->1": 8, "1->2": 10, "2->3": 5 } } }, { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118, "threshold": 0.3, "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.9006806162890149, "per_switch_f1": [ 0.847457627118644, 0.9142857142857143, 0.9402985074626866 ], "per_switch_recall": [ 0.78125, 1.0, 0.984375 ], "episode_switch_delays": { "4": { "0->1": 8, "1->2": 10, "2->3": 10 }, "5": { "0->1": 5, "1->2": 1, "2->3": 1 }, "14": { "0->1": null, "1->2": null, "2->3": null }, "19": { "0->1": 8, "1->2": 10, "2->3": 5 } } }, { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118, "threshold": 0.35, "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8973970197850795, "per_switch_f1": [ 0.8376068376068376, 0.9142857142857143, 0.9402985074626866 ], "per_switch_recall": [ 0.765625, 1.0, 0.984375 ], "episode_switch_delays": { "4": { "0->1": 8, "1->2": 10, "2->3": 10 }, "5": { "0->1": 5, "1->2": 1, "2->3": 1 }, "14": { "0->1": null, "1->2": null, "2->3": null }, "19": { "0->1": 8, "1->2": 10, "2->3": 5 } } }, { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118, "threshold": 0.4, "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8940568095483176, "per_switch_f1": [ 0.8275862068965517, 0.9142857142857143, 0.9402985074626866 ], "per_switch_recall": [ 0.75, 1.0, 0.984375 ], "episode_switch_delays": { "4": { "0->1": 8, "1->2": 10, "2->3": 10 }, "5": { "0->1": 5, "1->2": 1, "2->3": 1 }, "14": { "0->1": null, "1->2": null, "2->3": null }, "19": { "0->1": 8, "1->2": 10, "2->3": 5 } } }, { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118, "threshold": 0.45, "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8940568095483176, "per_switch_f1": [ 0.8275862068965517, 0.9142857142857143, 0.9402985074626866 ], "per_switch_recall": [ 0.75, 1.0, 0.984375 ], "episode_switch_delays": { "4": { "0->1": 8, "1->2": 10, "2->3": 10 }, "5": { "0->1": 5, "1->2": 1, "2->3": 1 }, "14": { "0->1": null, "1->2": null, "2->3": null }, "19": { "0->1": 8, "1->2": 10, "2->3": 5 } } }, { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118, "threshold": 0.5, "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.9010890266576999, "per_switch_f1": [ 0.8421052631578947, 0.9208633093525179, 0.9402985074626866 ], "per_switch_recall": [ 0.75, 1.0, 0.984375 ], "episode_switch_delays": { "4": { "0->1": 8, "1->2": 10, "2->3": 10 }, "5": { "0->1": 5, "1->2": 1, "2->3": 1 }, "14": { "0->1": null, "1->2": null, "2->3": null }, "19": { "0->1": 8, "1->2": 10, "2->3": 5 } } }, { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118, "threshold": 0.55, "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8941967960812587, "per_switch_f1": [ 0.8214285714285714, 0.9208633093525179, 0.9402985074626866 ], "per_switch_recall": [ 0.71875, 1.0, 0.984375 ], "episode_switch_delays": { "4": { "0->1": 8, "1->2": 10, "2->3": 10 }, "5": { "0->1": 5, "1->2": 1, "2->3": 1 }, "14": { "0->1": null, "1->2": null, "2->3": null }, "19": { "0->1": 8, "1->2": 10, "2->3": 5 } } }, { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118, "threshold": 0.6, "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8990201864113261, "per_switch_f1": [ 0.8288288288288289, 0.9208633093525179, 0.9473684210526315 ], "per_switch_recall": [ 0.71875, 1.0, 0.984375 ], "episode_switch_delays": { "4": { "0->1": 8, "1->2": 10, "2->3": 10 }, "5": { "0->1": 5, "1->2": 1, "2->3": 1 }, "14": { "0->1": null, "1->2": null, "2->3": null }, "19": { "0->1": 8, "1->2": 10, "2->3": 5 } } }, { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118, "threshold": 0.65, "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.9014125309089338, "per_switch_f1": [ 0.8288288288288289, 0.9208633093525179, 0.9545454545454545 ], "per_switch_recall": [ 0.71875, 1.0, 0.984375 ], "episode_switch_delays": { "4": { "0->1": 8, "1->2": 10, "2->3": 10 }, "5": { "0->1": 5, "1->2": 1, "2->3": 1 }, "14": { "0->1": null, "1->2": null, "2->3": null }, "19": { "0->1": 8, "1->2": 10, "2->3": 5 } } }, { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118, "threshold": 0.7, "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8987523412258435, "per_switch_f1": [ 0.8288288288288289, 0.9208633093525179, 0.9465648854961832 ], "per_switch_recall": [ 0.71875, 1.0, 0.96875 ], "episode_switch_delays": { "4": { "0->1": 8, "1->2": 10, "2->3": 10 }, "5": { "0->1": 5, "1->2": 1, "2->3": 1 }, "14": { "0->1": null, "1->2": null, "2->3": null }, "19": { "0->1": 8, "1->2": 10, "2->3": 5 } } } ], "reports": [ { "model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt", "test_episodes": [ 4, 5, 14, 19 ], "threshold": 0.2, "require_next_class": true, "frame_metrics": { "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8983067854007274, "per_switch": [ { "transition": "0->1", "tp": 50, "fp": 5, "fn": 14, "tn": 4201, "precision": 0.9090909090909091, "recall": 0.78125, "f1": 0.8403361344537814 }, { "transition": "1->2", "tp": 64, "fp": 12, "fn": 0, "tn": 4194, "precision": 0.8421052631578947, "recall": 1.0, "f1": 0.9142857142857143 }, { "transition": "2->3", "tp": 63, "fp": 7, "fn": 1, "tn": 4199, "precision": 0.9, "recall": 0.984375, "f1": 0.9402985074626866 } ] }, "episode_metrics": [ { "episode_index": 4, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9742647058823529, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 10 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 512, "2->3": 688 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 502, "from": 1, "to": 2 }, { "frame": 678, "from": 2, "to": 3 } ] }, { "episode_index": 5, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9937722419928826, "early_switch_count": 0, "switch_delays": { "0->1": 5, "1->2": 1, "2->3": 1 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 544, "2->3": 720 }, "boundaries": [ { "frame": 43, "from": 0, "to": 1 }, { "frame": 543, "from": 1, "to": 2 }, { "frame": 719, "from": 2, "to": 3 } ] }, { "episode_index": 14, "chunk_language_accuracy": 0.046875, "frame_language_accuracy": 0.03823529411764706, "early_switch_count": 0, "switch_delays": { "0->1": null, "1->2": null, "2->3": null }, "sequence_completed": false, "switch_frames": {}, "boundaries": [ { "frame": 39, "from": 0, "to": 1 }, { "frame": 484, "from": 1, "to": 2 }, { "frame": 663, "from": 2, "to": 3 } ] }, { "episode_index": 19, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9778420038535646, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 5 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 496, "2->3": 672 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 486, "from": 1, "to": 2 }, { "frame": 667, "from": 2, "to": 3 } ] } ], "summary": { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118 } }, { "model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt", "test_episodes": [ 4, 5, 14, 19 ], "threshold": 0.25, "require_next_class": true, "frame_metrics": { "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8983067854007274, "per_switch": [ { "transition": "0->1", "tp": 50, "fp": 5, "fn": 14, "tn": 4201, "precision": 0.9090909090909091, "recall": 0.78125, "f1": 0.8403361344537814 }, { "transition": "1->2", "tp": 64, "fp": 12, "fn": 0, "tn": 4194, "precision": 0.8421052631578947, "recall": 1.0, "f1": 0.9142857142857143 }, { "transition": "2->3", "tp": 63, "fp": 7, "fn": 1, "tn": 4199, "precision": 0.9, "recall": 0.984375, "f1": 0.9402985074626866 } ] }, "episode_metrics": [ { "episode_index": 4, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9742647058823529, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 10 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 512, "2->3": 688 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 502, "from": 1, "to": 2 }, { "frame": 678, "from": 2, "to": 3 } ] }, { "episode_index": 5, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9937722419928826, "early_switch_count": 0, "switch_delays": { "0->1": 5, "1->2": 1, "2->3": 1 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 544, "2->3": 720 }, "boundaries": [ { "frame": 43, "from": 0, "to": 1 }, { "frame": 543, "from": 1, "to": 2 }, { "frame": 719, "from": 2, "to": 3 } ] }, { "episode_index": 14, "chunk_language_accuracy": 0.046875, "frame_language_accuracy": 0.03823529411764706, "early_switch_count": 0, "switch_delays": { "0->1": null, "1->2": null, "2->3": null }, "sequence_completed": false, "switch_frames": {}, "boundaries": [ { "frame": 39, "from": 0, "to": 1 }, { "frame": 484, "from": 1, "to": 2 }, { "frame": 663, "from": 2, "to": 3 } ] }, { "episode_index": 19, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9778420038535646, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 5 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 496, "2->3": 672 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 486, "from": 1, "to": 2 }, { "frame": 667, "from": 2, "to": 3 } ] } ], "summary": { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118 } }, { "model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt", "test_episodes": [ 4, 5, 14, 19 ], "threshold": 0.3, "require_next_class": true, "frame_metrics": { "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.9006806162890149, "per_switch": [ { "transition": "0->1", "tp": 50, "fp": 4, "fn": 14, "tn": 4202, "precision": 0.9259259259259259, "recall": 0.78125, "f1": 0.847457627118644 }, { "transition": "1->2", "tp": 64, "fp": 12, "fn": 0, "tn": 4194, "precision": 0.8421052631578947, "recall": 1.0, "f1": 0.9142857142857143 }, { "transition": "2->3", "tp": 63, "fp": 7, "fn": 1, "tn": 4199, "precision": 0.9, "recall": 0.984375, "f1": 0.9402985074626866 } ] }, "episode_metrics": [ { "episode_index": 4, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9742647058823529, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 10 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 512, "2->3": 688 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 502, "from": 1, "to": 2 }, { "frame": 678, "from": 2, "to": 3 } ] }, { "episode_index": 5, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9937722419928826, "early_switch_count": 0, "switch_delays": { "0->1": 5, "1->2": 1, "2->3": 1 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 544, "2->3": 720 }, "boundaries": [ { "frame": 43, "from": 0, "to": 1 }, { "frame": 543, "from": 1, "to": 2 }, { "frame": 719, "from": 2, "to": 3 } ] }, { "episode_index": 14, "chunk_language_accuracy": 0.046875, "frame_language_accuracy": 0.03823529411764706, "early_switch_count": 0, "switch_delays": { "0->1": null, "1->2": null, "2->3": null }, "sequence_completed": false, "switch_frames": {}, "boundaries": [ { "frame": 39, "from": 0, "to": 1 }, { "frame": 484, "from": 1, "to": 2 }, { "frame": 663, "from": 2, "to": 3 } ] }, { "episode_index": 19, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9778420038535646, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 5 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 496, "2->3": 672 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 486, "from": 1, "to": 2 }, { "frame": 667, "from": 2, "to": 3 } ] } ], "summary": { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118 } }, { "model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt", "test_episodes": [ 4, 5, 14, 19 ], "threshold": 0.35, "require_next_class": true, "frame_metrics": { "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8973970197850795, "per_switch": [ { "transition": "0->1", "tp": 49, "fp": 4, "fn": 15, "tn": 4202, "precision": 0.9245283018867925, "recall": 0.765625, "f1": 0.8376068376068376 }, { "transition": "1->2", "tp": 64, "fp": 12, "fn": 0, "tn": 4194, "precision": 0.8421052631578947, "recall": 1.0, "f1": 0.9142857142857143 }, { "transition": "2->3", "tp": 63, "fp": 7, "fn": 1, "tn": 4199, "precision": 0.9, "recall": 0.984375, "f1": 0.9402985074626866 } ] }, "episode_metrics": [ { "episode_index": 4, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9742647058823529, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 10 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 512, "2->3": 688 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 502, "from": 1, "to": 2 }, { "frame": 678, "from": 2, "to": 3 } ] }, { "episode_index": 5, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9937722419928826, "early_switch_count": 0, "switch_delays": { "0->1": 5, "1->2": 1, "2->3": 1 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 544, "2->3": 720 }, "boundaries": [ { "frame": 43, "from": 0, "to": 1 }, { "frame": 543, "from": 1, "to": 2 }, { "frame": 719, "from": 2, "to": 3 } ] }, { "episode_index": 14, "chunk_language_accuracy": 0.046875, "frame_language_accuracy": 0.03823529411764706, "early_switch_count": 0, "switch_delays": { "0->1": null, "1->2": null, "2->3": null }, "sequence_completed": false, "switch_frames": {}, "boundaries": [ { "frame": 39, "from": 0, "to": 1 }, { "frame": 484, "from": 1, "to": 2 }, { "frame": 663, "from": 2, "to": 3 } ] }, { "episode_index": 19, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9778420038535646, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 5 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 496, "2->3": 672 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 486, "from": 1, "to": 2 }, { "frame": 667, "from": 2, "to": 3 } ] } ], "summary": { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118 } }, { "model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt", "test_episodes": [ 4, 5, 14, 19 ], "threshold": 0.4, "require_next_class": true, "frame_metrics": { "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8940568095483176, "per_switch": [ { "transition": "0->1", "tp": 48, "fp": 4, "fn": 16, "tn": 4202, "precision": 0.9230769230769231, "recall": 0.75, "f1": 0.8275862068965517 }, { "transition": "1->2", "tp": 64, "fp": 12, "fn": 0, "tn": 4194, "precision": 0.8421052631578947, "recall": 1.0, "f1": 0.9142857142857143 }, { "transition": "2->3", "tp": 63, "fp": 7, "fn": 1, "tn": 4199, "precision": 0.9, "recall": 0.984375, "f1": 0.9402985074626866 } ] }, "episode_metrics": [ { "episode_index": 4, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9742647058823529, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 10 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 512, "2->3": 688 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 502, "from": 1, "to": 2 }, { "frame": 678, "from": 2, "to": 3 } ] }, { "episode_index": 5, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9937722419928826, "early_switch_count": 0, "switch_delays": { "0->1": 5, "1->2": 1, "2->3": 1 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 544, "2->3": 720 }, "boundaries": [ { "frame": 43, "from": 0, "to": 1 }, { "frame": 543, "from": 1, "to": 2 }, { "frame": 719, "from": 2, "to": 3 } ] }, { "episode_index": 14, "chunk_language_accuracy": 0.046875, "frame_language_accuracy": 0.03823529411764706, "early_switch_count": 0, "switch_delays": { "0->1": null, "1->2": null, "2->3": null }, "sequence_completed": false, "switch_frames": {}, "boundaries": [ { "frame": 39, "from": 0, "to": 1 }, { "frame": 484, "from": 1, "to": 2 }, { "frame": 663, "from": 2, "to": 3 } ] }, { "episode_index": 19, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9778420038535646, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 5 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 496, "2->3": 672 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 486, "from": 1, "to": 2 }, { "frame": 667, "from": 2, "to": 3 } ] } ], "summary": { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118 } }, { "model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt", "test_episodes": [ 4, 5, 14, 19 ], "threshold": 0.45, "require_next_class": true, "frame_metrics": { "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8940568095483176, "per_switch": [ { "transition": "0->1", "tp": 48, "fp": 4, "fn": 16, "tn": 4202, "precision": 0.9230769230769231, "recall": 0.75, "f1": 0.8275862068965517 }, { "transition": "1->2", "tp": 64, "fp": 12, "fn": 0, "tn": 4194, "precision": 0.8421052631578947, "recall": 1.0, "f1": 0.9142857142857143 }, { "transition": "2->3", "tp": 63, "fp": 7, "fn": 1, "tn": 4199, "precision": 0.9, "recall": 0.984375, "f1": 0.9402985074626866 } ] }, "episode_metrics": [ { "episode_index": 4, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9742647058823529, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 10 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 512, "2->3": 688 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 502, "from": 1, "to": 2 }, { "frame": 678, "from": 2, "to": 3 } ] }, { "episode_index": 5, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9937722419928826, "early_switch_count": 0, "switch_delays": { "0->1": 5, "1->2": 1, "2->3": 1 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 544, "2->3": 720 }, "boundaries": [ { "frame": 43, "from": 0, "to": 1 }, { "frame": 543, "from": 1, "to": 2 }, { "frame": 719, "from": 2, "to": 3 } ] }, { "episode_index": 14, "chunk_language_accuracy": 0.046875, "frame_language_accuracy": 0.03823529411764706, "early_switch_count": 0, "switch_delays": { "0->1": null, "1->2": null, "2->3": null }, "sequence_completed": false, "switch_frames": {}, "boundaries": [ { "frame": 39, "from": 0, "to": 1 }, { "frame": 484, "from": 1, "to": 2 }, { "frame": 663, "from": 2, "to": 3 } ] }, { "episode_index": 19, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9778420038535646, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 5 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 496, "2->3": 672 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 486, "from": 1, "to": 2 }, { "frame": 667, "from": 2, "to": 3 } ] } ], "summary": { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118 } }, { "model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt", "test_episodes": [ 4, 5, 14, 19 ], "threshold": 0.5, "require_next_class": true, "frame_metrics": { "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.9010890266576999, "per_switch": [ { "transition": "0->1", "tp": 48, "fp": 2, "fn": 16, "tn": 4204, "precision": 0.96, "recall": 0.75, "f1": 0.8421052631578947 }, { "transition": "1->2", "tp": 64, "fp": 11, "fn": 0, "tn": 4195, "precision": 0.8533333333333334, "recall": 1.0, "f1": 0.9208633093525179 }, { "transition": "2->3", "tp": 63, "fp": 7, "fn": 1, "tn": 4199, "precision": 0.9, "recall": 0.984375, "f1": 0.9402985074626866 } ] }, "episode_metrics": [ { "episode_index": 4, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9742647058823529, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 10 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 512, "2->3": 688 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 502, "from": 1, "to": 2 }, { "frame": 678, "from": 2, "to": 3 } ] }, { "episode_index": 5, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9937722419928826, "early_switch_count": 0, "switch_delays": { "0->1": 5, "1->2": 1, "2->3": 1 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 544, "2->3": 720 }, "boundaries": [ { "frame": 43, "from": 0, "to": 1 }, { "frame": 543, "from": 1, "to": 2 }, { "frame": 719, "from": 2, "to": 3 } ] }, { "episode_index": 14, "chunk_language_accuracy": 0.046875, "frame_language_accuracy": 0.03823529411764706, "early_switch_count": 0, "switch_delays": { "0->1": null, "1->2": null, "2->3": null }, "sequence_completed": false, "switch_frames": {}, "boundaries": [ { "frame": 39, "from": 0, "to": 1 }, { "frame": 484, "from": 1, "to": 2 }, { "frame": 663, "from": 2, "to": 3 } ] }, { "episode_index": 19, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9778420038535646, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 5 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 496, "2->3": 672 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 486, "from": 1, "to": 2 }, { "frame": 667, "from": 2, "to": 3 } ] } ], "summary": { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118 } }, { "model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt", "test_episodes": [ 4, 5, 14, 19 ], "threshold": 0.55, "require_next_class": true, "frame_metrics": { "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8941967960812587, "per_switch": [ { "transition": "0->1", "tp": 46, "fp": 2, "fn": 18, "tn": 4204, "precision": 0.9583333333333334, "recall": 0.71875, "f1": 0.8214285714285714 }, { "transition": "1->2", "tp": 64, "fp": 11, "fn": 0, "tn": 4195, "precision": 0.8533333333333334, "recall": 1.0, "f1": 0.9208633093525179 }, { "transition": "2->3", "tp": 63, "fp": 7, "fn": 1, "tn": 4199, "precision": 0.9, "recall": 0.984375, "f1": 0.9402985074626866 } ] }, "episode_metrics": [ { "episode_index": 4, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9742647058823529, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 10 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 512, "2->3": 688 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 502, "from": 1, "to": 2 }, { "frame": 678, "from": 2, "to": 3 } ] }, { "episode_index": 5, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9937722419928826, "early_switch_count": 0, "switch_delays": { "0->1": 5, "1->2": 1, "2->3": 1 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 544, "2->3": 720 }, "boundaries": [ { "frame": 43, "from": 0, "to": 1 }, { "frame": 543, "from": 1, "to": 2 }, { "frame": 719, "from": 2, "to": 3 } ] }, { "episode_index": 14, "chunk_language_accuracy": 0.046875, "frame_language_accuracy": 0.03823529411764706, "early_switch_count": 0, "switch_delays": { "0->1": null, "1->2": null, "2->3": null }, "sequence_completed": false, "switch_frames": {}, "boundaries": [ { "frame": 39, "from": 0, "to": 1 }, { "frame": 484, "from": 1, "to": 2 }, { "frame": 663, "from": 2, "to": 3 } ] }, { "episode_index": 19, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9778420038535646, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 5 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 496, "2->3": 672 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 486, "from": 1, "to": 2 }, { "frame": 667, "from": 2, "to": 3 } ] } ], "summary": { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118 } }, { "model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt", "test_episodes": [ 4, 5, 14, 19 ], "threshold": 0.6, "require_next_class": true, "frame_metrics": { "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8990201864113261, "per_switch": [ { "transition": "0->1", "tp": 46, "fp": 1, "fn": 18, "tn": 4205, "precision": 0.9787234042553191, "recall": 0.71875, "f1": 0.8288288288288289 }, { "transition": "1->2", "tp": 64, "fp": 11, "fn": 0, "tn": 4195, "precision": 0.8533333333333334, "recall": 1.0, "f1": 0.9208633093525179 }, { "transition": "2->3", "tp": 63, "fp": 6, "fn": 1, "tn": 4200, "precision": 0.9130434782608695, "recall": 0.984375, "f1": 0.9473684210526315 } ] }, "episode_metrics": [ { "episode_index": 4, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9742647058823529, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 10 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 512, "2->3": 688 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 502, "from": 1, "to": 2 }, { "frame": 678, "from": 2, "to": 3 } ] }, { "episode_index": 5, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9937722419928826, "early_switch_count": 0, "switch_delays": { "0->1": 5, "1->2": 1, "2->3": 1 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 544, "2->3": 720 }, "boundaries": [ { "frame": 43, "from": 0, "to": 1 }, { "frame": 543, "from": 1, "to": 2 }, { "frame": 719, "from": 2, "to": 3 } ] }, { "episode_index": 14, "chunk_language_accuracy": 0.046875, "frame_language_accuracy": 0.03823529411764706, "early_switch_count": 0, "switch_delays": { "0->1": null, "1->2": null, "2->3": null }, "sequence_completed": false, "switch_frames": {}, "boundaries": [ { "frame": 39, "from": 0, "to": 1 }, { "frame": 484, "from": 1, "to": 2 }, { "frame": 663, "from": 2, "to": 3 } ] }, { "episode_index": 19, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9778420038535646, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 5 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 496, "2->3": 672 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 486, "from": 1, "to": 2 }, { "frame": 667, "from": 2, "to": 3 } ] } ], "summary": { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118 } }, { "model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt", "test_episodes": [ 4, 5, 14, 19 ], "threshold": 0.65, "require_next_class": true, "frame_metrics": { "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.9014125309089338, "per_switch": [ { "transition": "0->1", "tp": 46, "fp": 1, "fn": 18, "tn": 4205, "precision": 0.9787234042553191, "recall": 0.71875, "f1": 0.8288288288288289 }, { "transition": "1->2", "tp": 64, "fp": 11, "fn": 0, "tn": 4195, "precision": 0.8533333333333334, "recall": 1.0, "f1": 0.9208633093525179 }, { "transition": "2->3", "tp": 63, "fp": 5, "fn": 1, "tn": 4201, "precision": 0.9264705882352942, "recall": 0.984375, "f1": 0.9545454545454545 } ] }, "episode_metrics": [ { "episode_index": 4, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9742647058823529, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 10 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 512, "2->3": 688 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 502, "from": 1, "to": 2 }, { "frame": 678, "from": 2, "to": 3 } ] }, { "episode_index": 5, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9937722419928826, "early_switch_count": 0, "switch_delays": { "0->1": 5, "1->2": 1, "2->3": 1 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 544, "2->3": 720 }, "boundaries": [ { "frame": 43, "from": 0, "to": 1 }, { "frame": 543, "from": 1, "to": 2 }, { "frame": 719, "from": 2, "to": 3 } ] }, { "episode_index": 14, "chunk_language_accuracy": 0.046875, "frame_language_accuracy": 0.03823529411764706, "early_switch_count": 0, "switch_delays": { "0->1": null, "1->2": null, "2->3": null }, "sequence_completed": false, "switch_frames": {}, "boundaries": [ { "frame": 39, "from": 0, "to": 1 }, { "frame": 484, "from": 1, "to": 2 }, { "frame": 663, "from": 2, "to": 3 } ] }, { "episode_index": 19, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9778420038535646, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 5 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 496, "2->3": 672 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 486, "from": 1, "to": 2 }, { "frame": 667, "from": 2, "to": 3 } ] } ], "summary": { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118 } }, { "model_path": "/home/ubuntu/groot_workspace/checkpoints/siglip2_decision_4class_state22_woidle_post1chunk/best_model.pt", "test_episodes": [ 4, 5, 14, 19 ], "threshold": 0.7, "require_next_class": true, "frame_metrics": { "class_accuracy": 0.9955503512880562, "switch_macro_f1": 0.8987523412258435, "per_switch": [ { "transition": "0->1", "tp": 46, "fp": 1, "fn": 18, "tn": 4205, "precision": 0.9787234042553191, "recall": 0.71875, "f1": 0.8288288288288289 }, { "transition": "1->2", "tp": 64, "fp": 11, "fn": 0, "tn": 4195, "precision": 0.8533333333333334, "recall": 1.0, "f1": 0.9208633093525179 }, { "transition": "2->3", "tp": 62, "fp": 5, "fn": 2, "tn": 4201, "precision": 0.9253731343283582, "recall": 0.96875, "f1": 0.9465648854961832 } ] }, "episode_metrics": [ { "episode_index": 4, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9742647058823529, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 10 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 512, "2->3": 688 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 502, "from": 1, "to": 2 }, { "frame": 678, "from": 2, "to": 3 } ] }, { "episode_index": 5, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9937722419928826, "early_switch_count": 0, "switch_delays": { "0->1": 5, "1->2": 1, "2->3": 1 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 544, "2->3": 720 }, "boundaries": [ { "frame": 43, "from": 0, "to": 1 }, { "frame": 543, "from": 1, "to": 2 }, { "frame": 719, "from": 2, "to": 3 } ] }, { "episode_index": 14, "chunk_language_accuracy": 0.046875, "frame_language_accuracy": 0.03823529411764706, "early_switch_count": 0, "switch_delays": { "0->1": null, "1->2": null, "2->3": null }, "sequence_completed": false, "switch_frames": {}, "boundaries": [ { "frame": 39, "from": 0, "to": 1 }, { "frame": 484, "from": 1, "to": 2 }, { "frame": 663, "from": 2, "to": 3 } ] }, { "episode_index": 19, "chunk_language_accuracy": 1.0, "frame_language_accuracy": 0.9778420038535646, "early_switch_count": 0, "switch_delays": { "0->1": 8, "1->2": 10, "2->3": 5 }, "sequence_completed": true, "switch_frames": { "0->1": 48, "1->2": 496, "2->3": 672 }, "boundaries": [ { "frame": 40, "from": 0, "to": 1 }, { "frame": 486, "from": 1, "to": 2 }, { "frame": 667, "from": 2, "to": 3 } ] } ], "summary": { "early_switch_count": 0, "sequence_completion_rate": 0.75, "chunk_language_accuracy": 0.76171875, "frame_language_accuracy": 0.7460285614616118 } } ] }