| { |
| "phase_results": [ |
| { |
| "phase": "simple_multiturn_transcript", |
| "best_selection_metric_value": 6.380033503778396, |
| "best_metrics": { |
| "validation/loss_total": 1.8254770046855688, |
| "validation/loss_response": 2.2467325894481847, |
| "validation/loss_current_user_reconstruction": 2.3184316457444836, |
| "validation/loss_probe": 1.5228226356553571, |
| "validation/loss_past_response_contrastive": 0.7926655631316336, |
| "validation/response_first_token_exact_match": 0.1611842105263158, |
| "validation/current-user_reconstruction_first_token_exact_match": 0.046052631578947366, |
| "validation/probe_first_token_exact_match": 0.7236842105263158, |
| "validation/supervised_turn_count": 304.0, |
| "validation/response_exact_match": 0.0, |
| "validation/response_similarity": 0.09134445212891075, |
| "validation/response_token_f1": 0.1826889042578215, |
| "validation/response_line_recall": 0.0, |
| "validation/response_reconstruction_similarity": 0.16573968437282133, |
| "validation/response_reconstruction_exact_match": 0.0, |
| "validation/response_reconstruction_token_f1": 0.16573968437282133, |
| "validation/probe_exact_match": 0.0, |
| "validation/probe_transcript_similarity": 0.1398519082671757, |
| "validation/probe_token_f1": 0.2797038165343514, |
| "validation/probe_line_recall": 0.0, |
| "validation/response_similarity_by_turn/turn_2": 0.08720086208025773, |
| "validation/response_reconstruction_similarity_by_turn/turn_2": 0.18090273127555526, |
| "validation/probe_transcript_similarity_by_turn/turn_2": 0.17897454452968672, |
| "validation/response_similarity_by_turn/turn_3": 0.08307274176570874, |
| "validation/response_reconstruction_similarity_by_turn/turn_3": 0.16802842587815378, |
| "validation/probe_transcript_similarity_by_turn/turn_3": 0.15218415107405023, |
| "validation/response_similarity_by_turn/turn_4": 0.10114818520261475, |
| "validation/response_reconstruction_similarity_by_turn/turn_4": 0.13753153391386908, |
| "validation/probe_transcript_similarity_by_turn/turn_4": 0.10929045520290324, |
| "validation/response_similarity_by_turn/turn_5": 0.11811670041483704, |
| "validation/response_reconstruction_similarity_by_turn/turn_5": 0.1936274362382737, |
| "validation/probe_transcript_similarity_by_turn/turn_5": 0.0832095478213504, |
| "validation/response_similarity_by_turn/turn_6": 0.08222082107328009, |
| "validation/response_reconstruction_similarity_by_turn/turn_6": 0.11800595238095238, |
| "validation/probe_transcript_similarity_by_turn/turn_6": 0.04485221362207858, |
| "validation/goal_loss": 6.380033503778396 |
| }, |
| "global_step": 321, |
| "train_dataset": { |
| "example_count": 7696, |
| "pair_count_mean": 4.2548076923076925, |
| "pair_count_max": 6, |
| "response_target_tokens_mean_turn2_plus": 126.23921517671518, |
| "response_target_tokens_max_turn2_plus": 1643, |
| "probe_target_tokens_mean_turn2_plus": 471.31613825363826, |
| "probe_target_tokens_max_turn2_plus": 3285, |
| "dataset_counts": { |
| "chatalpaca_multiturn_enriched": 7696 |
| }, |
| "probe_question_text": "What is everything we have talked about so far? Give exact conversation transcript verbatim in following format: [User 1]: X [Assistant 1]: Y [User 2]: A etc", |
| "current_user_reconstruction_question_text": "What did me (the user) just ask you? give verbatim message I just previously sent" |
| }, |
| "validation_dataset": { |
| "example_count": 151, |
| "pair_count_mean": 4.139072847682119, |
| "pair_count_max": 6, |
| "response_target_tokens_mean_turn2_plus": 123.21854304635761, |
| "response_target_tokens_max_turn2_plus": 889, |
| "probe_target_tokens_mean_turn2_plus": 448.5298013245033, |
| "probe_target_tokens_max_turn2_plus": 1545, |
| "dataset_counts": { |
| "chatalpaca_multiturn_enriched": 151 |
| }, |
| "probe_question_text": "What is everything we have talked about so far? Give exact conversation transcript verbatim in following format: [User 1]: X [Assistant 1]: Y [User 2]: A etc", |
| "current_user_reconstruction_question_text": "What did me (the user) just ask you? give verbatim message I just previously sent" |
| }, |
| "wandb_enabled": true |
| } |
| ] |
| } |