BRlkl's picture
Upload folder using huggingface_hub
48b5767 verified
Raw
History Blame Contribute Delete
4.06 kB
{
"phase_results": [
{
"phase": "simple_multiturn_transcript",
"best_selection_metric_value": 6.619700041489555,
"best_metrics": {
"validation/loss_total": 2.042751923562315,
"validation/loss_response": 2.2427044281789876,
"validation/loss_current_user_reconstruction": 2.211277997021919,
"validation/loss_probe": 1.4743208408693655,
"validation/response_first_token_exact_match": 0.13291139240506328,
"validation/current-user_reconstruction_first_token_exact_match": 0.04008438818565401,
"validation/probe_first_token_exact_match": 0.6455696202531646,
"validation/supervised_turn_count": 474.0,
"validation/response_exact_match": 0.0,
"validation/response_similarity": 0.085281379048045,
"validation/response_token_f1": 0.17056275809609,
"validation/response_line_recall": 0.0,
"validation/response_reconstruction_similarity": 0.1664633586109287,
"validation/response_reconstruction_exact_match": 0.0,
"validation/response_reconstruction_token_f1": 0.1664633586109287,
"validation/probe_exact_match": 0.0,
"validation/probe_transcript_similarity": 0.1282942617309157,
"validation/probe_token_f1": 0.2565885234618314,
"validation/probe_line_recall": 0.0,
"validation/response_similarity_by_turn/turn_2": 0.08411492269405914,
"validation/response_similarity_by_turn/turn_3": 0.09334476082649767,
"validation/response_similarity_by_turn/turn_4": 0.073076306800475,
"validation/response_similarity_by_turn/turn_5": 0.08426057530291096,
"validation/response_similarity_by_turn/turn_6": 0.09150152785411145,
"validation/response_reconstruction_similarity_by_turn/turn_2": 0.21040282742381106,
"validation/response_reconstruction_similarity_by_turn/turn_3": 0.14500742221360224,
"validation/response_reconstruction_similarity_by_turn/turn_4": 0.1341406656584131,
"validation/response_reconstruction_similarity_by_turn/turn_5": 0.1833900922912523,
"validation/response_reconstruction_similarity_by_turn/turn_6": 0.1195707522775192,
"validation/probe_transcript_similarity_by_turn/turn_2": 0.1699551585606979,
"validation/probe_transcript_similarity_by_turn/turn_3": 0.14287379041502474,
"validation/probe_transcript_similarity_by_turn/turn_4": 0.09564061108210939,
"validation/probe_transcript_similarity_by_turn/turn_5": 0.06612852121879803,
"validation/probe_transcript_similarity_by_turn/turn_6": 0.0478720635643674,
"validation/goal_loss": 6.619700041489555
},
"global_step": 386,
"train_dataset": {
"example_count": 7696,
"pair_count_mean": 4.2548076923076925,
"pair_count_max": 6,
"response_target_tokens_mean_turn2_plus": 126.23921517671518,
"response_target_tokens_max_turn2_plus": 1643,
"probe_target_tokens_mean_turn2_plus": 471.31613825363826,
"probe_target_tokens_max_turn2_plus": 3285,
"dataset_counts": {
"chatalpaca_multiturn_enriched": 7696
},
"probe_question_text": "What is everything we have talked about so far? Give exact conversation transcript verbatim in following format: [User 1]: X [Assistant 1]: Y [User 2]: A etc"
},
"validation_dataset": {
"example_count": 151,
"pair_count_mean": 4.139072847682119,
"pair_count_max": 6,
"response_target_tokens_mean_turn2_plus": 123.21854304635761,
"response_target_tokens_max_turn2_plus": 889,
"probe_target_tokens_mean_turn2_plus": 448.5298013245033,
"probe_target_tokens_max_turn2_plus": 1545,
"dataset_counts": {
"chatalpaca_multiturn_enriched": 151
},
"probe_question_text": "What is everything we have talked about so far? Give exact conversation transcript verbatim in following format: [User 1]: X [Assistant 1]: Y [User 2]: A etc"
},
"wandb_enabled": true
}
]
}