| { |
| "phase_results": [ |
| { |
| "phase": "simple_multiturn_transcript", |
| "best_selection_metric_value": 5.121905698811801, |
| "best_metrics": { |
| "validation/loss_total": 1.6588564595009827, |
| "validation/loss_response": 2.0905123861018247, |
| "validation/loss_probe": 1.227200532900141, |
| "validation/response_first_token_exact_match": 0.21940928270042195, |
| "validation/probe_first_token_exact_match": 1.0, |
| "validation/supervised_turn_count": 474.0, |
| "validation/response_exact_match": 0.007936507936507936, |
| "validation/probe_exact_match": 0.0, |
| "validation/probe_transcript_similarity": 0.26450712637633667, |
| "validation/probe_token_f1": 0.5290142527526733, |
| "validation/probe_line_recall": 0.0, |
| "validation/probe_transcript_similarity_by_turn/turn_2": 0.2855606331489746, |
| "validation/probe_transcript_similarity_by_turn/turn_3": 0.2640905426798366, |
| "validation/probe_transcript_similarity_by_turn/turn_4": 0.24927199699265568, |
| "validation/probe_transcript_similarity_by_turn/turn_5": 0.24620106445427534, |
| "validation/probe_transcript_similarity_by_turn/turn_6": 0.23760515186996223, |
| "validation/goal_loss": 5.121905698811801 |
| }, |
| "global_step": 579, |
| "train_dataset": { |
| "example_count": 7696, |
| "pair_count_mean": 4.2548076923076925, |
| "pair_count_max": 6, |
| "response_target_tokens_mean_turn2_plus": 126.23921517671518, |
| "response_target_tokens_max_turn2_plus": 1643, |
| "probe_target_tokens_mean_turn2_plus": 471.31613825363826, |
| "probe_target_tokens_max_turn2_plus": 3285, |
| "dataset_counts": { |
| "chatalpaca_multiturn_enriched": 7696 |
| }, |
| "probe_question_text": "What is everything we have talked about so far? Give exact conversation transcript verbatim in following format: [User 1]: X [Assistant 1]: Y [User 2]: A etc" |
| }, |
| "validation_dataset": { |
| "example_count": 151, |
| "pair_count_mean": 4.139072847682119, |
| "pair_count_max": 6, |
| "response_target_tokens_mean_turn2_plus": 123.21854304635761, |
| "response_target_tokens_max_turn2_plus": 889, |
| "probe_target_tokens_mean_turn2_plus": 448.5298013245033, |
| "probe_target_tokens_max_turn2_plus": 1545, |
| "dataset_counts": { |
| "chatalpaca_multiturn_enriched": 151 |
| }, |
| "probe_question_text": "What is everything we have talked about so far? Give exact conversation transcript verbatim in following format: [User 1]: X [Assistant 1]: Y [User 2]: A etc" |
| }, |
| "wandb_enabled": true |
| } |
| ] |
| } |