| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 8.0, |
| "eval_steps": 5000, |
| "global_step": 157336, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2542329791020491, |
| "grad_norm": 0.8224548697471619, |
| "learning_rate": 1.937686026100893e-05, |
| "loss": 0.4274, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.2542329791020491, |
| "eval_bleu": 44.9035, |
| "eval_gen_len": 55.7883, |
| "eval_loss": 0.8018454313278198, |
| "eval_runtime": 349.8887, |
| "eval_samples_per_second": 4.144, |
| "eval_steps_per_second": 0.346, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.5084659582040982, |
| "grad_norm": 0.7412045001983643, |
| "learning_rate": 1.8741127985957416e-05, |
| "loss": 0.3884, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.5084659582040982, |
| "eval_bleu": 45.2602, |
| "eval_gen_len": 55.9462, |
| "eval_loss": 0.7863603234291077, |
| "eval_runtime": 356.7073, |
| "eval_samples_per_second": 4.065, |
| "eval_steps_per_second": 0.339, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.7626989373061474, |
| "grad_norm": 0.9902288317680359, |
| "learning_rate": 1.8105522908239844e-05, |
| "loss": 0.3703, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.7626989373061474, |
| "eval_bleu": 45.5987, |
| "eval_gen_len": 55.831, |
| "eval_loss": 0.7793292999267578, |
| "eval_runtime": 352.9131, |
| "eval_samples_per_second": 4.109, |
| "eval_steps_per_second": 0.343, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.0169319164081965, |
| "grad_norm": 0.823268473148346, |
| "learning_rate": 1.7469790633188332e-05, |
| "loss": 0.3626, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.0169319164081965, |
| "eval_bleu": 45.7527, |
| "eval_gen_len": 55.9131, |
| "eval_loss": 0.7690481543540955, |
| "eval_runtime": 354.1353, |
| "eval_samples_per_second": 4.094, |
| "eval_steps_per_second": 0.342, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.2711648955102457, |
| "grad_norm": 0.8465375900268555, |
| "learning_rate": 1.683418555547076e-05, |
| "loss": 0.3434, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.2711648955102457, |
| "eval_bleu": 45.9693, |
| "eval_gen_len": 55.9407, |
| "eval_loss": 0.7745993733406067, |
| "eval_runtime": 354.895, |
| "eval_samples_per_second": 4.086, |
| "eval_steps_per_second": 0.341, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.5253978746122947, |
| "grad_norm": 0.7541704177856445, |
| "learning_rate": 1.6198453280419245e-05, |
| "loss": 0.3389, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.5253978746122947, |
| "eval_bleu": 46.1257, |
| "eval_gen_len": 55.9559, |
| "eval_loss": 0.7699302434921265, |
| "eval_runtime": 354.6656, |
| "eval_samples_per_second": 4.088, |
| "eval_steps_per_second": 0.341, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.7796308537143437, |
| "grad_norm": 0.7378148436546326, |
| "learning_rate": 1.556272100536773e-05, |
| "loss": 0.3375, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.7796308537143437, |
| "eval_bleu": 46.1114, |
| "eval_gen_len": 55.8379, |
| "eval_loss": 0.7595505714416504, |
| "eval_runtime": 353.6224, |
| "eval_samples_per_second": 4.1, |
| "eval_steps_per_second": 0.342, |
| "step": 35000 |
| }, |
| { |
| "epoch": 2.033863832816393, |
| "grad_norm": 0.9236812591552734, |
| "learning_rate": 1.4927115927650158e-05, |
| "loss": 0.3306, |
| "step": 40000 |
| }, |
| { |
| "epoch": 2.033863832816393, |
| "eval_bleu": 46.1398, |
| "eval_gen_len": 55.8455, |
| "eval_loss": 0.7678882479667664, |
| "eval_runtime": 354.3507, |
| "eval_samples_per_second": 4.092, |
| "eval_steps_per_second": 0.341, |
| "step": 40000 |
| }, |
| { |
| "epoch": 2.288096811918442, |
| "grad_norm": 1.1531308889389038, |
| "learning_rate": 1.4291383652598642e-05, |
| "loss": 0.3187, |
| "step": 45000 |
| }, |
| { |
| "epoch": 2.288096811918442, |
| "eval_bleu": 46.1836, |
| "eval_gen_len": 55.8855, |
| "eval_loss": 0.7600361704826355, |
| "eval_runtime": 354.4781, |
| "eval_samples_per_second": 4.091, |
| "eval_steps_per_second": 0.341, |
| "step": 45000 |
| }, |
| { |
| "epoch": 2.5423297910204914, |
| "grad_norm": 0.6744846105575562, |
| "learning_rate": 1.3655651377547128e-05, |
| "loss": 0.3169, |
| "step": 50000 |
| }, |
| { |
| "epoch": 2.5423297910204914, |
| "eval_bleu": 46.554, |
| "eval_gen_len": 55.7834, |
| "eval_loss": 0.7513669729232788, |
| "eval_runtime": 345.0185, |
| "eval_samples_per_second": 4.203, |
| "eval_steps_per_second": 0.351, |
| "step": 50000 |
| }, |
| { |
| "epoch": 2.79656277012254, |
| "grad_norm": 0.7795997858047485, |
| "learning_rate": 1.3020046299829556e-05, |
| "loss": 0.3166, |
| "step": 55000 |
| }, |
| { |
| "epoch": 2.79656277012254, |
| "eval_bleu": 46.3029, |
| "eval_gen_len": 55.7586, |
| "eval_loss": 0.75509113073349, |
| "eval_runtime": 342.8695, |
| "eval_samples_per_second": 4.229, |
| "eval_steps_per_second": 0.353, |
| "step": 55000 |
| }, |
| { |
| "epoch": 3.0507957492245894, |
| "grad_norm": 0.8248696327209473, |
| "learning_rate": 1.238431402477804e-05, |
| "loss": 0.312, |
| "step": 60000 |
| }, |
| { |
| "epoch": 3.0507957492245894, |
| "eval_bleu": 46.2729, |
| "eval_gen_len": 55.8938, |
| "eval_loss": 0.753489077091217, |
| "eval_runtime": 344.7281, |
| "eval_samples_per_second": 4.206, |
| "eval_steps_per_second": 0.351, |
| "step": 60000 |
| }, |
| { |
| "epoch": 3.3050287283266386, |
| "grad_norm": 0.6925950646400452, |
| "learning_rate": 1.1748708947060469e-05, |
| "loss": 0.3043, |
| "step": 65000 |
| }, |
| { |
| "epoch": 3.3050287283266386, |
| "eval_bleu": 46.5336, |
| "eval_gen_len": 55.809, |
| "eval_loss": 0.7513247132301331, |
| "eval_runtime": 342.2971, |
| "eval_samples_per_second": 4.236, |
| "eval_steps_per_second": 0.353, |
| "step": 65000 |
| }, |
| { |
| "epoch": 3.5592617074286874, |
| "grad_norm": 0.7592390179634094, |
| "learning_rate": 1.1112976672008955e-05, |
| "loss": 0.3034, |
| "step": 70000 |
| }, |
| { |
| "epoch": 3.5592617074286874, |
| "eval_bleu": 46.3724, |
| "eval_gen_len": 55.8724, |
| "eval_loss": 0.7483436465263367, |
| "eval_runtime": 345.9725, |
| "eval_samples_per_second": 4.191, |
| "eval_steps_per_second": 0.35, |
| "step": 70000 |
| }, |
| { |
| "epoch": 3.8134946865307366, |
| "grad_norm": 0.6906684637069702, |
| "learning_rate": 1.047724439695744e-05, |
| "loss": 0.3022, |
| "step": 75000 |
| }, |
| { |
| "epoch": 3.8134946865307366, |
| "eval_bleu": 46.3098, |
| "eval_gen_len": 55.7759, |
| "eval_loss": 0.7495469450950623, |
| "eval_runtime": 345.9396, |
| "eval_samples_per_second": 4.191, |
| "eval_steps_per_second": 0.35, |
| "step": 75000 |
| }, |
| { |
| "epoch": 4.067727665632786, |
| "grad_norm": 0.8161213397979736, |
| "learning_rate": 9.841512121905925e-06, |
| "loss": 0.3008, |
| "step": 80000 |
| }, |
| { |
| "epoch": 4.067727665632786, |
| "eval_bleu": 46.3194, |
| "eval_gen_len": 55.829, |
| "eval_loss": 0.7491657137870789, |
| "eval_runtime": 345.4241, |
| "eval_samples_per_second": 4.198, |
| "eval_steps_per_second": 0.35, |
| "step": 80000 |
| }, |
| { |
| "epoch": 4.321960644734835, |
| "grad_norm": 0.641299307346344, |
| "learning_rate": 9.205907044188356e-06, |
| "loss": 0.2931, |
| "step": 85000 |
| }, |
| { |
| "epoch": 4.321960644734835, |
| "eval_bleu": 46.4319, |
| "eval_gen_len": 55.9069, |
| "eval_loss": 0.7467553615570068, |
| "eval_runtime": 349.087, |
| "eval_samples_per_second": 4.154, |
| "eval_steps_per_second": 0.347, |
| "step": 85000 |
| }, |
| { |
| "epoch": 4.576193623836884, |
| "grad_norm": 0.8405129909515381, |
| "learning_rate": 8.57017476913684e-06, |
| "loss": 0.2944, |
| "step": 90000 |
| }, |
| { |
| "epoch": 4.576193623836884, |
| "eval_bleu": 46.456, |
| "eval_gen_len": 55.9034, |
| "eval_loss": 0.7442417740821838, |
| "eval_runtime": 347.6322, |
| "eval_samples_per_second": 4.171, |
| "eval_steps_per_second": 0.348, |
| "step": 90000 |
| }, |
| { |
| "epoch": 4.8304266029389336, |
| "grad_norm": 1.2027479410171509, |
| "learning_rate": 7.934442494085324e-06, |
| "loss": 0.2928, |
| "step": 95000 |
| }, |
| { |
| "epoch": 4.8304266029389336, |
| "eval_bleu": 46.4492, |
| "eval_gen_len": 55.7993, |
| "eval_loss": 0.7427089214324951, |
| "eval_runtime": 345.6033, |
| "eval_samples_per_second": 4.196, |
| "eval_steps_per_second": 0.35, |
| "step": 95000 |
| }, |
| { |
| "epoch": 5.084659582040983, |
| "grad_norm": 0.668217658996582, |
| "learning_rate": 7.29871021903381e-06, |
| "loss": 0.2898, |
| "step": 100000 |
| }, |
| { |
| "epoch": 5.084659582040983, |
| "eval_bleu": 46.5755, |
| "eval_gen_len": 55.7524, |
| "eval_loss": 0.7419084906578064, |
| "eval_runtime": 342.1745, |
| "eval_samples_per_second": 4.238, |
| "eval_steps_per_second": 0.354, |
| "step": 100000 |
| }, |
| { |
| "epoch": 5.338892561143031, |
| "grad_norm": 1.0696251392364502, |
| "learning_rate": 6.663105141316239e-06, |
| "loss": 0.2851, |
| "step": 105000 |
| }, |
| { |
| "epoch": 5.338892561143031, |
| "eval_bleu": 46.6577, |
| "eval_gen_len": 55.9538, |
| "eval_loss": 0.7440945506095886, |
| "eval_runtime": 348.3083, |
| "eval_samples_per_second": 4.163, |
| "eval_steps_per_second": 0.347, |
| "step": 105000 |
| }, |
| { |
| "epoch": 5.59312554024508, |
| "grad_norm": 0.6697210073471069, |
| "learning_rate": 6.027372866264724e-06, |
| "loss": 0.286, |
| "step": 110000 |
| }, |
| { |
| "epoch": 5.59312554024508, |
| "eval_bleu": 46.7734, |
| "eval_gen_len": 55.7655, |
| "eval_loss": 0.7419018149375916, |
| "eval_runtime": 344.8268, |
| "eval_samples_per_second": 4.205, |
| "eval_steps_per_second": 0.351, |
| "step": 110000 |
| }, |
| { |
| "epoch": 5.84735851934713, |
| "grad_norm": 1.0238195657730103, |
| "learning_rate": 5.391767788547152e-06, |
| "loss": 0.2862, |
| "step": 115000 |
| }, |
| { |
| "epoch": 5.84735851934713, |
| "eval_bleu": 46.5343, |
| "eval_gen_len": 55.7931, |
| "eval_loss": 0.738459587097168, |
| "eval_runtime": 344.0608, |
| "eval_samples_per_second": 4.214, |
| "eval_steps_per_second": 0.352, |
| "step": 115000 |
| }, |
| { |
| "epoch": 6.101591498449179, |
| "grad_norm": 0.6626740097999573, |
| "learning_rate": 4.756035513495637e-06, |
| "loss": 0.2851, |
| "step": 120000 |
| }, |
| { |
| "epoch": 6.101591498449179, |
| "eval_bleu": 46.5618, |
| "eval_gen_len": 55.7979, |
| "eval_loss": 0.7425189018249512, |
| "eval_runtime": 344.7989, |
| "eval_samples_per_second": 4.205, |
| "eval_steps_per_second": 0.351, |
| "step": 120000 |
| }, |
| { |
| "epoch": 6.355824477551228, |
| "grad_norm": 0.9636191129684448, |
| "learning_rate": 4.1203032384441225e-06, |
| "loss": 0.283, |
| "step": 125000 |
| }, |
| { |
| "epoch": 6.355824477551228, |
| "eval_bleu": 46.6226, |
| "eval_gen_len": 55.7945, |
| "eval_loss": 0.743998646736145, |
| "eval_runtime": 345.3796, |
| "eval_samples_per_second": 4.198, |
| "eval_steps_per_second": 0.35, |
| "step": 125000 |
| }, |
| { |
| "epoch": 6.610057456653277, |
| "grad_norm": 0.9589861631393433, |
| "learning_rate": 3.4845709633926077e-06, |
| "loss": 0.2795, |
| "step": 130000 |
| }, |
| { |
| "epoch": 6.610057456653277, |
| "eval_bleu": 46.6222, |
| "eval_gen_len": 55.7572, |
| "eval_loss": 0.7409077882766724, |
| "eval_runtime": 345.3114, |
| "eval_samples_per_second": 4.199, |
| "eval_steps_per_second": 0.35, |
| "step": 130000 |
| }, |
| { |
| "epoch": 6.8642904357553265, |
| "grad_norm": 1.038360834121704, |
| "learning_rate": 2.848965885675036e-06, |
| "loss": 0.2814, |
| "step": 135000 |
| }, |
| { |
| "epoch": 6.8642904357553265, |
| "eval_bleu": 46.6826, |
| "eval_gen_len": 55.7393, |
| "eval_loss": 0.7406843304634094, |
| "eval_runtime": 344.9932, |
| "eval_samples_per_second": 4.203, |
| "eval_steps_per_second": 0.351, |
| "step": 135000 |
| }, |
| { |
| "epoch": 7.118523414857376, |
| "grad_norm": 0.8131181001663208, |
| "learning_rate": 2.2131064132895776e-06, |
| "loss": 0.2802, |
| "step": 140000 |
| }, |
| { |
| "epoch": 7.118523414857376, |
| "eval_bleu": 46.6691, |
| "eval_gen_len": 55.7793, |
| "eval_loss": 0.7410460114479065, |
| "eval_runtime": 344.2601, |
| "eval_samples_per_second": 4.212, |
| "eval_steps_per_second": 0.351, |
| "step": 140000 |
| }, |
| { |
| "epoch": 7.372756393959424, |
| "grad_norm": 0.8258042335510254, |
| "learning_rate": 1.5775013355720064e-06, |
| "loss": 0.2781, |
| "step": 145000 |
| }, |
| { |
| "epoch": 7.372756393959424, |
| "eval_bleu": 46.6998, |
| "eval_gen_len": 55.7331, |
| "eval_loss": 0.7418521642684937, |
| "eval_runtime": 344.3472, |
| "eval_samples_per_second": 4.211, |
| "eval_steps_per_second": 0.351, |
| "step": 145000 |
| }, |
| { |
| "epoch": 7.626989373061473, |
| "grad_norm": 0.849775493144989, |
| "learning_rate": 9.417690605204915e-07, |
| "loss": 0.2765, |
| "step": 150000 |
| }, |
| { |
| "epoch": 7.626989373061473, |
| "eval_bleu": 46.6978, |
| "eval_gen_len": 55.7703, |
| "eval_loss": 0.7417660355567932, |
| "eval_runtime": 345.0669, |
| "eval_samples_per_second": 4.202, |
| "eval_steps_per_second": 0.351, |
| "step": 150000 |
| }, |
| { |
| "epoch": 7.8812223521635225, |
| "grad_norm": 1.0750905275344849, |
| "learning_rate": 3.060367854689766e-07, |
| "loss": 0.2777, |
| "step": 155000 |
| }, |
| { |
| "epoch": 7.8812223521635225, |
| "eval_bleu": 46.6901, |
| "eval_gen_len": 55.769, |
| "eval_loss": 0.7415376305580139, |
| "eval_runtime": 345.2438, |
| "eval_samples_per_second": 4.2, |
| "eval_steps_per_second": 0.35, |
| "step": 155000 |
| } |
| ], |
| "logging_steps": 5000, |
| "max_steps": 157336, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 8, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5274231723938816e+17, |
| "train_batch_size": 12, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|