{
  "best_metric": 3.141986846923828,
  "best_model_checkpoint": "contract1/checkpoint-1455",
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 1455,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0859106529209622,
      "grad_norm": 32.766380310058594,
      "learning_rate": 6.849315068493151e-06,
      "loss": 7.5498,
      "step": 25
    },
    {
      "epoch": 0.1718213058419244,
      "grad_norm": 55.16777420043945,
      "learning_rate": 1.5068493150684931e-05,
      "loss": 7.7409,
      "step": 50
    },
    {
      "epoch": 0.25773195876288657,
      "grad_norm": 19.554790496826172,
      "learning_rate": 2.363013698630137e-05,
      "loss": 6.8118,
      "step": 75
    },
    {
      "epoch": 0.3436426116838488,
      "grad_norm": 11.124310493469238,
      "learning_rate": 3.219178082191781e-05,
      "loss": 5.94,
      "step": 100
    },
    {
      "epoch": 0.42955326460481097,
      "grad_norm": 9.250848770141602,
      "learning_rate": 4.075342465753425e-05,
      "loss": 5.2277,
      "step": 125
    },
    {
      "epoch": 0.5154639175257731,
      "grad_norm": 3.131469964981079,
      "learning_rate": 4.9315068493150684e-05,
      "loss": 4.6817,
      "step": 150
    },
    {
      "epoch": 0.6013745704467354,
      "grad_norm": 4.209794998168945,
      "learning_rate": 4.912146676852559e-05,
      "loss": 4.3776,
      "step": 175
    },
    {
      "epoch": 0.6872852233676976,
      "grad_norm": 3.0055902004241943,
      "learning_rate": 4.816653934300993e-05,
      "loss": 4.3974,
      "step": 200
    },
    {
      "epoch": 0.7731958762886598,
      "grad_norm": 2.923142433166504,
      "learning_rate": 4.7211611917494275e-05,
      "loss": 4.2811,
      "step": 225
    },
    {
      "epoch": 0.8591065292096219,
      "grad_norm": 3.110403060913086,
      "learning_rate": 4.625668449197861e-05,
      "loss": 4.1323,
      "step": 250
    },
    {
      "epoch": 0.9450171821305842,
      "grad_norm": 2.941375970840454,
      "learning_rate": 4.530175706646295e-05,
      "loss": 4.1902,
      "step": 275
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 15.1115,
      "eval_loss": 3.686694622039795,
      "eval_rouge1": 17.9543,
      "eval_rouge2": 4.0352,
      "eval_rougeL": 16.3506,
      "eval_rougeLsum": 16.4818,
      "eval_runtime": 11.3626,
      "eval_samples_per_second": 25.61,
      "eval_steps_per_second": 3.256,
      "step": 291
    },
    {
      "epoch": 1.0309278350515463,
      "grad_norm": 3.0971412658691406,
      "learning_rate": 4.434682964094729e-05,
      "loss": 4.0055,
      "step": 300
    },
    {
      "epoch": 1.1168384879725086,
      "grad_norm": 3.2513363361358643,
      "learning_rate": 4.339190221543163e-05,
      "loss": 3.9592,
      "step": 325
    },
    {
      "epoch": 1.2027491408934707,
      "grad_norm": 4.395771026611328,
      "learning_rate": 4.2436974789915967e-05,
      "loss": 3.8709,
      "step": 350
    },
    {
      "epoch": 1.2886597938144329,
      "grad_norm": 2.7217512130737305,
      "learning_rate": 4.1482047364400305e-05,
      "loss": 3.7554,
      "step": 375
    },
    {
      "epoch": 1.3745704467353952,
      "grad_norm": 3.703568696975708,
      "learning_rate": 4.052711993888464e-05,
      "loss": 3.7343,
      "step": 400
    },
    {
      "epoch": 1.4604810996563573,
      "grad_norm": 2.7263598442077637,
      "learning_rate": 3.957219251336899e-05,
      "loss": 3.7497,
      "step": 425
    },
    {
      "epoch": 1.5463917525773194,
      "grad_norm": 2.4919683933258057,
      "learning_rate": 3.861726508785333e-05,
      "loss": 3.7073,
      "step": 450
    },
    {
      "epoch": 1.6323024054982818,
      "grad_norm": 2.5988521575927734,
      "learning_rate": 3.7662337662337665e-05,
      "loss": 3.6325,
      "step": 475
    },
    {
      "epoch": 1.718213058419244,
      "grad_norm": 3.717288017272949,
      "learning_rate": 3.6707410236822004e-05,
      "loss": 3.687,
      "step": 500
    },
    {
      "epoch": 1.8041237113402062,
      "grad_norm": 3.393786668777466,
      "learning_rate": 3.575248281130634e-05,
      "loss": 3.7349,
      "step": 525
    },
    {
      "epoch": 1.8900343642611683,
      "grad_norm": 2.5332796573638916,
      "learning_rate": 3.479755538579068e-05,
      "loss": 3.7308,
      "step": 550
    },
    {
      "epoch": 1.9759450171821307,
      "grad_norm": 3.4967894554138184,
      "learning_rate": 3.384262796027502e-05,
      "loss": 3.6033,
      "step": 575
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 14.7061,
      "eval_loss": 3.3814778327941895,
      "eval_rouge1": 20.6781,
      "eval_rouge2": 5.109,
      "eval_rougeL": 17.5025,
      "eval_rougeLsum": 17.5956,
      "eval_runtime": 11.6963,
      "eval_samples_per_second": 24.88,
      "eval_steps_per_second": 3.163,
      "step": 582
    },
    {
      "epoch": 2.0618556701030926,
      "grad_norm": 3.7303099632263184,
      "learning_rate": 3.288770053475936e-05,
      "loss": 3.4857,
      "step": 600
    },
    {
      "epoch": 2.147766323024055,
      "grad_norm": 2.58085036277771,
      "learning_rate": 3.1932773109243696e-05,
      "loss": 3.7377,
      "step": 625
    },
    {
      "epoch": 2.2336769759450172,
      "grad_norm": 2.9038166999816895,
      "learning_rate": 3.097784568372804e-05,
      "loss": 3.4969,
      "step": 650
    },
    {
      "epoch": 2.319587628865979,
      "grad_norm": 1.8798184394836426,
      "learning_rate": 3.002291825821238e-05,
      "loss": 3.3667,
      "step": 675
    },
    {
      "epoch": 2.4054982817869415,
      "grad_norm": 2.5839955806732178,
      "learning_rate": 2.9067990832696718e-05,
      "loss": 3.5371,
      "step": 700
    },
    {
      "epoch": 2.491408934707904,
      "grad_norm": 14.803485870361328,
      "learning_rate": 2.8113063407181056e-05,
      "loss": 3.4758,
      "step": 725
    },
    {
      "epoch": 2.5773195876288657,
      "grad_norm": 2.901104688644409,
      "learning_rate": 2.7158135981665394e-05,
      "loss": 3.4274,
      "step": 750
    },
    {
      "epoch": 2.663230240549828,
      "grad_norm": 3.5598862171173096,
      "learning_rate": 2.6203208556149733e-05,
      "loss": 3.5939,
      "step": 775
    },
    {
      "epoch": 2.7491408934707904,
      "grad_norm": 2.656578540802002,
      "learning_rate": 2.524828113063407e-05,
      "loss": 3.5227,
      "step": 800
    },
    {
      "epoch": 2.8350515463917527,
      "grad_norm": 2.2073974609375,
      "learning_rate": 2.4293353705118413e-05,
      "loss": 3.5447,
      "step": 825
    },
    {
      "epoch": 2.9209621993127146,
      "grad_norm": 3.0660665035247803,
      "learning_rate": 2.333842627960275e-05,
      "loss": 3.4734,
      "step": 850
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 16.5439,
      "eval_loss": 3.232574462890625,
      "eval_rouge1": 20.2411,
      "eval_rouge2": 5.2598,
      "eval_rougeL": 17.2676,
      "eval_rougeLsum": 17.4831,
      "eval_runtime": 12.7924,
      "eval_samples_per_second": 22.748,
      "eval_steps_per_second": 2.892,
      "step": 873
    },
    {
      "epoch": 3.006872852233677,
      "grad_norm": 2.2971296310424805,
      "learning_rate": 2.238349885408709e-05,
      "loss": 3.4626,
      "step": 875
    },
    {
      "epoch": 3.0927835051546393,
      "grad_norm": 5.520618438720703,
      "learning_rate": 2.1428571428571428e-05,
      "loss": 3.4557,
      "step": 900
    },
    {
      "epoch": 3.178694158075601,
      "grad_norm": 2.2981772422790527,
      "learning_rate": 2.047364400305577e-05,
      "loss": 3.2812,
      "step": 925
    },
    {
      "epoch": 3.2646048109965635,
      "grad_norm": 6.0153069496154785,
      "learning_rate": 1.951871657754011e-05,
      "loss": 3.4321,
      "step": 950
    },
    {
      "epoch": 3.350515463917526,
      "grad_norm": 2.2888569831848145,
      "learning_rate": 1.8563789152024447e-05,
      "loss": 3.392,
      "step": 975
    },
    {
      "epoch": 3.436426116838488,
      "grad_norm": 5.259116172790527,
      "learning_rate": 1.7608861726508785e-05,
      "loss": 3.4009,
      "step": 1000
    },
    {
      "epoch": 3.52233676975945,
      "grad_norm": 2.115800380706787,
      "learning_rate": 1.6653934300993127e-05,
      "loss": 3.3249,
      "step": 1025
    },
    {
      "epoch": 3.6082474226804124,
      "grad_norm": 2.3146419525146484,
      "learning_rate": 1.5699006875477465e-05,
      "loss": 3.2829,
      "step": 1050
    },
    {
      "epoch": 3.6941580756013748,
      "grad_norm": 2.9118130207061768,
      "learning_rate": 1.4744079449961804e-05,
      "loss": 3.4347,
      "step": 1075
    },
    {
      "epoch": 3.7800687285223367,
      "grad_norm": 2.7317888736724854,
      "learning_rate": 1.3789152024446142e-05,
      "loss": 3.2167,
      "step": 1100
    },
    {
      "epoch": 3.865979381443299,
      "grad_norm": 4.284421920776367,
      "learning_rate": 1.2834224598930484e-05,
      "loss": 3.431,
      "step": 1125
    },
    {
      "epoch": 3.9518900343642613,
      "grad_norm": 3.761094808578491,
      "learning_rate": 1.1879297173414822e-05,
      "loss": 3.4635,
      "step": 1150
    },
    {
      "epoch": 4.0,
      "eval_gen_len": 15.6284,
      "eval_loss": 3.164484739303589,
      "eval_rouge1": 20.158,
      "eval_rouge2": 4.9421,
      "eval_rougeL": 17.0338,
      "eval_rougeLsum": 17.2585,
      "eval_runtime": 11.6665,
      "eval_samples_per_second": 24.943,
      "eval_steps_per_second": 3.171,
      "step": 1164
    },
    {
      "epoch": 4.037800687285223,
      "grad_norm": 2.3253726959228516,
      "learning_rate": 1.092436974789916e-05,
      "loss": 3.3823,
      "step": 1175
    },
    {
      "epoch": 4.123711340206185,
      "grad_norm": 5.085910797119141,
      "learning_rate": 9.969442322383499e-06,
      "loss": 3.2498,
      "step": 1200
    },
    {
      "epoch": 4.209621993127148,
      "grad_norm": 2.912647008895874,
      "learning_rate": 9.014514896867839e-06,
      "loss": 3.3191,
      "step": 1225
    },
    {
      "epoch": 4.29553264604811,
      "grad_norm": 5.910384178161621,
      "learning_rate": 8.059587471352178e-06,
      "loss": 3.4222,
      "step": 1250
    },
    {
      "epoch": 4.381443298969073,
      "grad_norm": 10.643930435180664,
      "learning_rate": 7.104660045836517e-06,
      "loss": 3.4691,
      "step": 1275
    },
    {
      "epoch": 4.4673539518900345,
      "grad_norm": 2.9152700901031494,
      "learning_rate": 6.149732620320856e-06,
      "loss": 3.2257,
      "step": 1300
    },
    {
      "epoch": 4.553264604810996,
      "grad_norm": 2.8727643489837646,
      "learning_rate": 5.194805194805195e-06,
      "loss": 3.3841,
      "step": 1325
    },
    {
      "epoch": 4.639175257731958,
      "grad_norm": 8.290576934814453,
      "learning_rate": 4.239877769289534e-06,
      "loss": 3.1381,
      "step": 1350
    },
    {
      "epoch": 4.725085910652921,
      "grad_norm": 2.3321030139923096,
      "learning_rate": 3.2849503437738733e-06,
      "loss": 3.3243,
      "step": 1375
    },
    {
      "epoch": 4.810996563573883,
      "grad_norm": 3.101409912109375,
      "learning_rate": 2.3300229182582125e-06,
      "loss": 3.1536,
      "step": 1400
    },
    {
      "epoch": 4.896907216494846,
      "grad_norm": 4.4823174476623535,
      "learning_rate": 1.3750954927425516e-06,
      "loss": 3.3531,
      "step": 1425
    },
    {
      "epoch": 4.982817869415808,
      "grad_norm": 2.517242193222046,
      "learning_rate": 4.2016806722689076e-07,
      "loss": 3.4086,
      "step": 1450
    },
    {
      "epoch": 5.0,
      "eval_gen_len": 15.5,
      "eval_loss": 3.141986846923828,
      "eval_rouge1": 19.8864,
      "eval_rouge2": 4.9499,
      "eval_rougeL": 16.8946,
      "eval_rougeLsum": 17.1002,
      "eval_runtime": 12.3635,
      "eval_samples_per_second": 23.537,
      "eval_steps_per_second": 2.993,
      "step": 1455
    }
  ],
  "logging_steps": 25,
  "max_steps": 1455,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9422115569664.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}