| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 41800, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.023923444976076555, |
| "grad_norm": 18.739744186401367, |
| "learning_rate": 2.964114832535885e-05, |
| "loss": 1.5466, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.04784688995215311, |
| "grad_norm": 9.403435707092285, |
| "learning_rate": 2.9282296650717705e-05, |
| "loss": 1.2579, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.07177033492822966, |
| "grad_norm": 14.425116539001465, |
| "learning_rate": 2.8923444976076555e-05, |
| "loss": 1.0985, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.09569377990430622, |
| "grad_norm": 19.348230361938477, |
| "learning_rate": 2.8564593301435406e-05, |
| "loss": 1.0459, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.11961722488038277, |
| "grad_norm": 17.45675277709961, |
| "learning_rate": 2.820574162679426e-05, |
| "loss": 0.9095, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.14354066985645933, |
| "grad_norm": 19.34926414489746, |
| "learning_rate": 2.784688995215311e-05, |
| "loss": 0.8321, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.1674641148325359, |
| "grad_norm": 32.13801574707031, |
| "learning_rate": 2.748803827751196e-05, |
| "loss": 0.7624, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.19138755980861244, |
| "grad_norm": 30.122373580932617, |
| "learning_rate": 2.7129186602870814e-05, |
| "loss": 0.7133, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.215311004784689, |
| "grad_norm": 36.39912796020508, |
| "learning_rate": 2.6770334928229668e-05, |
| "loss": 0.6391, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.23923444976076555, |
| "grad_norm": 26.704458236694336, |
| "learning_rate": 2.6411483253588518e-05, |
| "loss": 0.5955, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.2631578947368421, |
| "grad_norm": 57.83340072631836, |
| "learning_rate": 2.605263157894737e-05, |
| "loss": 0.5631, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.28708133971291866, |
| "grad_norm": 14.981019973754883, |
| "learning_rate": 2.5693779904306222e-05, |
| "loss": 0.518, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.31100478468899523, |
| "grad_norm": 18.152423858642578, |
| "learning_rate": 2.5334928229665073e-05, |
| "loss": 0.5048, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.3349282296650718, |
| "grad_norm": 28.128271102905273, |
| "learning_rate": 2.4976076555023923e-05, |
| "loss": 0.4524, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.3588516746411483, |
| "grad_norm": 5.79910945892334, |
| "learning_rate": 2.4617224880382777e-05, |
| "loss": 0.4103, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.3827751196172249, |
| "grad_norm": 36.72903060913086, |
| "learning_rate": 2.4258373205741627e-05, |
| "loss": 0.3995, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.40669856459330145, |
| "grad_norm": 45.00068664550781, |
| "learning_rate": 2.3899521531100477e-05, |
| "loss": 0.3897, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.430622009569378, |
| "grad_norm": 19.64744758605957, |
| "learning_rate": 2.354066985645933e-05, |
| "loss": 0.3699, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 26.348731994628906, |
| "learning_rate": 2.318181818181818e-05, |
| "loss": 0.3556, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.4784688995215311, |
| "grad_norm": 7.985578536987305, |
| "learning_rate": 2.2822966507177032e-05, |
| "loss": 0.3329, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.5023923444976076, |
| "grad_norm": 26.768131256103516, |
| "learning_rate": 2.2464114832535886e-05, |
| "loss": 0.3031, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.5263157894736842, |
| "grad_norm": 25.47712516784668, |
| "learning_rate": 2.2105263157894736e-05, |
| "loss": 0.2956, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.5502392344497608, |
| "grad_norm": 4.995678901672363, |
| "learning_rate": 2.1746411483253587e-05, |
| "loss": 0.2791, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.5741626794258373, |
| "grad_norm": 23.144134521484375, |
| "learning_rate": 2.138755980861244e-05, |
| "loss": 0.2751, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.5980861244019139, |
| "grad_norm": 28.716154098510742, |
| "learning_rate": 2.1028708133971294e-05, |
| "loss": 0.2596, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.6220095693779905, |
| "grad_norm": 10.819624900817871, |
| "learning_rate": 2.0669856459330144e-05, |
| "loss": 0.2505, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.645933014354067, |
| "grad_norm": 4.408924102783203, |
| "learning_rate": 2.0311004784688998e-05, |
| "loss": 0.2396, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.6698564593301436, |
| "grad_norm": 20.16508674621582, |
| "learning_rate": 1.995215311004785e-05, |
| "loss": 0.2531, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.69377990430622, |
| "grad_norm": 4.035028457641602, |
| "learning_rate": 1.95933014354067e-05, |
| "loss": 0.2221, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.7177033492822966, |
| "grad_norm": 38.6718635559082, |
| "learning_rate": 1.9234449760765553e-05, |
| "loss": 0.2295, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.7416267942583732, |
| "grad_norm": 33.711509704589844, |
| "learning_rate": 1.8875598086124403e-05, |
| "loss": 0.2155, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.7655502392344498, |
| "grad_norm": 19.33466339111328, |
| "learning_rate": 1.8516746411483253e-05, |
| "loss": 0.1885, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.7894736842105263, |
| "grad_norm": 46.828521728515625, |
| "learning_rate": 1.8157894736842107e-05, |
| "loss": 0.2047, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.8133971291866029, |
| "grad_norm": 6.522198677062988, |
| "learning_rate": 1.7799043062200958e-05, |
| "loss": 0.2018, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.8373205741626795, |
| "grad_norm": 1.8948196172714233, |
| "learning_rate": 1.7440191387559808e-05, |
| "loss": 0.209, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.861244019138756, |
| "grad_norm": 60.62660598754883, |
| "learning_rate": 1.708133971291866e-05, |
| "loss": 0.1878, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.8851674641148325, |
| "grad_norm": 45.074764251708984, |
| "learning_rate": 1.6722488038277512e-05, |
| "loss": 0.1748, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 13.9043607711792, |
| "learning_rate": 1.6363636363636363e-05, |
| "loss": 0.1903, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.9330143540669856, |
| "grad_norm": 9.118316650390625, |
| "learning_rate": 1.6004784688995213e-05, |
| "loss": 0.1502, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.9569377990430622, |
| "grad_norm": 88.67637634277344, |
| "learning_rate": 1.5645933014354067e-05, |
| "loss": 0.1422, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.9808612440191388, |
| "grad_norm": 42.03529357910156, |
| "learning_rate": 1.528708133971292e-05, |
| "loss": 0.174, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.0047846889952152, |
| "grad_norm": 1.4736270904541016, |
| "learning_rate": 1.492822966507177e-05, |
| "loss": 0.1456, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.0287081339712918, |
| "grad_norm": 0.10340926051139832, |
| "learning_rate": 1.4569377990430621e-05, |
| "loss": 0.1326, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.0526315789473684, |
| "grad_norm": 20.14974021911621, |
| "learning_rate": 1.4210526315789473e-05, |
| "loss": 0.1416, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.076555023923445, |
| "grad_norm": 17.581331253051758, |
| "learning_rate": 1.3851674641148327e-05, |
| "loss": 0.1213, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.1004784688995215, |
| "grad_norm": 14.909838676452637, |
| "learning_rate": 1.3492822966507177e-05, |
| "loss": 0.1348, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.124401913875598, |
| "grad_norm": 10.54892349243164, |
| "learning_rate": 1.313397129186603e-05, |
| "loss": 0.1012, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.1483253588516746, |
| "grad_norm": 119.38558197021484, |
| "learning_rate": 1.2775119617224882e-05, |
| "loss": 0.1187, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.1722488038277512, |
| "grad_norm": 3.389181613922119, |
| "learning_rate": 1.2416267942583732e-05, |
| "loss": 0.1256, |
| "step": 24500 |
| }, |
| { |
| "epoch": 1.1961722488038278, |
| "grad_norm": 13.86876392364502, |
| "learning_rate": 1.2057416267942584e-05, |
| "loss": 0.1285, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.2200956937799043, |
| "grad_norm": 0.2061963677406311, |
| "learning_rate": 1.1698564593301434e-05, |
| "loss": 0.121, |
| "step": 25500 |
| }, |
| { |
| "epoch": 1.244019138755981, |
| "grad_norm": 5.819277286529541, |
| "learning_rate": 1.1339712918660286e-05, |
| "loss": 0.1185, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.2679425837320575, |
| "grad_norm": 25.058082580566406, |
| "learning_rate": 1.098086124401914e-05, |
| "loss": 0.1039, |
| "step": 26500 |
| }, |
| { |
| "epoch": 1.291866028708134, |
| "grad_norm": 0.010490541346371174, |
| "learning_rate": 1.062200956937799e-05, |
| "loss": 0.0996, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1.3157894736842106, |
| "grad_norm": 43.120269775390625, |
| "learning_rate": 1.0263157894736843e-05, |
| "loss": 0.1101, |
| "step": 27500 |
| }, |
| { |
| "epoch": 1.339712918660287, |
| "grad_norm": 10.757476806640625, |
| "learning_rate": 9.904306220095695e-06, |
| "loss": 0.0932, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.3636363636363638, |
| "grad_norm": 10.827556610107422, |
| "learning_rate": 9.545454545454545e-06, |
| "loss": 0.0974, |
| "step": 28500 |
| }, |
| { |
| "epoch": 1.38755980861244, |
| "grad_norm": 50.764949798583984, |
| "learning_rate": 9.186602870813397e-06, |
| "loss": 0.101, |
| "step": 29000 |
| }, |
| { |
| "epoch": 1.4114832535885167, |
| "grad_norm": 0.17654885351657867, |
| "learning_rate": 8.82775119617225e-06, |
| "loss": 0.0915, |
| "step": 29500 |
| }, |
| { |
| "epoch": 1.4354066985645932, |
| "grad_norm": 18.97323226928711, |
| "learning_rate": 8.4688995215311e-06, |
| "loss": 0.0861, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.4593301435406698, |
| "grad_norm": 130.16769409179688, |
| "learning_rate": 8.110047846889953e-06, |
| "loss": 0.0842, |
| "step": 30500 |
| }, |
| { |
| "epoch": 1.4832535885167464, |
| "grad_norm": 0.09786632657051086, |
| "learning_rate": 7.751196172248804e-06, |
| "loss": 0.0804, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1.507177033492823, |
| "grad_norm": 14.290946960449219, |
| "learning_rate": 7.392344497607656e-06, |
| "loss": 0.0628, |
| "step": 31500 |
| }, |
| { |
| "epoch": 1.5311004784688995, |
| "grad_norm": 0.4353134036064148, |
| "learning_rate": 7.033492822966507e-06, |
| "loss": 0.0649, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.555023923444976, |
| "grad_norm": 34.96877670288086, |
| "learning_rate": 6.674641148325359e-06, |
| "loss": 0.0901, |
| "step": 32500 |
| }, |
| { |
| "epoch": 1.5789473684210527, |
| "grad_norm": 3.358569860458374, |
| "learning_rate": 6.31578947368421e-06, |
| "loss": 0.0669, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1.6028708133971292, |
| "grad_norm": 0.8462654948234558, |
| "learning_rate": 5.956937799043062e-06, |
| "loss": 0.0781, |
| "step": 33500 |
| }, |
| { |
| "epoch": 1.6267942583732058, |
| "grad_norm": 25.041208267211914, |
| "learning_rate": 5.5980861244019145e-06, |
| "loss": 0.0773, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1.6507177033492821, |
| "grad_norm": 0.3199286162853241, |
| "learning_rate": 5.239234449760766e-06, |
| "loss": 0.0592, |
| "step": 34500 |
| }, |
| { |
| "epoch": 1.674641148325359, |
| "grad_norm": 0.008457236923277378, |
| "learning_rate": 4.880382775119617e-06, |
| "loss": 0.055, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.6985645933014353, |
| "grad_norm": 0.006078930571675301, |
| "learning_rate": 4.521531100478469e-06, |
| "loss": 0.0578, |
| "step": 35500 |
| }, |
| { |
| "epoch": 1.722488038277512, |
| "grad_norm": 1.8867311477661133, |
| "learning_rate": 4.162679425837321e-06, |
| "loss": 0.0745, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.7464114832535884, |
| "grad_norm": 0.48967209458351135, |
| "learning_rate": 3.8038277511961723e-06, |
| "loss": 0.0503, |
| "step": 36500 |
| }, |
| { |
| "epoch": 1.7703349282296652, |
| "grad_norm": 0.00684600742533803, |
| "learning_rate": 3.444976076555024e-06, |
| "loss": 0.067, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.7942583732057416, |
| "grad_norm": 8.393392562866211, |
| "learning_rate": 3.0861244019138756e-06, |
| "loss": 0.0543, |
| "step": 37500 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "grad_norm": 6.198997497558594, |
| "learning_rate": 2.7272727272727272e-06, |
| "loss": 0.0619, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.8421052631578947, |
| "grad_norm": 0.08396713435649872, |
| "learning_rate": 2.368421052631579e-06, |
| "loss": 0.0505, |
| "step": 38500 |
| }, |
| { |
| "epoch": 1.8660287081339713, |
| "grad_norm": 9.561423301696777, |
| "learning_rate": 2.009569377990431e-06, |
| "loss": 0.0723, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.8899521531100478, |
| "grad_norm": 0.0679645761847496, |
| "learning_rate": 1.6507177033492824e-06, |
| "loss": 0.0466, |
| "step": 39500 |
| }, |
| { |
| "epoch": 1.9138755980861244, |
| "grad_norm": 0.0894290879368782, |
| "learning_rate": 1.291866028708134e-06, |
| "loss": 0.0623, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.937799043062201, |
| "grad_norm": 70.90220642089844, |
| "learning_rate": 9.330143540669857e-07, |
| "loss": 0.0416, |
| "step": 40500 |
| }, |
| { |
| "epoch": 1.9617224880382775, |
| "grad_norm": 43.20172119140625, |
| "learning_rate": 5.741626794258373e-07, |
| "loss": 0.0473, |
| "step": 41000 |
| }, |
| { |
| "epoch": 1.985645933014354, |
| "grad_norm": 0.22573009133338928, |
| "learning_rate": 2.15311004784689e-07, |
| "loss": 0.0627, |
| "step": 41500 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 41800, |
| "total_flos": 6.553324322025062e+16, |
| "train_loss": 0.2633236618817708, |
| "train_runtime": 20696.3957, |
| "train_samples_per_second": 24.235, |
| "train_steps_per_second": 2.02 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 41800, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.553324322025062e+16, |
| "train_batch_size": 12, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|