{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9991111111111111,
  "eval_steps": 500,
  "global_step": 562,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.017777777777777778,
      "grad_norm": 0.6805279850959778,
      "learning_rate": 0.00019644128113879004,
      "loss": 0.3007,
      "step": 10
    },
    {
      "epoch": 0.035555555555555556,
      "grad_norm": 0.9196615219116211,
      "learning_rate": 0.0001928825622775801,
      "loss": 0.1195,
      "step": 20
    },
    {
      "epoch": 0.05333333333333334,
      "grad_norm": 0.9212412238121033,
      "learning_rate": 0.0001893238434163701,
      "loss": 0.1172,
      "step": 30
    },
    {
      "epoch": 0.07111111111111111,
      "grad_norm": 0.4567917585372925,
      "learning_rate": 0.00018576512455516017,
      "loss": 0.097,
      "step": 40
    },
    {
      "epoch": 0.08888888888888889,
      "grad_norm": 0.4863280653953552,
      "learning_rate": 0.00018220640569395016,
      "loss": 0.0855,
      "step": 50
    },
    {
      "epoch": 0.10666666666666667,
      "grad_norm": 0.5411233901977539,
      "learning_rate": 0.00017864768683274022,
      "loss": 0.0906,
      "step": 60
    },
    {
      "epoch": 0.12444444444444444,
      "grad_norm": 0.539750337600708,
      "learning_rate": 0.00017508896797153024,
      "loss": 0.0881,
      "step": 70
    },
    {
      "epoch": 0.14222222222222222,
      "grad_norm": 0.394882470369339,
      "learning_rate": 0.0001715302491103203,
      "loss": 0.0737,
      "step": 80
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.45392346382141113,
      "learning_rate": 0.00016797153024911032,
      "loss": 0.0652,
      "step": 90
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 0.4979017376899719,
      "learning_rate": 0.00016441281138790037,
      "loss": 0.073,
      "step": 100
    },
    {
      "epoch": 0.19555555555555557,
      "grad_norm": 0.3595920205116272,
      "learning_rate": 0.0001608540925266904,
      "loss": 0.0658,
      "step": 110
    },
    {
      "epoch": 0.21333333333333335,
      "grad_norm": 0.42612773180007935,
      "learning_rate": 0.00015729537366548045,
      "loss": 0.074,
      "step": 120
    },
    {
      "epoch": 0.2311111111111111,
      "grad_norm": 0.35243910551071167,
      "learning_rate": 0.00015373665480427045,
      "loss": 0.0677,
      "step": 130
    },
    {
      "epoch": 0.24888888888888888,
      "grad_norm": 0.5590384006500244,
      "learning_rate": 0.0001501779359430605,
      "loss": 0.0696,
      "step": 140
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 0.7073479294776917,
      "learning_rate": 0.00014661921708185053,
      "loss": 0.069,
      "step": 150
    },
    {
      "epoch": 0.28444444444444444,
      "grad_norm": 0.32941359281539917,
      "learning_rate": 0.00014306049822064058,
      "loss": 0.0677,
      "step": 160
    },
    {
      "epoch": 0.3022222222222222,
      "grad_norm": 0.3985579311847687,
      "learning_rate": 0.0001395017793594306,
      "loss": 0.0546,
      "step": 170
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.3595498502254486,
      "learning_rate": 0.00013594306049822066,
      "loss": 0.0612,
      "step": 180
    },
    {
      "epoch": 0.3377777777777778,
      "grad_norm": 0.7771916389465332,
      "learning_rate": 0.00013238434163701069,
      "loss": 0.0659,
      "step": 190
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 0.4626810550689697,
      "learning_rate": 0.00012882562277580074,
      "loss": 0.0666,
      "step": 200
    },
    {
      "epoch": 0.37333333333333335,
      "grad_norm": 0.28734588623046875,
      "learning_rate": 0.00012526690391459074,
      "loss": 0.0471,
      "step": 210
    },
    {
      "epoch": 0.39111111111111113,
      "grad_norm": 0.47027942538261414,
      "learning_rate": 0.0001217081850533808,
      "loss": 0.0603,
      "step": 220
    },
    {
      "epoch": 0.4088888888888889,
      "grad_norm": 0.46543505787849426,
      "learning_rate": 0.00011814946619217081,
      "loss": 0.0465,
      "step": 230
    },
    {
      "epoch": 0.4266666666666667,
      "grad_norm": 0.378945529460907,
      "learning_rate": 0.00011459074733096087,
      "loss": 0.0585,
      "step": 240
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 0.4640860855579376,
      "learning_rate": 0.00011103202846975089,
      "loss": 0.0499,
      "step": 250
    },
    {
      "epoch": 0.4622222222222222,
      "grad_norm": 0.30186182260513306,
      "learning_rate": 0.00010747330960854095,
      "loss": 0.0498,
      "step": 260
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6072084903717041,
      "learning_rate": 0.00010391459074733096,
      "loss": 0.0526,
      "step": 270
    },
    {
      "epoch": 0.49777777777777776,
      "grad_norm": 0.4315777122974396,
      "learning_rate": 0.00010035587188612101,
      "loss": 0.0412,
      "step": 280
    },
    {
      "epoch": 0.5155555555555555,
      "grad_norm": 0.32349273562431335,
      "learning_rate": 9.679715302491104e-05,
      "loss": 0.0395,
      "step": 290
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 0.3251255452632904,
      "learning_rate": 9.323843416370108e-05,
      "loss": 0.0449,
      "step": 300
    },
    {
      "epoch": 0.5511111111111111,
      "grad_norm": 0.40258878469467163,
      "learning_rate": 8.96797153024911e-05,
      "loss": 0.0535,
      "step": 310
    },
    {
      "epoch": 0.5688888888888889,
      "grad_norm": 0.3351120054721832,
      "learning_rate": 8.612099644128114e-05,
      "loss": 0.0449,
      "step": 320
    },
    {
      "epoch": 0.5866666666666667,
      "grad_norm": 0.3918339014053345,
      "learning_rate": 8.256227758007118e-05,
      "loss": 0.0498,
      "step": 330
    },
    {
      "epoch": 0.6044444444444445,
      "grad_norm": 0.26887497305870056,
      "learning_rate": 7.900355871886122e-05,
      "loss": 0.0395,
      "step": 340
    },
    {
      "epoch": 0.6222222222222222,
      "grad_norm": 0.34482306241989136,
      "learning_rate": 7.544483985765126e-05,
      "loss": 0.0504,
      "step": 350
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.33622053265571594,
      "learning_rate": 7.188612099644128e-05,
      "loss": 0.049,
      "step": 360
    },
    {
      "epoch": 0.6577777777777778,
      "grad_norm": 0.4018609821796417,
      "learning_rate": 6.832740213523132e-05,
      "loss": 0.0456,
      "step": 370
    },
    {
      "epoch": 0.6755555555555556,
      "grad_norm": 0.3232245445251465,
      "learning_rate": 6.476868327402136e-05,
      "loss": 0.0473,
      "step": 380
    },
    {
      "epoch": 0.6933333333333334,
      "grad_norm": 0.2700667679309845,
      "learning_rate": 6.12099644128114e-05,
      "loss": 0.0448,
      "step": 390
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 0.35857853293418884,
      "learning_rate": 5.765124555160143e-05,
      "loss": 0.0395,
      "step": 400
    },
    {
      "epoch": 0.7288888888888889,
      "grad_norm": 0.3355509638786316,
      "learning_rate": 5.4092526690391465e-05,
      "loss": 0.0381,
      "step": 410
    },
    {
      "epoch": 0.7466666666666667,
      "grad_norm": 0.36887580156326294,
      "learning_rate": 5.0533807829181504e-05,
      "loss": 0.0393,
      "step": 420
    },
    {
      "epoch": 0.7644444444444445,
      "grad_norm": 0.2498418539762497,
      "learning_rate": 4.697508896797153e-05,
      "loss": 0.0397,
      "step": 430
    },
    {
      "epoch": 0.7822222222222223,
      "grad_norm": 0.4195568263530731,
      "learning_rate": 4.341637010676157e-05,
      "loss": 0.0399,
      "step": 440
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.2515809237957001,
      "learning_rate": 3.98576512455516e-05,
      "loss": 0.0374,
      "step": 450
    },
    {
      "epoch": 0.8177777777777778,
      "grad_norm": 0.38955771923065186,
      "learning_rate": 3.629893238434164e-05,
      "loss": 0.0404,
      "step": 460
    },
    {
      "epoch": 0.8355555555555556,
      "grad_norm": 0.3211108446121216,
      "learning_rate": 3.274021352313167e-05,
      "loss": 0.0299,
      "step": 470
    },
    {
      "epoch": 0.8533333333333334,
      "grad_norm": 0.36825743317604065,
      "learning_rate": 2.918149466192171e-05,
      "loss": 0.0365,
      "step": 480
    },
    {
      "epoch": 0.8711111111111111,
      "grad_norm": 0.3567051589488983,
      "learning_rate": 2.5622775800711747e-05,
      "loss": 0.0414,
      "step": 490
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 0.41268640756607056,
      "learning_rate": 2.2064056939501783e-05,
      "loss": 0.0396,
      "step": 500
    },
    {
      "epoch": 0.9066666666666666,
      "grad_norm": 0.38851094245910645,
      "learning_rate": 1.8505338078291815e-05,
      "loss": 0.0368,
      "step": 510
    },
    {
      "epoch": 0.9244444444444444,
      "grad_norm": 0.5653034448623657,
      "learning_rate": 1.494661921708185e-05,
      "loss": 0.0339,
      "step": 520
    },
    {
      "epoch": 0.9422222222222222,
      "grad_norm": 0.33567166328430176,
      "learning_rate": 1.1387900355871885e-05,
      "loss": 0.047,
      "step": 530
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.3209730386734009,
      "learning_rate": 7.829181494661921e-06,
      "loss": 0.0411,
      "step": 540
    },
    {
      "epoch": 0.9777777777777777,
      "grad_norm": 0.2615627348423004,
      "learning_rate": 4.270462633451958e-06,
      "loss": 0.0345,
      "step": 550
    },
    {
      "epoch": 0.9955555555555555,
      "grad_norm": 0.38623788952827454,
      "learning_rate": 7.117437722419928e-07,
      "loss": 0.0338,
      "step": 560
    }
  ],
  "logging_steps": 10,
  "max_steps": 562,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.4307316713467904e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}