| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.5286343612334802, | |
| "eval_steps": 500, | |
| "global_step": 600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00881057268722467, | |
| "grad_norm": 19.571447343122014, | |
| "learning_rate": 6.666666666666666e-07, | |
| "loss": 2.2003, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01762114537444934, | |
| "grad_norm": 6.597664324174228, | |
| "learning_rate": 1.3333333333333332e-06, | |
| "loss": 2.0343, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02643171806167401, | |
| "grad_norm": 3.014996781518845, | |
| "learning_rate": 2e-06, | |
| "loss": 1.8168, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03524229074889868, | |
| "grad_norm": 2.496924527736719, | |
| "learning_rate": 1.9995958747931082e-06, | |
| "loss": 1.7178, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04405286343612335, | |
| "grad_norm": 2.8973420754515464, | |
| "learning_rate": 1.9983838258067983e-06, | |
| "loss": 1.6557, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05286343612334802, | |
| "grad_norm": 2.3479520994877605, | |
| "learning_rate": 1.9963648326801653e-06, | |
| "loss": 1.595, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06167400881057269, | |
| "grad_norm": 2.2003866197934974, | |
| "learning_rate": 1.993540527265239e-06, | |
| "loss": 1.5428, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07048458149779736, | |
| "grad_norm": 2.222349491130299, | |
| "learning_rate": 1.98991319230804e-06, | |
| "loss": 1.5157, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.07929515418502203, | |
| "grad_norm": 2.146284797118347, | |
| "learning_rate": 1.9854857596035474e-06, | |
| "loss": 1.4995, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0881057268722467, | |
| "grad_norm": 2.305597555128469, | |
| "learning_rate": 1.980261807626078e-06, | |
| "loss": 1.4886, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.09691629955947137, | |
| "grad_norm": 2.199507218792448, | |
| "learning_rate": 1.974245558636978e-06, | |
| "loss": 1.4643, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.10572687224669604, | |
| "grad_norm": 2.6946448901304674, | |
| "learning_rate": 1.967441875271983e-06, | |
| "loss": 1.4522, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1145374449339207, | |
| "grad_norm": 1.9626073275857734, | |
| "learning_rate": 1.959856256610988e-06, | |
| "loss": 1.4149, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.12334801762114538, | |
| "grad_norm": 2.0441408462679065, | |
| "learning_rate": 1.951494833733414e-06, | |
| "loss": 1.385, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.13215859030837004, | |
| "grad_norm": 2.0166534518054493, | |
| "learning_rate": 1.942364364762762e-06, | |
| "loss": 1.3718, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.14096916299559473, | |
| "grad_norm": 1.9565509974119288, | |
| "learning_rate": 1.932472229404356e-06, | |
| "loss": 1.3775, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.14977973568281938, | |
| "grad_norm": 2.1012455112006743, | |
| "learning_rate": 1.9218264229806917e-06, | |
| "loss": 1.3579, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.15859030837004406, | |
| "grad_norm": 1.90439374473129, | |
| "learning_rate": 1.9104355499692162e-06, | |
| "loss": 1.3333, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.16740088105726872, | |
| "grad_norm": 2.2252827256286922, | |
| "learning_rate": 1.8983088170477553e-06, | |
| "loss": 1.3458, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1762114537444934, | |
| "grad_norm": 1.958512432909627, | |
| "learning_rate": 1.8854560256532098e-06, | |
| "loss": 1.332, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.18502202643171806, | |
| "grad_norm": 1.9065312694563097, | |
| "learning_rate": 1.871887564059543e-06, | |
| "loss": 1.3138, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.19383259911894274, | |
| "grad_norm": 2.6199771240261436, | |
| "learning_rate": 1.8576143989814521e-06, | |
| "loss": 1.3229, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2026431718061674, | |
| "grad_norm": 2.217202276336257, | |
| "learning_rate": 1.8426480667105175e-06, | |
| "loss": 1.3205, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.21145374449339208, | |
| "grad_norm": 1.8491248577424624, | |
| "learning_rate": 1.8270006637909905e-06, | |
| "loss": 1.3085, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.22026431718061673, | |
| "grad_norm": 1.9823551473714545, | |
| "learning_rate": 1.8106848372427548e-06, | |
| "loss": 1.2956, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2290748898678414, | |
| "grad_norm": 2.1060962042553726, | |
| "learning_rate": 1.7937137743393693e-06, | |
| "loss": 1.2804, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.23788546255506607, | |
| "grad_norm": 1.8695678284143167, | |
| "learning_rate": 1.7761011919494486e-06, | |
| "loss": 1.273, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.24669603524229075, | |
| "grad_norm": 2.56804202001478, | |
| "learning_rate": 1.7578613254499968e-06, | |
| "loss": 1.2984, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2555066079295154, | |
| "grad_norm": 1.9594579835819614, | |
| "learning_rate": 1.739008917220659e-06, | |
| "loss": 1.2554, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.2643171806167401, | |
| "grad_norm": 1.948264525063767, | |
| "learning_rate": 1.719559204728188e-06, | |
| "loss": 1.2733, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.27312775330396477, | |
| "grad_norm": 1.9555361537181306, | |
| "learning_rate": 1.6995279082107537e-06, | |
| "loss": 1.2794, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.28193832599118945, | |
| "grad_norm": 2.072778303156424, | |
| "learning_rate": 1.6789312179720546e-06, | |
| "loss": 1.2594, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.2907488986784141, | |
| "grad_norm": 1.8797335867525704, | |
| "learning_rate": 1.6577857812954991e-06, | |
| "loss": 1.2485, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.29955947136563876, | |
| "grad_norm": 2.039515984739043, | |
| "learning_rate": 1.6361086889890305e-06, | |
| "loss": 1.2645, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.30837004405286345, | |
| "grad_norm": 2.0610642486240125, | |
| "learning_rate": 1.613917461571475e-06, | |
| "loss": 1.2531, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.31718061674008813, | |
| "grad_norm": 2.06709238158364, | |
| "learning_rate": 1.5912300351115757e-06, | |
| "loss": 1.2491, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.32599118942731276, | |
| "grad_norm": 1.8437333020014937, | |
| "learning_rate": 1.5680647467311555e-06, | |
| "loss": 1.2334, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.33480176211453744, | |
| "grad_norm": 1.8965957390199724, | |
| "learning_rate": 1.5444403197841344e-06, | |
| "loss": 1.2172, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.3436123348017621, | |
| "grad_norm": 10.745414670704703, | |
| "learning_rate": 1.5203758487233676e-06, | |
| "loss": 1.2306, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.3524229074889868, | |
| "grad_norm": 1.978659425380664, | |
| "learning_rate": 1.4958907836675465e-06, | |
| "loss": 1.2425, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.36123348017621143, | |
| "grad_norm": 1.8232345405717043, | |
| "learning_rate": 1.4710049146806346e-06, | |
| "loss": 1.2401, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.3700440528634361, | |
| "grad_norm": 1.9694165773247603, | |
| "learning_rate": 1.4457383557765383e-06, | |
| "loss": 1.2137, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.3788546255506608, | |
| "grad_norm": 1.857798327139117, | |
| "learning_rate": 1.4201115286619464e-06, | |
| "loss": 1.2328, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.3876651982378855, | |
| "grad_norm": 1.9640307234634835, | |
| "learning_rate": 1.3941451462304777e-06, | |
| "loss": 1.2244, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.3964757709251101, | |
| "grad_norm": 1.9269638055698532, | |
| "learning_rate": 1.3678601958214777e-06, | |
| "loss": 1.2399, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.4052863436123348, | |
| "grad_norm": 1.8699071909555112, | |
| "learning_rate": 1.3412779222569906e-06, | |
| "loss": 1.2175, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.41409691629955947, | |
| "grad_norm": 1.8481447316610542, | |
| "learning_rate": 1.3144198106706239e-06, | |
| "loss": 1.2238, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.42290748898678415, | |
| "grad_norm": 1.8316730682769253, | |
| "learning_rate": 1.2873075691421806e-06, | |
| "loss": 1.2304, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.43171806167400884, | |
| "grad_norm": 1.8960767894113884, | |
| "learning_rate": 1.2599631111520955e-06, | |
| "loss": 1.2036, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.44052863436123346, | |
| "grad_norm": 1.9109891577097189, | |
| "learning_rate": 1.2324085378698529e-06, | |
| "loss": 1.1917, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.44933920704845814, | |
| "grad_norm": 2.010796003658019, | |
| "learning_rate": 1.20466612029071e-06, | |
| "loss": 1.2279, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.4581497797356828, | |
| "grad_norm": 2.6998433293606454, | |
| "learning_rate": 1.1767582812351548e-06, | |
| "loss": 1.1925, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.4669603524229075, | |
| "grad_norm": 2.275146777332648, | |
| "learning_rate": 1.1487075772256517e-06, | |
| "loss": 1.1742, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.47577092511013214, | |
| "grad_norm": 1.7756028193514886, | |
| "learning_rate": 1.1205366802553228e-06, | |
| "loss": 1.1833, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.4845814977973568, | |
| "grad_norm": 2.042541280215003, | |
| "learning_rate": 1.092268359463302e-06, | |
| "loss": 1.1826, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.4933920704845815, | |
| "grad_norm": 1.84844630790175, | |
| "learning_rate": 1.0639254627315658e-06, | |
| "loss": 1.2045, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5022026431718062, | |
| "grad_norm": 1.9498951674092562, | |
| "learning_rate": 1.0355308982181253e-06, | |
| "loss": 1.1895, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.5110132158590308, | |
| "grad_norm": 1.961163999725279, | |
| "learning_rate": 1.0071076158414974e-06, | |
| "loss": 1.1898, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5198237885462555, | |
| "grad_norm": 1.91915126758986, | |
| "learning_rate": 9.786785887314253e-07, | |
| "loss": 1.2014, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5286343612334802, | |
| "grad_norm": 1.845963386436035, | |
| "learning_rate": 9.502667946608332e-07, | |
| "loss": 1.207, | |
| "step": 600 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1135, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 97844723712000.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |