{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.4454976303317535,
  "eval_steps": 500,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08616975441619991,
      "grad_norm": 1.8284112215042114,
      "learning_rate": 1.98e-05,
      "loss": 5.544659423828125,
      "step": 100
    },
    {
      "epoch": 0.17233950883239982,
      "grad_norm": 0.5319015383720398,
      "learning_rate": 3.979999999999999e-05,
      "loss": 3.90488037109375,
      "step": 200
    },
    {
      "epoch": 0.25850926324859974,
      "grad_norm": 1.3977950811386108,
      "learning_rate": 5.98e-05,
      "loss": 3.39756103515625,
      "step": 300
    },
    {
      "epoch": 0.34467901766479964,
      "grad_norm": 1.9291573762893677,
      "learning_rate": 7.98e-05,
      "loss": 3.019991149902344,
      "step": 400
    },
    {
      "epoch": 0.4308487720809996,
      "grad_norm": 1.4095340967178345,
      "learning_rate": 9.979999999999999e-05,
      "loss": 2.815445861816406,
      "step": 500
    },
    {
      "epoch": 0.5170185264971995,
      "grad_norm": 3.2716641426086426,
      "learning_rate": 0.00011979999999999998,
      "loss": 2.6590045166015623,
      "step": 600
    },
    {
      "epoch": 0.6031882809133994,
      "grad_norm": 1.3838716745376587,
      "learning_rate": 0.00013979999999999998,
      "loss": 2.543310089111328,
      "step": 700
    },
    {
      "epoch": 0.6893580353295993,
      "grad_norm": 1.069161057472229,
      "learning_rate": 0.00015979999999999998,
      "loss": 2.396273651123047,
      "step": 800
    },
    {
      "epoch": 0.7755277897457993,
      "grad_norm": 0.8585665822029114,
      "learning_rate": 0.0001798,
      "loss": 2.242165985107422,
      "step": 900
    },
    {
      "epoch": 0.8616975441619992,
      "grad_norm": 0.7467069625854492,
      "learning_rate": 0.0001998,
      "loss": 2.1027012634277344,
      "step": 1000
    },
    {
      "epoch": 0.9478672985781991,
      "grad_norm": 0.5805935859680176,
      "learning_rate": 0.00021979999999999998,
      "loss": 2.037454376220703,
      "step": 1100
    },
    {
      "epoch": 1.033606204222318,
      "grad_norm": 0.5948718786239624,
      "learning_rate": 0.00023979999999999997,
      "loss": 1.9681085205078126,
      "step": 1200
    },
    {
      "epoch": 1.1197759586385179,
      "grad_norm": 0.5413378477096558,
      "learning_rate": 0.00025979999999999997,
      "loss": 1.9135774230957032,
      "step": 1300
    },
    {
      "epoch": 1.2059457130547178,
      "grad_norm": 0.5196030139923096,
      "learning_rate": 0.00027979999999999997,
      "loss": 1.8392716979980468,
      "step": 1400
    },
    {
      "epoch": 1.2921154674709177,
      "grad_norm": 0.49619364738464355,
      "learning_rate": 0.00029979999999999997,
      "loss": 1.8049734497070313,
      "step": 1500
    },
    {
      "epoch": 1.3782852218871176,
      "grad_norm": 0.44414839148521423,
      "learning_rate": 0.000299991068233357,
      "loss": 1.7638165283203124,
      "step": 1600
    },
    {
      "epoch": 1.4644549763033177,
      "grad_norm": 0.46444711089134216,
      "learning_rate": 0.0002999639122316208,
      "loss": 1.7137832641601562,
      "step": 1700
    },
    {
      "epoch": 1.5506247307195173,
      "grad_norm": 0.5176238417625427,
      "learning_rate": 0.0002999185343831476,
      "loss": 1.675589599609375,
      "step": 1800
    },
    {
      "epoch": 1.6367944851357175,
      "grad_norm": 0.4177858829498291,
      "learning_rate": 0.0002998549402017187,
      "loss": 1.6349491882324219,
      "step": 1900
    },
    {
      "epoch": 1.7229642395519171,
      "grad_norm": 0.42198434472084045,
      "learning_rate": 0.0002997731374145493,
      "loss": 1.596505126953125,
      "step": 2000
    },
    {
      "epoch": 1.8091339939681172,
      "grad_norm": 0.4523915946483612,
      "learning_rate": 0.0002996731359613498,
      "loss": 1.5908058166503907,
      "step": 2100
    },
    {
      "epoch": 1.8953037483843171,
      "grad_norm": 0.3901713788509369,
      "learning_rate": 0.0002995549479931178,
      "loss": 1.5610142517089844,
      "step": 2200
    },
    {
      "epoch": 1.981473502800517,
      "grad_norm": 0.41816478967666626,
      "learning_rate": 0.00029941858787066206,
      "loss": 1.5319706726074218,
      "step": 2300
    },
    {
      "epoch": 2.067212408444636,
      "grad_norm": 0.3872755765914917,
      "learning_rate": 0.00029926407216285706,
      "loss": 1.5055549621582032,
      "step": 2400
    },
    {
      "epoch": 2.1533821628608356,
      "grad_norm": 0.4193103611469269,
      "learning_rate": 0.0002990914196446301,
      "loss": 1.4792218017578125,
      "step": 2500
    },
    {
      "epoch": 2.2395519172770357,
      "grad_norm": 0.4024358093738556,
      "learning_rate": 0.00029890065129467986,
      "loss": 1.4786280822753906,
      "step": 2600
    },
    {
      "epoch": 2.325721671693236,
      "grad_norm": 0.37588468194007874,
      "learning_rate": 0.0002986917902929273,
      "loss": 1.4545697021484374,
      "step": 2700
    },
    {
      "epoch": 2.4118914261094355,
      "grad_norm": 0.39736974239349365,
      "learning_rate": 0.0002984648620176991,
      "loss": 1.4498170471191407,
      "step": 2800
    },
    {
      "epoch": 2.4980611805256356,
      "grad_norm": 0.42380592226982117,
      "learning_rate": 0.00029821989404264424,
      "loss": 1.4262150573730468,
      "step": 2900
    },
    {
      "epoch": 2.5842309349418353,
      "grad_norm": 0.411803662776947,
      "learning_rate": 0.00029795691613338307,
      "loss": 1.417086181640625,
      "step": 3000
    },
    {
      "epoch": 2.6704006893580354,
      "grad_norm": 0.3662901818752289,
      "learning_rate": 0.000297675960243891,
      "loss": 1.3942941284179688,
      "step": 3100
    },
    {
      "epoch": 2.756570443774235,
      "grad_norm": 0.3642771244049072,
      "learning_rate": 0.00029737706051261557,
      "loss": 1.38471923828125,
      "step": 3200
    },
    {
      "epoch": 2.842740198190435,
      "grad_norm": 0.4138600826263428,
      "learning_rate": 0.00029706025325832857,
      "loss": 1.3765927124023438,
      "step": 3300
    },
    {
      "epoch": 2.9289099526066353,
      "grad_norm": 0.3687536418437958,
      "learning_rate": 0.0002967255769757127,
      "loss": 1.3617820739746094,
      "step": 3400
    },
    {
      "epoch": 3.014648858250754,
      "grad_norm": 0.3252148926258087,
      "learning_rate": 0.0002963730723306845,
      "loss": 1.3490205383300782,
      "step": 3500
    },
    {
      "epoch": 3.100818612666954,
      "grad_norm": 0.3874260187149048,
      "learning_rate": 0.0002960027821554529,
      "loss": 1.3380169677734375,
      "step": 3600
    },
    {
      "epoch": 3.1869883670831536,
      "grad_norm": 0.37778887152671814,
      "learning_rate": 0.00029561475144331467,
      "loss": 1.3190237426757812,
      "step": 3700
    },
    {
      "epoch": 3.2731581214993537,
      "grad_norm": 0.37266016006469727,
      "learning_rate": 0.00029520902734318766,
      "loss": 1.313209991455078,
      "step": 3800
    },
    {
      "epoch": 3.359327875915554,
      "grad_norm": 0.3792646527290344,
      "learning_rate": 0.00029478565915388153,
      "loss": 1.3055996704101562,
      "step": 3900
    },
    {
      "epoch": 3.4454976303317535,
      "grad_norm": 0.3583495318889618,
      "learning_rate": 0.00029434469831810764,
      "loss": 1.301021728515625,
      "step": 4000
    }
  ],
  "logging_steps": 100,
  "max_steps": 30000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 26,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.9825523114901504e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
|
|