| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 722, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.027710426047800486, |
| "grad_norm": 9.779006004333496, |
| "learning_rate": 2.6027397260273973e-06, |
| "loss": 1.6182, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05542085209560097, |
| "grad_norm": 1.9644428491592407, |
| "learning_rate": 5.342465753424658e-06, |
| "loss": 0.9549, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08313127814340146, |
| "grad_norm": 1.7376688718795776, |
| "learning_rate": 8.082191780821919e-06, |
| "loss": 0.7816, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.11084170419120194, |
| "grad_norm": 3.079080104827881, |
| "learning_rate": 9.997891263419896e-06, |
| "loss": 0.6767, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.13855213023900242, |
| "grad_norm": 1.5074495077133179, |
| "learning_rate": 9.960452074303327e-06, |
| "loss": 0.5472, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.16626255628680292, |
| "grad_norm": 1.528727412223816, |
| "learning_rate": 9.876555756875807e-06, |
| "loss": 0.4724, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1939729823346034, |
| "grad_norm": 1.4423059225082397, |
| "learning_rate": 9.746988042341907e-06, |
| "loss": 0.4629, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.22168340838240388, |
| "grad_norm": 1.143700122833252, |
| "learning_rate": 9.57296239750846e-06, |
| "loss": 0.4399, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.24939383443020435, |
| "grad_norm": 1.106168508529663, |
| "learning_rate": 9.356108660057662e-06, |
| "loss": 0.4209, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.27710426047800485, |
| "grad_norm": 1.386186957359314, |
| "learning_rate": 9.09845777429752e-06, |
| "loss": 0.4259, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.30481468652580535, |
| "grad_norm": 1.0028117895126343, |
| "learning_rate": 8.802422770347044e-06, |
| "loss": 0.4076, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.33252511257360584, |
| "grad_norm": 0.9471271634101868, |
| "learning_rate": 8.47077616489565e-06, |
| "loss": 0.4109, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3602355386214063, |
| "grad_norm": 0.974446177482605, |
| "learning_rate": 8.106623995190058e-06, |
| "loss": 0.4002, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.3879459646692068, |
| "grad_norm": 0.9693788290023804, |
| "learning_rate": 7.71337672943343e-06, |
| "loss": 0.4071, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4156563907170073, |
| "grad_norm": 0.9601064324378967, |
| "learning_rate": 7.294717326035508e-06, |
| "loss": 0.4062, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.44336681676480777, |
| "grad_norm": 1.0168567895889282, |
| "learning_rate": 6.854566740854932e-06, |
| "loss": 0.3955, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.47107724281260827, |
| "grad_norm": 1.0382839441299438, |
| "learning_rate": 6.397047205475757e-06, |
| "loss": 0.3957, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.4987876688604087, |
| "grad_norm": 0.986349880695343, |
| "learning_rate": 5.926443620435572e-06, |
| "loss": 0.3974, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5264980949082092, |
| "grad_norm": 0.9706918001174927, |
| "learning_rate": 5.447163424977076e-06, |
| "loss": 0.395, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5542085209560097, |
| "grad_norm": 0.882785439491272, |
| "learning_rate": 4.963695319163041e-06, |
| "loss": 0.4023, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5819189470038102, |
| "grad_norm": 0.9751160144805908, |
| "learning_rate": 4.480567224942845e-06, |
| "loss": 0.3917, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6096293730516107, |
| "grad_norm": 0.9817010164260864, |
| "learning_rate": 4.002303879886288e-06, |
| "loss": 0.3938, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6373397990994112, |
| "grad_norm": 0.929880678653717, |
| "learning_rate": 3.5333844607407497e-06, |
| "loss": 0.3935, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.6650502251472117, |
| "grad_norm": 0.8725020885467529, |
| "learning_rate": 3.078200633688352e-06, |
| "loss": 0.3805, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.6927606511950122, |
| "grad_norm": 0.8537760376930237, |
| "learning_rate": 2.6410154241835663e-06, |
| "loss": 0.3809, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7204710772428126, |
| "grad_norm": 0.9811424016952515, |
| "learning_rate": 2.22592329157594e-06, |
| "loss": 0.3745, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.7481815032906131, |
| "grad_norm": 0.9665875434875488, |
| "learning_rate": 1.8368117824391623e-06, |
| "loss": 0.3809, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.7758919293384136, |
| "grad_norm": 0.9251068234443665, |
| "learning_rate": 1.4773251217423424e-06, |
| "loss": 0.3839, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.803602355386214, |
| "grad_norm": 0.9169190526008606, |
| "learning_rate": 1.1508300828504682e-06, |
| "loss": 0.3684, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.8313127814340145, |
| "grad_norm": 0.9597139954566956, |
| "learning_rate": 8.603844559986823e-07, |
| "loss": 0.3814, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.859023207481815, |
| "grad_norm": 1.081009864807129, |
| "learning_rate": 6.087084105489449e-07, |
| "loss": 0.3864, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.8867336335296155, |
| "grad_norm": 0.9979385733604431, |
| "learning_rate": 3.9815901923598354e-07, |
| "loss": 0.377, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.914444059577416, |
| "grad_norm": 0.9564194679260254, |
| "learning_rate": 2.3070818299573972e-07, |
| "loss": 0.3775, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.9421544856252165, |
| "grad_norm": 0.9237750172615051, |
| "learning_rate": 1.0792416312143172e-07, |
| "loss": 0.3817, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.969864911673017, |
| "grad_norm": 0.8861544728279114, |
| "learning_rate": 3.095689370785249e-08, |
| "loss": 0.3707, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.9975753377208174, |
| "grad_norm": 0.9864802956581116, |
| "learning_rate": 5.272119402693898e-10, |
| "loss": 0.3693, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 722, |
| "total_flos": 7.079433197162856e+18, |
| "train_loss": 0.4693530139995744, |
| "train_runtime": 17945.1855, |
| "train_samples_per_second": 5.148, |
| "train_steps_per_second": 0.04 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 722, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.079433197162856e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|