{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.984615384615385,
  "eval_steps": 500,
  "global_step": 582,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010256410256410256,
      "eval_loss": 0.3835288882255554,
      "eval_runtime": 178.3246,
      "eval_samples_per_second": 1.155,
      "eval_steps_per_second": 0.292,
      "step": 1
    },
    {
      "epoch": 0.20512820512820512,
      "grad_norm": 6.791983604431152,
      "learning_rate": 9.5e-06,
      "loss": 0.2452,
      "step": 20
    },
    {
      "epoch": 0.41025641025641024,
      "grad_norm": 11.432096481323242,
      "learning_rate": 1.9500000000000003e-05,
      "loss": 0.1853,
      "step": 40
    },
    {
      "epoch": 0.6153846153846154,
      "grad_norm": 1.4045106172561646,
      "learning_rate": 2.95e-05,
      "loss": 0.1126,
      "step": 60
    },
    {
      "epoch": 0.8205128205128205,
      "grad_norm": 11.10899543762207,
      "learning_rate": 3.9500000000000005e-05,
      "loss": 0.2361,
      "step": 80
    },
    {
      "epoch": 1.0205128205128204,
      "grad_norm": 2.5366597175598145,
      "learning_rate": 4.9500000000000004e-05,
      "loss": 0.1393,
      "step": 100
    },
    {
      "epoch": 1.2256410256410257,
      "grad_norm": 19.193058013916016,
      "learning_rate": 3.9893617021276594e-05,
      "loss": 0.182,
      "step": 120
    },
    {
      "epoch": 1.4307692307692308,
      "grad_norm": 2.2622101306915283,
      "learning_rate": 2.925531914893617e-05,
      "loss": 0.0682,
      "step": 140
    },
    {
      "epoch": 1.6358974358974359,
      "grad_norm": 1.3008161783218384,
      "learning_rate": 1.8617021276595745e-05,
      "loss": 0.0844,
      "step": 160
    },
    {
      "epoch": 1.8410256410256411,
      "grad_norm": 1.2486586570739746,
      "learning_rate": 7.97872340425532e-06,
      "loss": 0.1358,
      "step": 180
    },
    {
      "epoch": 2.0615384615384613,
      "grad_norm": 0.1917697638273239,
      "learning_rate": 2.4083769633507854e-05,
      "loss": 0.0433,
      "step": 200
    },
    {
      "epoch": 2.2666666666666666,
      "grad_norm": 0.0892116129398346,
      "learning_rate": 2.934027777777778e-05,
      "loss": 0.0348,
      "step": 220
    },
    {
      "epoch": 2.471794871794872,
      "grad_norm": 1.184501051902771,
      "learning_rate": 2.5868055555555558e-05,
      "loss": 0.0639,
      "step": 240
    },
    {
      "epoch": 2.676923076923077,
      "grad_norm": 0.4964236617088318,
      "learning_rate": 2.2395833333333337e-05,
      "loss": 0.1196,
      "step": 260
    },
    {
      "epoch": 2.882051282051282,
      "grad_norm": 8.024321556091309,
      "learning_rate": 1.8923611111111112e-05,
      "loss": 0.0912,
      "step": 280
    },
    {
      "epoch": 3.0923076923076924,
      "grad_norm": 0.8915512561798096,
      "learning_rate": 1.545138888888889e-05,
      "loss": 0.0188,
      "step": 300
    },
    {
      "epoch": 3.2974358974358973,
      "grad_norm": 0.3651086091995239,
      "learning_rate": 1.1979166666666667e-05,
      "loss": 0.0696,
      "step": 320
    },
    {
      "epoch": 3.5025641025641026,
      "grad_norm": 7.531687259674072,
      "learning_rate": 8.506944444444445e-06,
      "loss": 0.0439,
      "step": 340
    },
    {
      "epoch": 3.707692307692308,
      "grad_norm": 16.617700576782227,
      "learning_rate": 5.034722222222222e-06,
      "loss": 0.0434,
      "step": 360
    },
    {
      "epoch": 3.9128205128205127,
      "grad_norm": 0.07135667651891708,
      "learning_rate": 1.5625e-06,
      "loss": 0.0137,
      "step": 380
    },
    {
      "epoch": 4.123076923076923,
      "grad_norm": 4.136926174163818,
      "learning_rate": 1.8983402489626556e-05,
      "loss": 0.1377,
      "step": 400
    },
    {
      "epoch": 4.328205128205128,
      "grad_norm": 0.3030303716659546,
      "learning_rate": 1.690871369294606e-05,
      "loss": 0.0382,
      "step": 420
    },
    {
      "epoch": 4.533333333333333,
      "grad_norm": 0.07416559755802155,
      "learning_rate": 1.4834024896265561e-05,
      "loss": 0.0615,
      "step": 440
    },
    {
      "epoch": 4.7384615384615385,
      "grad_norm": 6.657896995544434,
      "learning_rate": 1.2759336099585062e-05,
      "loss": 0.0241,
      "step": 460
    },
    {
      "epoch": 4.943589743589744,
      "grad_norm": 0.006628558039665222,
      "learning_rate": 1.0684647302904565e-05,
      "loss": 0.0742,
      "step": 480
    },
    {
      "epoch": 5.143589743589744,
      "grad_norm": 0.06650497764348984,
      "learning_rate": 8.609958506224066e-06,
      "loss": 0.0863,
      "step": 500
    },
    {
      "epoch": 5.143589743589744,
      "eval_loss": 0.3170950710773468,
      "eval_runtime": 176.7917,
      "eval_samples_per_second": 1.165,
      "eval_steps_per_second": 0.294,
      "step": 500
    },
    {
      "epoch": 5.348717948717948,
      "grad_norm": 0.06573180109262466,
      "learning_rate": 6.535269709543569e-06,
      "loss": 0.0203,
      "step": 520
    },
    {
      "epoch": 5.553846153846154,
      "grad_norm": 0.2770240604877472,
      "learning_rate": 4.460580912863071e-06,
      "loss": 0.0249,
      "step": 540
    },
    {
      "epoch": 5.758974358974359,
      "grad_norm": 0.49672994017601013,
      "learning_rate": 2.3858921161825725e-06,
      "loss": 0.0353,
      "step": 560
    },
    {
      "epoch": 5.964102564102564,
      "grad_norm": 0.03965625539422035,
      "learning_rate": 3.112033195020747e-07,
      "loss": 0.052,
      "step": 580
    },
    {
      "epoch": 5.984615384615385,
      "step": 582,
      "total_flos": 1.401094867087152e+17,
      "train_loss": 0.017203848492162128,
      "train_runtime": 4704.2352,
      "train_samples_per_second": 1.987,
      "train_steps_per_second": 0.124
    }
  ],
  "logging_steps": 20,
  "max_steps": 582,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.401094867087152e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}