{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.011109136153572698,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00022218272307145397,
      "grad_norm": 3.34375,
      "learning_rate": 1.9640000000000002e-05,
      "loss": 1.2832,
      "step": 10
    },
    {
      "epoch": 0.00044436544614290794,
      "grad_norm": 3.71875,
      "learning_rate": 1.9240000000000002e-05,
      "loss": 0.6855,
      "step": 20
    },
    {
      "epoch": 0.0006665481692143619,
      "grad_norm": 2.65625,
      "learning_rate": 1.884e-05,
      "loss": 0.619,
      "step": 30
    },
    {
      "epoch": 0.0008887308922858159,
      "grad_norm": 2.328125,
      "learning_rate": 1.8440000000000003e-05,
      "loss": 0.5564,
      "step": 40
    },
    {
      "epoch": 0.0011109136153572697,
      "grad_norm": 2.9375,
      "learning_rate": 1.8040000000000003e-05,
      "loss": 0.5817,
      "step": 50
    },
    {
      "epoch": 0.0013330963384287238,
      "grad_norm": 2.515625,
      "learning_rate": 1.764e-05,
      "loss": 0.5999,
      "step": 60
    },
    {
      "epoch": 0.0015552790615001777,
      "grad_norm": 2.171875,
      "learning_rate": 1.724e-05,
      "loss": 0.5754,
      "step": 70
    },
    {
      "epoch": 0.0017774617845716318,
      "grad_norm": 2.625,
      "learning_rate": 1.684e-05,
      "loss": 0.5528,
      "step": 80
    },
    {
      "epoch": 0.0019996445076430856,
      "grad_norm": 2.40625,
      "learning_rate": 1.6440000000000002e-05,
      "loss": 0.5035,
      "step": 90
    },
    {
      "epoch": 0.0022218272307145395,
      "grad_norm": 1.9140625,
      "learning_rate": 1.6040000000000002e-05,
      "loss": 0.532,
      "step": 100
    },
    {
      "epoch": 0.002444009953785994,
      "grad_norm": 2.75,
      "learning_rate": 1.5640000000000003e-05,
      "loss": 0.5373,
      "step": 110
    },
    {
      "epoch": 0.0026661926768574476,
      "grad_norm": 2.15625,
      "learning_rate": 1.5240000000000001e-05,
      "loss": 0.5573,
      "step": 120
    },
    {
      "epoch": 0.0028883753999289015,
      "grad_norm": 2.671875,
      "learning_rate": 1.4840000000000002e-05,
      "loss": 0.5304,
      "step": 130
    },
    {
      "epoch": 0.0031105581230003554,
      "grad_norm": 2.625,
      "learning_rate": 1.444e-05,
      "loss": 0.5138,
      "step": 140
    },
    {
      "epoch": 0.0033327408460718097,
      "grad_norm": 2.296875,
      "learning_rate": 1.4040000000000001e-05,
      "loss": 0.5014,
      "step": 150
    },
    {
      "epoch": 0.0035549235691432635,
      "grad_norm": 2.390625,
      "learning_rate": 1.3640000000000002e-05,
      "loss": 0.5528,
      "step": 160
    },
    {
      "epoch": 0.0037771062922147174,
      "grad_norm": 2.140625,
      "learning_rate": 1.3240000000000002e-05,
      "loss": 0.5459,
      "step": 170
    },
    {
      "epoch": 0.003999289015286171,
      "grad_norm": 2.421875,
      "learning_rate": 1.284e-05,
      "loss": 0.5616,
      "step": 180
    },
    {
      "epoch": 0.004221471738357625,
      "grad_norm": 2.890625,
      "learning_rate": 1.2440000000000001e-05,
      "loss": 0.5032,
      "step": 190
    },
    {
      "epoch": 0.004443654461429079,
      "grad_norm": 2.265625,
      "learning_rate": 1.204e-05,
      "loss": 0.5023,
      "step": 200
    },
    {
      "epoch": 0.004665837184500533,
      "grad_norm": 2.359375,
      "learning_rate": 1.164e-05,
      "loss": 0.4977,
      "step": 210
    },
    {
      "epoch": 0.004888019907571988,
      "grad_norm": 2.375,
      "learning_rate": 1.1240000000000002e-05,
      "loss": 0.5339,
      "step": 220
    },
    {
      "epoch": 0.0051102026306434414,
      "grad_norm": 2.5,
      "learning_rate": 1.0840000000000001e-05,
      "loss": 0.5204,
      "step": 230
    },
    {
      "epoch": 0.005332385353714895,
      "grad_norm": 1.875,
      "learning_rate": 1.0440000000000002e-05,
      "loss": 0.4778,
      "step": 240
    },
    {
      "epoch": 0.005554568076786349,
      "grad_norm": 2.671875,
      "learning_rate": 1.004e-05,
      "loss": 0.5423,
      "step": 250
    },
    {
      "epoch": 0.005776750799857803,
      "grad_norm": 2.28125,
      "learning_rate": 9.640000000000001e-06,
      "loss": 0.5378,
      "step": 260
    },
    {
      "epoch": 0.005998933522929257,
      "grad_norm": 2.109375,
      "learning_rate": 9.240000000000001e-06,
      "loss": 0.5198,
      "step": 270
    },
    {
      "epoch": 0.006221116246000711,
      "grad_norm": 2.390625,
      "learning_rate": 8.84e-06,
      "loss": 0.5659,
      "step": 280
    },
    {
      "epoch": 0.006443298969072165,
      "grad_norm": 2.859375,
      "learning_rate": 8.44e-06,
      "loss": 0.5353,
      "step": 290
    },
    {
      "epoch": 0.006665481692143619,
      "grad_norm": 2.1875,
      "learning_rate": 8.040000000000001e-06,
      "loss": 0.4759,
      "step": 300
    },
    {
      "epoch": 0.006887664415215073,
      "grad_norm": 2.625,
      "learning_rate": 7.640000000000001e-06,
      "loss": 0.5544,
      "step": 310
    },
    {
      "epoch": 0.007109847138286527,
      "grad_norm": 2.203125,
      "learning_rate": 7.24e-06,
      "loss": 0.5641,
      "step": 320
    },
    {
      "epoch": 0.007332029861357981,
      "grad_norm": 2.625,
      "learning_rate": 6.8400000000000014e-06,
      "loss": 0.5065,
      "step": 330
    },
    {
      "epoch": 0.007554212584429435,
      "grad_norm": 2.453125,
      "learning_rate": 6.440000000000001e-06,
      "loss": 0.5257,
      "step": 340
    },
    {
      "epoch": 0.007776395307500889,
      "grad_norm": 2.828125,
      "learning_rate": 6.040000000000001e-06,
      "loss": 0.544,
      "step": 350
    },
    {
      "epoch": 0.007998578030572343,
      "grad_norm": 2.421875,
      "learning_rate": 5.64e-06,
      "loss": 0.4859,
      "step": 360
    },
    {
      "epoch": 0.008220760753643797,
      "grad_norm": 2.71875,
      "learning_rate": 5.240000000000001e-06,
      "loss": 0.4905,
      "step": 370
    },
    {
      "epoch": 0.00844294347671525,
      "grad_norm": 2.296875,
      "learning_rate": 4.84e-06,
      "loss": 0.4889,
      "step": 380
    },
    {
      "epoch": 0.008665126199786705,
      "grad_norm": 2.75,
      "learning_rate": 4.440000000000001e-06,
      "loss": 0.5447,
      "step": 390
    },
    {
      "epoch": 0.008887308922858158,
      "grad_norm": 2.34375,
      "learning_rate": 4.04e-06,
      "loss": 0.5265,
      "step": 400
    },
    {
      "epoch": 0.009109491645929613,
      "grad_norm": 2.328125,
      "learning_rate": 3.6400000000000003e-06,
      "loss": 0.5095,
      "step": 410
    },
    {
      "epoch": 0.009331674369001066,
      "grad_norm": 2.515625,
      "learning_rate": 3.2400000000000003e-06,
      "loss": 0.4687,
      "step": 420
    },
    {
      "epoch": 0.00955385709207252,
      "grad_norm": 2.546875,
      "learning_rate": 2.84e-06,
      "loss": 0.4742,
      "step": 430
    },
    {
      "epoch": 0.009776039815143975,
      "grad_norm": 2.6875,
      "learning_rate": 2.4400000000000004e-06,
      "loss": 0.5529,
      "step": 440
    },
    {
      "epoch": 0.009998222538215428,
      "grad_norm": 3.0625,
      "learning_rate": 2.04e-06,
      "loss": 0.5127,
      "step": 450
    },
    {
      "epoch": 0.010220405261286883,
      "grad_norm": 2.015625,
      "learning_rate": 1.6400000000000002e-06,
      "loss": 0.5252,
      "step": 460
    },
    {
      "epoch": 0.010442587984358336,
      "grad_norm": 2.40625,
      "learning_rate": 1.2400000000000002e-06,
      "loss": 0.4934,
      "step": 470
    },
    {
      "epoch": 0.01066477070742979,
      "grad_norm": 2.734375,
      "learning_rate": 8.400000000000001e-07,
      "loss": 0.5206,
      "step": 480
    },
    {
      "epoch": 0.010886953430501244,
      "grad_norm": 2.765625,
      "learning_rate": 4.4e-07,
      "loss": 0.5411,
      "step": 490
    },
    {
      "epoch": 0.011109136153572698,
      "grad_norm": 2.5625,
      "learning_rate": 4e-08,
      "loss": 0.4797,
      "step": 500
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.452249304466473e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}