| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 6.0, |
| "eval_steps": 500, |
| "global_step": 60, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1, |
| "grad_norm": 3.034785032272339, |
| "learning_rate": 5.0000000000000004e-08, |
| "loss": 1.0719, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 3.115314483642578, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 1.0679, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 3.1453707218170166, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 1.0672, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 3.1942501068115234, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 1.0786, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 3.230632781982422, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 1.074, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 3.2152795791625977, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 1.0574, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 2.9832417964935303, |
| "learning_rate": 3.5000000000000004e-07, |
| "loss": 1.0784, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 2.8603551387786865, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 1.1003, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 2.974980592727661, |
| "learning_rate": 4.5000000000000003e-07, |
| "loss": 1.0639, |
| "step": 9 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.9170238971710205, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 1.0749, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 2.5163683891296387, |
| "learning_rate": 5.5e-07, |
| "loss": 1.0817, |
| "step": 11 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 2.6248385906219482, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 1.0552, |
| "step": 12 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 2.2683868408203125, |
| "learning_rate": 6.5e-07, |
| "loss": 1.0697, |
| "step": 13 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 2.057786226272583, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 1.0373, |
| "step": 14 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 1.9215784072875977, |
| "learning_rate": 7.5e-07, |
| "loss": 1.0799, |
| "step": 15 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 1.639724612236023, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 1.052, |
| "step": 16 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 1.5040388107299805, |
| "learning_rate": 8.500000000000001e-07, |
| "loss": 1.0464, |
| "step": 17 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 1.3652805089950562, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 1.0396, |
| "step": 18 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 1.2367398738861084, |
| "learning_rate": 9.500000000000001e-07, |
| "loss": 1.0393, |
| "step": 19 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.209457516670227, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.0293, |
| "step": 20 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 1.1993581056594849, |
| "learning_rate": 1.0500000000000001e-06, |
| "loss": 1.0237, |
| "step": 21 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 1.1360894441604614, |
| "learning_rate": 1.1e-06, |
| "loss": 1.0157, |
| "step": 22 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 1.1070096492767334, |
| "learning_rate": 1.1500000000000002e-06, |
| "loss": 1.0523, |
| "step": 23 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 1.1012957096099854, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 1.0087, |
| "step": 24 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 1.0733041763305664, |
| "learning_rate": 1.25e-06, |
| "loss": 1.0022, |
| "step": 25 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 1.0499606132507324, |
| "learning_rate": 1.3e-06, |
| "loss": 1.0358, |
| "step": 26 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 1.0175936222076416, |
| "learning_rate": 1.3500000000000002e-06, |
| "loss": 1.0146, |
| "step": 27 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.9660918116569519, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 1.0123, |
| "step": 28 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 0.8856618404388428, |
| "learning_rate": 1.45e-06, |
| "loss": 0.9958, |
| "step": 29 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.8275923728942871, |
| "learning_rate": 1.5e-06, |
| "loss": 0.9811, |
| "step": 30 |
| }, |
| { |
| "epoch": 3.1, |
| "grad_norm": 0.7996008992195129, |
| "learning_rate": 1.5500000000000002e-06, |
| "loss": 1.0077, |
| "step": 31 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 0.7398363947868347, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 0.9961, |
| "step": 32 |
| }, |
| { |
| "epoch": 3.3, |
| "grad_norm": 0.7428069114685059, |
| "learning_rate": 1.6500000000000003e-06, |
| "loss": 0.9807, |
| "step": 33 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 0.7325337529182434, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 0.9719, |
| "step": 34 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 0.7133640050888062, |
| "learning_rate": 1.75e-06, |
| "loss": 0.9691, |
| "step": 35 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 0.7199342250823975, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 0.9654, |
| "step": 36 |
| }, |
| { |
| "epoch": 3.7, |
| "grad_norm": 0.7164607048034668, |
| "learning_rate": 1.85e-06, |
| "loss": 0.9445, |
| "step": 37 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 0.6870836615562439, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 0.9574, |
| "step": 38 |
| }, |
| { |
| "epoch": 3.9, |
| "grad_norm": 0.6839168667793274, |
| "learning_rate": 1.9500000000000004e-06, |
| "loss": 0.9272, |
| "step": 39 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.680034875869751, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.9688, |
| "step": 40 |
| }, |
| { |
| "epoch": 4.1, |
| "grad_norm": 0.604970395565033, |
| "learning_rate": 2.05e-06, |
| "loss": 0.954, |
| "step": 41 |
| }, |
| { |
| "epoch": 4.2, |
| "grad_norm": 0.5941487550735474, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 0.9346, |
| "step": 42 |
| }, |
| { |
| "epoch": 4.3, |
| "grad_norm": 0.5750426650047302, |
| "learning_rate": 2.15e-06, |
| "loss": 0.9388, |
| "step": 43 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 0.5305467247962952, |
| "learning_rate": 2.2e-06, |
| "loss": 0.9075, |
| "step": 44 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 0.5384947657585144, |
| "learning_rate": 2.25e-06, |
| "loss": 0.9458, |
| "step": 45 |
| }, |
| { |
| "epoch": 4.6, |
| "grad_norm": 0.5123675465583801, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 0.9346, |
| "step": 46 |
| }, |
| { |
| "epoch": 4.7, |
| "grad_norm": 0.5044260025024414, |
| "learning_rate": 2.35e-06, |
| "loss": 0.9208, |
| "step": 47 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 0.5064399242401123, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 0.9298, |
| "step": 48 |
| }, |
| { |
| "epoch": 4.9, |
| "grad_norm": 0.5042670369148254, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 0.9001, |
| "step": 49 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.5186490416526794, |
| "learning_rate": 2.5e-06, |
| "loss": 0.9196, |
| "step": 50 |
| }, |
| { |
| "epoch": 5.1, |
| "grad_norm": 0.5051029324531555, |
| "learning_rate": 2.55e-06, |
| "loss": 0.9373, |
| "step": 51 |
| }, |
| { |
| "epoch": 5.2, |
| "grad_norm": 0.4815538227558136, |
| "learning_rate": 2.6e-06, |
| "loss": 0.9183, |
| "step": 52 |
| }, |
| { |
| "epoch": 5.3, |
| "grad_norm": 0.4696069359779358, |
| "learning_rate": 2.6500000000000005e-06, |
| "loss": 0.8937, |
| "step": 53 |
| }, |
| { |
| "epoch": 5.4, |
| "grad_norm": 0.47198885679244995, |
| "learning_rate": 2.7000000000000004e-06, |
| "loss": 0.8954, |
| "step": 54 |
| }, |
| { |
| "epoch": 5.5, |
| "grad_norm": 0.47748100757598877, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 0.8964, |
| "step": 55 |
| }, |
| { |
| "epoch": 5.6, |
| "grad_norm": 0.4699823558330536, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 0.886, |
| "step": 56 |
| }, |
| { |
| "epoch": 5.7, |
| "grad_norm": 0.4619591534137726, |
| "learning_rate": 2.85e-06, |
| "loss": 0.8993, |
| "step": 57 |
| }, |
| { |
| "epoch": 5.8, |
| "grad_norm": 0.4674915075302124, |
| "learning_rate": 2.9e-06, |
| "loss": 0.8715, |
| "step": 58 |
| }, |
| { |
| "epoch": 5.9, |
| "grad_norm": 0.45391637086868286, |
| "learning_rate": 2.95e-06, |
| "loss": 0.8917, |
| "step": 59 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.46444305777549744, |
| "learning_rate": 3e-06, |
| "loss": 0.882, |
| "step": 60 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 60, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 10, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.028918054449316e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|