| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.753623188405797, |
| "eval_steps": 500, |
| "global_step": 85, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.057971014492753624, |
| "grad_norm": 5.159448254527082, |
| "learning_rate": 0.0, |
| "loss": 0.7831, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.11594202898550725, |
| "grad_norm": 5.074454618154074, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 0.7702, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.17391304347826086, |
| "grad_norm": 5.033467750869474, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 0.7884, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.2318840579710145, |
| "grad_norm": 4.632881551856663, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.7716, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.2898550724637681, |
| "grad_norm": 3.315176232308323, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 0.723, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "grad_norm": 2.394181259514717, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 0.6995, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.4057971014492754, |
| "grad_norm": 3.7004540250844045, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.6504, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.463768115942029, |
| "grad_norm": 3.9537411155858577, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 0.6537, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.5217391304347826, |
| "grad_norm": 3.365368057453053, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 0.5975, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.5797101449275363, |
| "grad_norm": 3.1403485995161238, |
| "learning_rate": 1e-05, |
| "loss": 0.6283, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.6376811594202898, |
| "grad_norm": 2.4878800967156245, |
| "learning_rate": 9.995728791936505e-06, |
| "loss": 0.5365, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 1.571307648439155, |
| "learning_rate": 9.98292246503335e-06, |
| "loss": 0.4818, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.7536231884057971, |
| "grad_norm": 1.3929267598847173, |
| "learning_rate": 9.961602898685225e-06, |
| "loss": 0.5166, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.8115942028985508, |
| "grad_norm": 1.6289303587249693, |
| "learning_rate": 9.931806517013612e-06, |
| "loss": 0.5384, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 1.412181464381996, |
| "learning_rate": 9.893584226636773e-06, |
| "loss": 0.4933, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.927536231884058, |
| "grad_norm": 1.444951103218855, |
| "learning_rate": 9.847001329696653e-06, |
| "loss": 0.5236, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.9855072463768116, |
| "grad_norm": 1.3616316085764364, |
| "learning_rate": 9.792137412291265e-06, |
| "loss": 0.5006, |
| "step": 17 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.3616316085764364, |
| "learning_rate": 9.729086208503174e-06, |
| "loss": 0.4813, |
| "step": 18 |
| }, |
| { |
| "epoch": 1.0579710144927537, |
| "grad_norm": 2.1567352869229954, |
| "learning_rate": 9.657955440256396e-06, |
| "loss": 0.4481, |
| "step": 19 |
| }, |
| { |
| "epoch": 1.1159420289855073, |
| "grad_norm": 1.2546916585964516, |
| "learning_rate": 9.578866633275289e-06, |
| "loss": 0.4735, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.1739130434782608, |
| "grad_norm": 1.1045035898812048, |
| "learning_rate": 9.491954909459895e-06, |
| "loss": 0.4139, |
| "step": 21 |
| }, |
| { |
| "epoch": 1.2318840579710144, |
| "grad_norm": 1.0382597534226017, |
| "learning_rate": 9.397368756032445e-06, |
| "loss": 0.445, |
| "step": 22 |
| }, |
| { |
| "epoch": 1.289855072463768, |
| "grad_norm": 1.269533585653712, |
| "learning_rate": 9.295269771849426e-06, |
| "loss": 0.4651, |
| "step": 23 |
| }, |
| { |
| "epoch": 1.3478260869565217, |
| "grad_norm": 1.0434296813168673, |
| "learning_rate": 9.185832391312644e-06, |
| "loss": 0.458, |
| "step": 24 |
| }, |
| { |
| "epoch": 1.4057971014492754, |
| "grad_norm": 1.1217463005684392, |
| "learning_rate": 9.069243586350976e-06, |
| "loss": 0.4376, |
| "step": 25 |
| }, |
| { |
| "epoch": 1.463768115942029, |
| "grad_norm": 1.1702630509813354, |
| "learning_rate": 8.94570254698197e-06, |
| "loss": 0.4637, |
| "step": 26 |
| }, |
| { |
| "epoch": 1.5217391304347827, |
| "grad_norm": 0.9905456980422715, |
| "learning_rate": 8.815420340999034e-06, |
| "loss": 0.4186, |
| "step": 27 |
| }, |
| { |
| "epoch": 1.5797101449275361, |
| "grad_norm": 1.140388928402416, |
| "learning_rate": 8.67861955336566e-06, |
| "loss": 0.4851, |
| "step": 28 |
| }, |
| { |
| "epoch": 1.6376811594202898, |
| "grad_norm": 1.1003593567236911, |
| "learning_rate": 8.535533905932739e-06, |
| "loss": 0.4158, |
| "step": 29 |
| }, |
| { |
| "epoch": 1.6956521739130435, |
| "grad_norm": 1.0323183154161821, |
| "learning_rate": 8.386407858128707e-06, |
| "loss": 0.4571, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.7536231884057971, |
| "grad_norm": 0.9652171295110353, |
| "learning_rate": 8.231496189304704e-06, |
| "loss": 0.4343, |
| "step": 31 |
| }, |
| { |
| "epoch": 1.8115942028985508, |
| "grad_norm": 1.0411121822811822, |
| "learning_rate": 8.071063563448341e-06, |
| "loss": 0.4371, |
| "step": 32 |
| }, |
| { |
| "epoch": 1.8695652173913042, |
| "grad_norm": 0.9087117048299469, |
| "learning_rate": 7.905384077009693e-06, |
| "loss": 0.3843, |
| "step": 33 |
| }, |
| { |
| "epoch": 1.927536231884058, |
| "grad_norm": 0.8945258612409986, |
| "learning_rate": 7.734740790612137e-06, |
| "loss": 0.4238, |
| "step": 34 |
| }, |
| { |
| "epoch": 1.9855072463768115, |
| "grad_norm": 0.9072287658607223, |
| "learning_rate": 7.559425245448006e-06, |
| "loss": 0.4239, |
| "step": 35 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.9072287658607223, |
| "learning_rate": 7.379736965185369e-06, |
| "loss": 0.3879, |
| "step": 36 |
| }, |
| { |
| "epoch": 2.0579710144927534, |
| "grad_norm": 1.8075291280487968, |
| "learning_rate": 7.195982944236853e-06, |
| "loss": 0.4129, |
| "step": 37 |
| }, |
| { |
| "epoch": 2.1159420289855073, |
| "grad_norm": 1.0080140091271943, |
| "learning_rate": 7.008477123264849e-06, |
| "loss": 0.3956, |
| "step": 38 |
| }, |
| { |
| "epoch": 2.1739130434782608, |
| "grad_norm": 0.897311897179687, |
| "learning_rate": 6.817539852819149e-06, |
| "loss": 0.3859, |
| "step": 39 |
| }, |
| { |
| "epoch": 2.2318840579710146, |
| "grad_norm": 0.8980290567272365, |
| "learning_rate": 6.6234973460234184e-06, |
| "loss": 0.3878, |
| "step": 40 |
| }, |
| { |
| "epoch": 2.289855072463768, |
| "grad_norm": 0.9297885347982958, |
| "learning_rate": 6.426681121245527e-06, |
| "loss": 0.3511, |
| "step": 41 |
| }, |
| { |
| "epoch": 2.3478260869565215, |
| "grad_norm": 0.8239294623215564, |
| "learning_rate": 6.227427435703997e-06, |
| "loss": 0.4005, |
| "step": 42 |
| }, |
| { |
| "epoch": 2.4057971014492754, |
| "grad_norm": 7.099470282692103, |
| "learning_rate": 6.026076710978172e-06, |
| "loss": 0.3572, |
| "step": 43 |
| }, |
| { |
| "epoch": 2.463768115942029, |
| "grad_norm": 0.9621872386993758, |
| "learning_rate": 5.82297295140367e-06, |
| "loss": 0.4079, |
| "step": 44 |
| }, |
| { |
| "epoch": 2.5217391304347827, |
| "grad_norm": 0.8853307030646145, |
| "learning_rate": 5.61846315634674e-06, |
| "loss": 0.3506, |
| "step": 45 |
| }, |
| { |
| "epoch": 2.579710144927536, |
| "grad_norm": 0.8876519887284963, |
| "learning_rate": 5.412896727361663e-06, |
| "loss": 0.3726, |
| "step": 46 |
| }, |
| { |
| "epoch": 2.63768115942029, |
| "grad_norm": 0.9042713927633073, |
| "learning_rate": 5.206624871244066e-06, |
| "loss": 0.369, |
| "step": 47 |
| }, |
| { |
| "epoch": 2.6956521739130435, |
| "grad_norm": 0.8360870067501421, |
| "learning_rate": 5e-06, |
| "loss": 0.3272, |
| "step": 48 |
| }, |
| { |
| "epoch": 2.753623188405797, |
| "grad_norm": 0.7654639929708382, |
| "learning_rate": 4.793375128755934e-06, |
| "loss": 0.3464, |
| "step": 49 |
| }, |
| { |
| "epoch": 2.8115942028985508, |
| "grad_norm": 0.9559519589621139, |
| "learning_rate": 4.587103272638339e-06, |
| "loss": 0.3792, |
| "step": 50 |
| }, |
| { |
| "epoch": 2.869565217391304, |
| "grad_norm": 0.912171268081883, |
| "learning_rate": 4.381536843653262e-06, |
| "loss": 0.3639, |
| "step": 51 |
| }, |
| { |
| "epoch": 2.927536231884058, |
| "grad_norm": 0.8016958636807102, |
| "learning_rate": 4.17702704859633e-06, |
| "loss": 0.3497, |
| "step": 52 |
| }, |
| { |
| "epoch": 2.9855072463768115, |
| "grad_norm": 0.9037924330632935, |
| "learning_rate": 3.973923289021829e-06, |
| "loss": 0.3819, |
| "step": 53 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.9037924330632935, |
| "learning_rate": 3.7725725642960047e-06, |
| "loss": 0.3725, |
| "step": 54 |
| }, |
| { |
| "epoch": 3.0579710144927534, |
| "grad_norm": 1.7435467163889906, |
| "learning_rate": 3.573318878754475e-06, |
| "loss": 0.3339, |
| "step": 55 |
| }, |
| { |
| "epoch": 3.1159420289855073, |
| "grad_norm": 0.8639273215194021, |
| "learning_rate": 3.3765026539765832e-06, |
| "loss": 0.3282, |
| "step": 56 |
| }, |
| { |
| "epoch": 3.1739130434782608, |
| "grad_norm": 0.8620194795220659, |
| "learning_rate": 3.1824601471808504e-06, |
| "loss": 0.3771, |
| "step": 57 |
| }, |
| { |
| "epoch": 3.2318840579710146, |
| "grad_norm": 0.818398812959851, |
| "learning_rate": 2.991522876735154e-06, |
| "loss": 0.32, |
| "step": 58 |
| }, |
| { |
| "epoch": 3.289855072463768, |
| "grad_norm": 0.7961553960544394, |
| "learning_rate": 2.804017055763149e-06, |
| "loss": 0.3354, |
| "step": 59 |
| }, |
| { |
| "epoch": 3.3478260869565215, |
| "grad_norm": 0.8313300023912036, |
| "learning_rate": 2.6202630348146323e-06, |
| "loss": 0.3086, |
| "step": 60 |
| }, |
| { |
| "epoch": 3.4057971014492754, |
| "grad_norm": 0.7966466657831824, |
| "learning_rate": 2.4405747545519966e-06, |
| "loss": 0.3461, |
| "step": 61 |
| }, |
| { |
| "epoch": 3.463768115942029, |
| "grad_norm": 0.8990785358269929, |
| "learning_rate": 2.265259209387867e-06, |
| "loss": 0.3419, |
| "step": 62 |
| }, |
| { |
| "epoch": 3.5217391304347827, |
| "grad_norm": 0.8214296109986554, |
| "learning_rate": 2.094615922990309e-06, |
| "loss": 0.3228, |
| "step": 63 |
| }, |
| { |
| "epoch": 3.579710144927536, |
| "grad_norm": 0.7687607558338363, |
| "learning_rate": 1.928936436551661e-06, |
| "loss": 0.3341, |
| "step": 64 |
| }, |
| { |
| "epoch": 3.63768115942029, |
| "grad_norm": 0.8107501089319402, |
| "learning_rate": 1.7685038106952952e-06, |
| "loss": 0.3141, |
| "step": 65 |
| }, |
| { |
| "epoch": 3.6956521739130435, |
| "grad_norm": 0.8005672821108396, |
| "learning_rate": 1.6135921418712959e-06, |
| "loss": 0.3064, |
| "step": 66 |
| }, |
| { |
| "epoch": 3.753623188405797, |
| "grad_norm": 0.7178076901315711, |
| "learning_rate": 1.4644660940672628e-06, |
| "loss": 0.2989, |
| "step": 67 |
| }, |
| { |
| "epoch": 3.8115942028985508, |
| "grad_norm": 0.8662218799990845, |
| "learning_rate": 1.321380446634342e-06, |
| "loss": 0.3212, |
| "step": 68 |
| }, |
| { |
| "epoch": 3.869565217391304, |
| "grad_norm": 0.7898861729963894, |
| "learning_rate": 1.1845796590009684e-06, |
| "loss": 0.3296, |
| "step": 69 |
| }, |
| { |
| "epoch": 3.927536231884058, |
| "grad_norm": 0.7743651242946709, |
| "learning_rate": 1.0542974530180327e-06, |
| "loss": 0.3341, |
| "step": 70 |
| }, |
| { |
| "epoch": 3.9855072463768115, |
| "grad_norm": 0.7837882634849173, |
| "learning_rate": 9.307564136490255e-07, |
| "loss": 0.3316, |
| "step": 71 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 1.648891461040096, |
| "learning_rate": 8.141676086873574e-07, |
| "loss": 0.333, |
| "step": 72 |
| }, |
| { |
| "epoch": 4.057971014492754, |
| "grad_norm": 0.7683801753309785, |
| "learning_rate": 7.047302281505735e-07, |
| "loss": 0.3209, |
| "step": 73 |
| }, |
| { |
| "epoch": 4.115942028985507, |
| "grad_norm": 0.7457543082443584, |
| "learning_rate": 6.026312439675553e-07, |
| "loss": 0.31, |
| "step": 74 |
| }, |
| { |
| "epoch": 4.173913043478261, |
| "grad_norm": 0.6915197851906805, |
| "learning_rate": 5.080450905401057e-07, |
| "loss": 0.2763, |
| "step": 75 |
| }, |
| { |
| "epoch": 4.231884057971015, |
| "grad_norm": 0.7772721453876562, |
| "learning_rate": 4.211333667247125e-07, |
| "loss": 0.3156, |
| "step": 76 |
| }, |
| { |
| "epoch": 4.2898550724637685, |
| "grad_norm": 0.6330066051722274, |
| "learning_rate": 3.420445597436056e-07, |
| "loss": 0.2816, |
| "step": 77 |
| }, |
| { |
| "epoch": 4.3478260869565215, |
| "grad_norm": 0.7025563271833313, |
| "learning_rate": 2.7091379149682683e-07, |
| "loss": 0.2933, |
| "step": 78 |
| }, |
| { |
| "epoch": 4.405797101449275, |
| "grad_norm": 0.7797377278716924, |
| "learning_rate": 2.0786258770873647e-07, |
| "loss": 0.3372, |
| "step": 79 |
| }, |
| { |
| "epoch": 4.463768115942029, |
| "grad_norm": 0.6949232577970824, |
| "learning_rate": 1.5299867030334815e-07, |
| "loss": 0.2892, |
| "step": 80 |
| }, |
| { |
| "epoch": 4.521739130434782, |
| "grad_norm": 0.712480327775005, |
| "learning_rate": 1.0641577336322761e-07, |
| "loss": 0.3173, |
| "step": 81 |
| }, |
| { |
| "epoch": 4.579710144927536, |
| "grad_norm": 0.7367737093031481, |
| "learning_rate": 6.819348298638839e-08, |
| "loss": 0.3027, |
| "step": 82 |
| }, |
| { |
| "epoch": 4.63768115942029, |
| "grad_norm": 0.68317505546867, |
| "learning_rate": 3.839710131477492e-08, |
| "loss": 0.295, |
| "step": 83 |
| }, |
| { |
| "epoch": 4.695652173913043, |
| "grad_norm": 0.7210852567246594, |
| "learning_rate": 1.7077534966650767e-08, |
| "loss": 0.3287, |
| "step": 84 |
| }, |
| { |
| "epoch": 4.753623188405797, |
| "grad_norm": 0.7796588531860184, |
| "learning_rate": 4.2712080634949024e-09, |
| "loss": 0.3023, |
| "step": 85 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 85, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 17, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 12373272117248.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|