| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 3006, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0499001996007984, |
| "grad_norm": 3.5520248413085938, |
| "learning_rate": 5.414364640883978e-06, |
| "loss": 0.4617, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0998003992015968, |
| "grad_norm": 4.260124206542969, |
| "learning_rate": 1.0939226519337018e-05, |
| "loss": 0.4357, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1497005988023952, |
| "grad_norm": 2.909259080886841, |
| "learning_rate": 1.6464088397790058e-05, |
| "loss": 0.4064, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1996007984031936, |
| "grad_norm": 1.7840492725372314, |
| "learning_rate": 1.9997996619906375e-05, |
| "loss": 0.4212, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.249500998003992, |
| "grad_norm": 3.765916585922241, |
| "learning_rate": 1.9971421218881642e-05, |
| "loss": 0.4019, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2994011976047904, |
| "grad_norm": 3.896864652633667, |
| "learning_rate": 1.9914024726308284e-05, |
| "loss": 0.3965, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.34930139720558884, |
| "grad_norm": 8.40927505493164, |
| "learning_rate": 1.9825984551455585e-05, |
| "loss": 0.3949, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.3992015968063872, |
| "grad_norm": 4.71005916595459, |
| "learning_rate": 1.970757282145864e-05, |
| "loss": 0.4092, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.4491017964071856, |
| "grad_norm": 4.222394943237305, |
| "learning_rate": 1.9559155540188965e-05, |
| "loss": 0.3905, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.499001996007984, |
| "grad_norm": 4.025149822235107, |
| "learning_rate": 1.9381191456957516e-05, |
| "loss": 0.3853, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5489021956087824, |
| "grad_norm": 2.301793336868286, |
| "learning_rate": 1.9174230648546855e-05, |
| "loss": 0.3757, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5988023952095808, |
| "grad_norm": 11.234354972839355, |
| "learning_rate": 1.893891281895534e-05, |
| "loss": 0.3907, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6487025948103793, |
| "grad_norm": 4.995813369750977, |
| "learning_rate": 1.8675965322108713e-05, |
| "loss": 0.4029, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.6986027944111777, |
| "grad_norm": 3.2409844398498535, |
| "learning_rate": 1.838620091365083e-05, |
| "loss": 0.3961, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7485029940119761, |
| "grad_norm": 3.947786569595337, |
| "learning_rate": 1.80705152387625e-05, |
| "loss": 0.3693, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.7984031936127745, |
| "grad_norm": 2.521354913711548, |
| "learning_rate": 1.7729884063773596e-05, |
| "loss": 0.3717, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8483033932135728, |
| "grad_norm": 2.0015060901641846, |
| "learning_rate": 1.7365360260125233e-05, |
| "loss": 0.3771, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.8982035928143712, |
| "grad_norm": 2.748753786087036, |
| "learning_rate": 1.697807055000447e-05, |
| "loss": 0.3756, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.9481037924151696, |
| "grad_norm": 6.853181838989258, |
| "learning_rate": 1.6569212023710624e-05, |
| "loss": 0.3834, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.998003992015968, |
| "grad_norm": 3.869363307952881, |
| "learning_rate": 1.614004843951774e-05, |
| "loss": 0.3708, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0479041916167664, |
| "grad_norm": 1.820420742034912, |
| "learning_rate": 1.5691906317470182e-05, |
| "loss": 0.3728, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.0978043912175648, |
| "grad_norm": 2.278934955596924, |
| "learning_rate": 1.522617083918523e-05, |
| "loss": 0.3675, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.1477045908183632, |
| "grad_norm": 4.441009998321533, |
| "learning_rate": 1.4744281566336039e-05, |
| "loss": 0.3524, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.1976047904191618, |
| "grad_norm": 3.737205982208252, |
| "learning_rate": 1.4247727991049036e-05, |
| "loss": 0.3407, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.24750499001996, |
| "grad_norm": 6.3948540687561035, |
| "learning_rate": 1.3738044931969103e-05, |
| "loss": 0.3556, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.2974051896207586, |
| "grad_norm": 2.192323923110962, |
| "learning_rate": 1.3216807790223108e-05, |
| "loss": 0.359, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.347305389221557, |
| "grad_norm": 3.622485637664795, |
| "learning_rate": 1.2685627679945297e-05, |
| "loss": 0.3594, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.3972055888223553, |
| "grad_norm": 3.1616196632385254, |
| "learning_rate": 1.2146146448415847e-05, |
| "loss": 0.3384, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.4471057884231537, |
| "grad_norm": 4.088330268859863, |
| "learning_rate": 1.1600031601205001e-05, |
| "loss": 0.3614, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.4970059880239521, |
| "grad_norm": 4.012702465057373, |
| "learning_rate": 1.1048971148008917e-05, |
| "loss": 0.3435, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.5469061876247505, |
| "grad_norm": 4.387750625610352, |
| "learning_rate": 1.0494668385108433e-05, |
| "loss": 0.3495, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.596806387225549, |
| "grad_norm": 4.125097751617432, |
| "learning_rate": 9.938836630577868e-06, |
| "loss": 0.345, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.6467065868263473, |
| "grad_norm": 6.880813121795654, |
| "learning_rate": 9.38319392851706e-06, |
| "loss": 0.3514, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.6966067864271457, |
| "grad_norm": 2.5724949836730957, |
| "learning_rate": 8.829457738675486e-06, |
| "loss": 0.3343, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.746506986027944, |
| "grad_norm": 5.649964332580566, |
| "learning_rate": 8.279339627882612e-06, |
| "loss": 0.3232, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.7964071856287425, |
| "grad_norm": 2.4297564029693604, |
| "learning_rate": 7.734539979692912e-06, |
| "loss": 0.3304, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.846307385229541, |
| "grad_norm": 1.9791656732559204, |
| "learning_rate": 7.196742738597746e-06, |
| "loss": 0.3379, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.8962075848303392, |
| "grad_norm": 6.672551155090332, |
| "learning_rate": 6.667610205049422e-06, |
| "loss": 0.3265, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.9461077844311379, |
| "grad_norm": 3.6431407928466797, |
| "learning_rate": 6.148777897385789e-06, |
| "loss": 0.3374, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.996007984031936, |
| "grad_norm": 4.975691795349121, |
| "learning_rate": 5.641849496536765e-06, |
| "loss": 0.3387, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.0459081836327346, |
| "grad_norm": 3.658121347427368, |
| "learning_rate": 5.148391889138484e-06, |
| "loss": 0.3061, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.095808383233533, |
| "grad_norm": 2.8228607177734375, |
| "learning_rate": 4.6699303243765115e-06, |
| "loss": 0.2963, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.1457085828343314, |
| "grad_norm": 8.923210144042969, |
| "learning_rate": 4.207943699528001e-06, |
| "loss": 0.3079, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.1956087824351296, |
| "grad_norm": 3.0726373195648193, |
| "learning_rate": 3.7638599887749905e-06, |
| "loss": 0.29, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.245508982035928, |
| "grad_norm": 6.0434722900390625, |
| "learning_rate": 3.3390518294180095e-06, |
| "loss": 0.3024, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.2954091816367264, |
| "grad_norm": 4.792275905609131, |
| "learning_rate": 2.9348322791328645e-06, |
| "loss": 0.2805, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.345309381237525, |
| "grad_norm": 4.969878196716309, |
| "learning_rate": 2.5524507573846e-06, |
| "loss": 0.3083, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.3952095808383236, |
| "grad_norm": 4.028615951538086, |
| "learning_rate": 2.1930891835435542e-06, |
| "loss": 0.2911, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.4451097804391217, |
| "grad_norm": 4.579285621643066, |
| "learning_rate": 1.8578583236402904e-06, |
| "loss": 0.2709, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.49500998003992, |
| "grad_norm": 3.3637678623199463, |
| "learning_rate": 1.5477943570514676e-06, |
| "loss": 0.277, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.5449101796407185, |
| "grad_norm": 3.1785693168640137, |
| "learning_rate": 1.26385567372877e-06, |
| "loss": 0.2811, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.594810379241517, |
| "grad_norm": 3.9199390411376953, |
| "learning_rate": 1.006919911870481e-06, |
| "loss": 0.2746, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.6447105788423153, |
| "grad_norm": 3.6984686851501465, |
| "learning_rate": 7.777812451921185e-07, |
| "loss": 0.2761, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.694610778443114, |
| "grad_norm": 4.063042640686035, |
| "learning_rate": 5.771479281809755e-07, |
| "loss": 0.2693, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.744510978043912, |
| "grad_norm": 3.75936222076416, |
| "learning_rate": 4.0564010692210943e-07, |
| "loss": 0.2701, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.7944111776447107, |
| "grad_norm": 4.77657413482666, |
| "learning_rate": 2.6378790226231686e-07, |
| "loss": 0.2812, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.844311377245509, |
| "grad_norm": 4.659968852996826, |
| "learning_rate": 1.5202977123698114e-07, |
| "loss": 0.2647, |
| "step": 2850 |
| }, |
| { |
| "epoch": 2.8942115768463075, |
| "grad_norm": 3.1246726512908936, |
| "learning_rate": 7.071115182450294e-08, |
| "loss": 0.2761, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.9441117764471056, |
| "grad_norm": 3.649775505065918, |
| "learning_rate": 2.0083395217306288e-08, |
| "loss": 0.2956, |
| "step": 2950 |
| }, |
| { |
| "epoch": 2.9940119760479043, |
| "grad_norm": 4.269359588623047, |
| "learning_rate": 3.0298890970392736e-10, |
| "loss": 0.2726, |
| "step": 3000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 3006, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.5298690157259776e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|