| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 10000, |
| "global_step": 735, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.13646055437100213, |
| "grad_norm": 0.14500385522842407, |
| "learning_rate": 0.00011399999999999999, |
| "loss": 0.1268, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.27292110874200426, |
| "grad_norm": 0.06265528500080109, |
| "learning_rate": 0.000234, |
| "loss": 0.0282, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.4093816631130064, |
| "grad_norm": 0.03309512510895729, |
| "learning_rate": 0.00029987223755234907, |
| "loss": 0.0157, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.5458422174840085, |
| "grad_norm": 0.03388677537441254, |
| "learning_rate": 0.00029867524500941253, |
| "loss": 0.0125, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6823027718550106, |
| "grad_norm": 0.038252197206020355, |
| "learning_rate": 0.00029622824461983995, |
| "loss": 0.0128, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.8187633262260128, |
| "grad_norm": 0.06330293416976929, |
| "learning_rate": 0.00029255180988050044, |
| "loss": 0.0112, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.9552238805970149, |
| "grad_norm": 0.03660163655877113, |
| "learning_rate": 0.0002876768509289324, |
| "loss": 0.0106, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0886993603411514, |
| "grad_norm": 0.028105631470680237, |
| "learning_rate": 0.0002816443546620542, |
| "loss": 0.0085, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.2251599147121535, |
| "grad_norm": 0.04313601925969124, |
| "learning_rate": 0.00027450504013311436, |
| "loss": 0.007, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.3616204690831557, |
| "grad_norm": 0.03731823340058327, |
| "learning_rate": 0.00026631893212418224, |
| "loss": 0.0056, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.4980810234541577, |
| "grad_norm": 0.02299409918487072, |
| "learning_rate": 0.00025715485647942525, |
| "loss": 0.0075, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.63454157782516, |
| "grad_norm": 0.03759332001209259, |
| "learning_rate": 0.00024708986144223035, |
| "loss": 0.0063, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.7710021321961622, |
| "grad_norm": 0.013533813878893852, |
| "learning_rate": 0.00023620856986135804, |
| "loss": 0.0052, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.9074626865671642, |
| "grad_norm": 0.0647633746266365, |
| "learning_rate": 0.00022460246771254522, |
| "loss": 0.0045, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.0409381663113004, |
| "grad_norm": 0.022289317101240158, |
| "learning_rate": 0.0002123691349174121, |
| "loss": 0.003, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.177398720682303, |
| "grad_norm": 0.040852464735507965, |
| "learning_rate": 0.00019961142492666903, |
| "loss": 0.0028, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.313859275053305, |
| "grad_norm": 0.04412081092596054, |
| "learning_rate": 0.00018643659996539272, |
| "loss": 0.0029, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.450319829424307, |
| "grad_norm": 0.047590937465429306, |
| "learning_rate": 0.00017295542921091727, |
| "loss": 0.0025, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.5867803837953094, |
| "grad_norm": 0.032162390649318695, |
| "learning_rate": 0.00015928125748553563, |
| "loss": 0.002, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.7232409381663114, |
| "grad_norm": 0.02574550174176693, |
| "learning_rate": 0.00014552905229410626, |
| "loss": 0.0014, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.8597014925373134, |
| "grad_norm": 0.014707539230585098, |
| "learning_rate": 0.000131814437218731, |
| "loss": 0.0012, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.9961620469083154, |
| "grad_norm": 0.04477572441101074, |
| "learning_rate": 0.0001182527197973709, |
| "loss": 0.0011, |
| "step": 440 |
| }, |
| { |
| "epoch": 3.129637526652452, |
| "grad_norm": 0.012851215898990631, |
| "learning_rate": 0.00010495792205964832, |
| "loss": 0.0008, |
| "step": 460 |
| }, |
| { |
| "epoch": 3.266098081023454, |
| "grad_norm": 0.02783357724547386, |
| "learning_rate": 9.204182187073868e-05, |
| "loss": 0.0007, |
| "step": 480 |
| }, |
| { |
| "epoch": 3.402558635394456, |
| "grad_norm": 0.004735818598419428, |
| "learning_rate": 7.961301314338808e-05, |
| "loss": 0.0004, |
| "step": 500 |
| }, |
| { |
| "epoch": 3.539019189765458, |
| "grad_norm": 0.003670594422146678, |
| "learning_rate": 6.777599281945507e-05, |
| "loss": 0.0004, |
| "step": 520 |
| }, |
| { |
| "epoch": 3.6754797441364606, |
| "grad_norm": 0.013839378952980042, |
| "learning_rate": 5.66302822973053e-05, |
| "loss": 0.0004, |
| "step": 540 |
| }, |
| { |
| "epoch": 3.8119402985074626, |
| "grad_norm": 0.00302703189663589, |
| "learning_rate": 4.626959069178253e-05, |
| "loss": 0.0004, |
| "step": 560 |
| }, |
| { |
| "epoch": 3.948400852878465, |
| "grad_norm": 0.026532089337706566, |
| "learning_rate": 3.6781026961763353e-05, |
| "loss": 0.0004, |
| "step": 580 |
| }, |
| { |
| "epoch": 4.081876332622601, |
| "grad_norm": 0.001038851565681398, |
| "learning_rate": 2.8244367529442822e-05, |
| "loss": 0.0002, |
| "step": 600 |
| }, |
| { |
| "epoch": 4.218336886993604, |
| "grad_norm": 0.0008404534310102463, |
| "learning_rate": 2.0731385548944725e-05, |
| "loss": 0.0002, |
| "step": 620 |
| }, |
| { |
| "epoch": 4.354797441364606, |
| "grad_norm": 0.0016892015701159835, |
| "learning_rate": 1.4305247463523778e-05, |
| "loss": 0.0002, |
| "step": 640 |
| }, |
| { |
| "epoch": 4.491257995735608, |
| "grad_norm": 0.002141030738130212, |
| "learning_rate": 9.019981924888797e-06, |
| "loss": 0.0001, |
| "step": 660 |
| }, |
| { |
| "epoch": 4.62771855010661, |
| "grad_norm": 0.0017192725790664554, |
| "learning_rate": 4.920025539782397e-06, |
| "loss": 0.0002, |
| "step": 680 |
| }, |
| { |
| "epoch": 4.764179104477612, |
| "grad_norm": 0.002778939437121153, |
| "learning_rate": 2.0398492630157303e-06, |
| "loss": 0.0002, |
| "step": 700 |
| }, |
| { |
| "epoch": 4.900639658848614, |
| "grad_norm": 0.002974987495690584, |
| "learning_rate": 4.036685781107329e-07, |
| "loss": 0.0001, |
| "step": 720 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 735, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 10000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.8319121075920896e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|