{
  "best_metric": 0.8936182856559753,
  "best_model_checkpoint": "./outputs/public-irc-mistral-24b/checkpoint-48",
  "epoch": 2.974619289340102,
  "eval_steps": 500,
  "global_step": 72,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04060913705583756,
      "grad_norm": 3.373054027557373,
      "learning_rate": 2e-05,
      "loss": 1.2957,
      "step": 1
    },
    {
      "epoch": 0.08121827411167512,
      "grad_norm": 3.194347381591797,
      "learning_rate": 4e-05,
      "loss": 1.3221,
      "step": 2
    },
    {
      "epoch": 0.1218274111675127,
      "grad_norm": 1.2986558675765991,
      "learning_rate": 6.000000000000001e-05,
      "loss": 1.2683,
      "step": 3
    },
    {
      "epoch": 0.16243654822335024,
      "grad_norm": 1.1605945825576782,
      "learning_rate": 8e-05,
      "loss": 1.1636,
      "step": 4
    },
    {
      "epoch": 0.20304568527918782,
      "grad_norm": 0.811213493347168,
      "learning_rate": 7.997668089464696e-05,
      "loss": 1.0964,
      "step": 5
    },
    {
      "epoch": 0.2436548223350254,
      "grad_norm": 0.7070867419242859,
      "learning_rate": 7.990675076762158e-05,
      "loss": 1.0897,
      "step": 6
    },
    {
      "epoch": 0.28426395939086296,
      "grad_norm": 0.674956738948822,
      "learning_rate": 7.97902911543238e-05,
      "loss": 1.0602,
      "step": 7
    },
    {
      "epoch": 0.3248730964467005,
      "grad_norm": 0.6653350591659546,
      "learning_rate": 7.962743784145323e-05,
      "loss": 1.0097,
      "step": 8
    },
    {
      "epoch": 0.36548223350253806,
      "grad_norm": 0.6503349542617798,
      "learning_rate": 7.941838070868787e-05,
      "loss": 1.0102,
      "step": 9
    },
    {
      "epoch": 0.40609137055837563,
      "grad_norm": 0.49681031703948975,
      "learning_rate": 7.916336350729293e-05,
      "loss": 1.0227,
      "step": 10
    },
    {
      "epoch": 0.4467005076142132,
      "grad_norm": 0.5602617263793945,
      "learning_rate": 7.886268357591766e-05,
      "loss": 0.9935,
      "step": 11
    },
    {
      "epoch": 0.4873096446700508,
      "grad_norm": 0.48682689666748047,
      "learning_rate": 7.851669149391198e-05,
      "loss": 0.9811,
      "step": 12
    },
    {
      "epoch": 0.5279187817258884,
      "grad_norm": 0.5210645198822021,
      "learning_rate": 7.812579067256644e-05,
      "loss": 0.9828,
      "step": 13
    },
    {
      "epoch": 0.5685279187817259,
      "grad_norm": 0.46042340993881226,
      "learning_rate": 7.769043688475283e-05,
      "loss": 0.9629,
      "step": 14
    },
    {
      "epoch": 0.6091370558375635,
      "grad_norm": 0.4750231206417084,
      "learning_rate": 7.721113773351333e-05,
      "loss": 0.9599,
      "step": 15
    },
    {
      "epoch": 0.649746192893401,
      "grad_norm": 0.44704967737197876,
      "learning_rate": 7.668845206021812e-05,
      "loss": 0.9417,
      "step": 16
    },
    {
      "epoch": 0.6903553299492385,
      "grad_norm": 0.45399850606918335,
      "learning_rate": 7.612298929298132e-05,
      "loss": 0.9442,
      "step": 17
    },
    {
      "epoch": 0.7309644670050761,
      "grad_norm": 0.48387065529823303,
      "learning_rate": 7.551540873609502e-05,
      "loss": 0.9388,
      "step": 18
    },
    {
      "epoch": 0.7715736040609137,
      "grad_norm": 0.4469858705997467,
      "learning_rate": 7.486641880131006e-05,
      "loss": 0.9357,
      "step": 19
    },
    {
      "epoch": 0.8121827411167513,
      "grad_norm": 0.4322652816772461,
      "learning_rate": 7.417677618185955e-05,
      "loss": 0.9191,
      "step": 20
    },
    {
      "epoch": 0.8527918781725888,
      "grad_norm": 0.4220181703567505,
      "learning_rate": 7.344728497018844e-05,
      "loss": 0.9269,
      "step": 21
    },
    {
      "epoch": 0.8934010152284264,
      "grad_norm": 0.4223184287548065,
      "learning_rate": 7.267879572041768e-05,
      "loss": 0.9092,
      "step": 22
    },
    {
      "epoch": 0.934010152284264,
      "grad_norm": 0.5329856276512146,
      "learning_rate": 7.187220445663618e-05,
      "loss": 0.8954,
      "step": 23
    },
    {
      "epoch": 0.9746192893401016,
      "grad_norm": 0.504860520362854,
      "learning_rate": 7.10284516281768e-05,
      "loss": 0.9145,
      "step": 24
    },
    {
      "epoch": 0.9746192893401016,
      "eval_loss": 0.9128336310386658,
      "eval_runtime": 89.3062,
      "eval_samples_per_second": 0.84,
      "eval_steps_per_second": 0.426,
      "step": 24
    },
    {
      "epoch": 1.0406091370558375,
      "grad_norm": 1.4868385791778564,
      "learning_rate": 7.014852101309445e-05,
      "loss": 1.6092,
      "step": 25
    },
    {
      "epoch": 1.0812182741116751,
      "grad_norm": 0.43014273047447205,
      "learning_rate": 6.923343857112497e-05,
      "loss": 0.7399,
      "step": 26
    },
    {
      "epoch": 1.1218274111675126,
      "grad_norm": 0.4565065801143646,
      "learning_rate": 6.828427124746191e-05,
      "loss": 0.7567,
      "step": 27
    },
    {
      "epoch": 1.1624365482233503,
      "grad_norm": 0.511966347694397,
      "learning_rate": 6.730212572874618e-05,
      "loss": 0.7053,
      "step": 28
    },
    {
      "epoch": 1.2030456852791878,
      "grad_norm": 0.4735160171985626,
      "learning_rate": 6.628814715271891e-05,
      "loss": 0.726,
      "step": 29
    },
    {
      "epoch": 1.2436548223350254,
      "grad_norm": 0.5390828251838684,
      "learning_rate": 6.524351777304212e-05,
      "loss": 0.7107,
      "step": 30
    },
    {
      "epoch": 1.284263959390863,
      "grad_norm": 0.45148932933807373,
      "learning_rate": 6.416945558084379e-05,
      "loss": 0.7038,
      "step": 31
    },
    {
      "epoch": 1.3248730964467006,
      "grad_norm": 0.4815531075000763,
      "learning_rate": 6.30672128845947e-05,
      "loss": 0.6963,
      "step": 32
    },
    {
      "epoch": 1.365482233502538,
      "grad_norm": 0.47509998083114624,
      "learning_rate": 6.193807484997275e-05,
      "loss": 0.7048,
      "step": 33
    },
    {
      "epoch": 1.4060913705583755,
      "grad_norm": 0.4874464273452759,
      "learning_rate": 6.078335800141735e-05,
      "loss": 0.7139,
      "step": 34
    },
    {
      "epoch": 1.4467005076142132,
      "grad_norm": 0.46367257833480835,
      "learning_rate": 5.96044086871207e-05,
      "loss": 0.6811,
      "step": 35
    },
    {
      "epoch": 1.487309644670051,
      "grad_norm": 0.44525542855262756,
      "learning_rate": 5.840260150924609e-05,
      "loss": 0.6935,
      "step": 36
    },
    {
      "epoch": 1.5279187817258884,
      "grad_norm": 0.4398341774940491,
      "learning_rate": 5.717933772120329e-05,
      "loss": 0.7027,
      "step": 37
    },
    {
      "epoch": 1.5685279187817258,
      "grad_norm": 0.42870181798934937,
      "learning_rate": 5.593604359384967e-05,
      "loss": 0.6933,
      "step": 38
    },
    {
      "epoch": 1.6091370558375635,
      "grad_norm": 0.4329513609409332,
      "learning_rate": 5.467416875252227e-05,
      "loss": 0.6596,
      "step": 39
    },
    {
      "epoch": 1.649746192893401,
      "grad_norm": 0.44533371925354004,
      "learning_rate": 5.339518448683945e-05,
      "loss": 0.6675,
      "step": 40
    },
    {
      "epoch": 1.6903553299492384,
      "grad_norm": 0.45296040177345276,
      "learning_rate": 5.210058203524304e-05,
      "loss": 0.6794,
      "step": 41
    },
    {
      "epoch": 1.7309644670050761,
      "grad_norm": 0.46650972962379456,
      "learning_rate": 5.0791870846280974e-05,
      "loss": 0.6615,
      "step": 42
    },
    {
      "epoch": 1.7715736040609138,
      "grad_norm": 0.43552765250205994,
      "learning_rate": 4.9470576818657873e-05,
      "loss": 0.6594,
      "step": 43
    },
    {
      "epoch": 1.8121827411167513,
      "grad_norm": 0.44811365008354187,
      "learning_rate": 4.8138240522105365e-05,
      "loss": 0.6609,
      "step": 44
    },
    {
      "epoch": 1.8527918781725887,
      "grad_norm": 0.45581039786338806,
      "learning_rate": 4.679641540114667e-05,
      "loss": 0.6727,
      "step": 45
    },
    {
      "epoch": 1.8934010152284264,
      "grad_norm": 0.4468774199485779,
      "learning_rate": 4.5446665963849874e-05,
      "loss": 0.6528,
      "step": 46
    },
    {
      "epoch": 1.934010152284264,
      "grad_norm": 0.44559335708618164,
      "learning_rate": 4.409056595768137e-05,
      "loss": 0.6722,
      "step": 47
    },
    {
      "epoch": 1.9746192893401016,
      "grad_norm": 0.48416054248809814,
      "learning_rate": 4.272969653458685e-05,
      "loss": 0.6565,
      "step": 48
    },
    {
      "epoch": 1.9746192893401016,
      "eval_loss": 0.8936182856559753,
      "eval_runtime": 89.4083,
      "eval_samples_per_second": 0.839,
      "eval_steps_per_second": 0.425,
      "step": 48
    },
    {
      "epoch": 2.0406091370558377,
      "grad_norm": 1.8757866621017456,
      "learning_rate": 4.136564440743872e-05,
      "loss": 1.1625,
      "step": 49
    },
    {
      "epoch": 2.081218274111675,
      "grad_norm": 0.5622301697731018,
      "learning_rate": 4e-05,
      "loss": 0.5237,
      "step": 50
    },
    {
      "epoch": 2.1218274111675126,
      "grad_norm": 0.4795697331428528,
      "learning_rate": 3.8634355592561286e-05,
      "loss": 0.5147,
      "step": 51
    },
    {
      "epoch": 2.1624365482233503,
      "grad_norm": 0.9597147703170776,
      "learning_rate": 3.727030346541317e-05,
      "loss": 0.5149,
      "step": 52
    },
    {
      "epoch": 2.203045685279188,
      "grad_norm": 0.629220187664032,
      "learning_rate": 3.590943404231863e-05,
      "loss": 0.4905,
      "step": 53
    },
    {
      "epoch": 2.2436548223350252,
      "grad_norm": 0.46845948696136475,
      "learning_rate": 3.4553334036150146e-05,
      "loss": 0.488,
      "step": 54
    },
    {
      "epoch": 2.284263959390863,
      "grad_norm": 0.502011239528656,
      "learning_rate": 3.3203584598853335e-05,
      "loss": 0.4628,
      "step": 55
    },
    {
      "epoch": 2.3248730964467006,
      "grad_norm": 0.5105593800544739,
      "learning_rate": 3.1861759477894656e-05,
      "loss": 0.4941,
      "step": 56
    },
    {
      "epoch": 2.3654822335025383,
      "grad_norm": 0.48986580967903137,
      "learning_rate": 3.052942318134213e-05,
      "loss": 0.4881,
      "step": 57
    },
    {
      "epoch": 2.4060913705583755,
      "grad_norm": 0.5030830502510071,
      "learning_rate": 2.9208129153719026e-05,
      "loss": 0.4953,
      "step": 58
    },
    {
      "epoch": 2.446700507614213,
      "grad_norm": 0.47233346104621887,
      "learning_rate": 2.7899417964756973e-05,
      "loss": 0.4572,
      "step": 59
    },
    {
      "epoch": 2.487309644670051,
      "grad_norm": 0.49828535318374634,
      "learning_rate": 2.6604815513160556e-05,
      "loss": 0.472,
      "step": 60
    },
    {
      "epoch": 2.527918781725888,
      "grad_norm": 0.4967016577720642,
      "learning_rate": 2.5325831247477747e-05,
      "loss": 0.4835,
      "step": 61
    },
    {
      "epoch": 2.568527918781726,
      "grad_norm": 0.5143364667892456,
      "learning_rate": 2.4063956406150345e-05,
      "loss": 0.4703,
      "step": 62
    },
    {
      "epoch": 2.6091370558375635,
      "grad_norm": 0.5132246613502502,
      "learning_rate": 2.282066227879673e-05,
      "loss": 0.4804,
      "step": 63
    },
    {
      "epoch": 2.649746192893401,
      "grad_norm": 0.4817405641078949,
      "learning_rate": 2.1597398490753917e-05,
      "loss": 0.4722,
      "step": 64
    },
    {
      "epoch": 2.6903553299492384,
      "grad_norm": 0.4971908926963806,
      "learning_rate": 2.0395591312879324e-05,
      "loss": 0.4817,
      "step": 65
    },
    {
      "epoch": 2.730964467005076,
      "grad_norm": 0.48459553718566895,
      "learning_rate": 1.9216641998582666e-05,
      "loss": 0.4628,
      "step": 66
    },
    {
      "epoch": 2.771573604060914,
      "grad_norm": 0.4817572832107544,
      "learning_rate": 1.8061925150027244e-05,
      "loss": 0.4633,
      "step": 67
    },
    {
      "epoch": 2.812182741116751,
      "grad_norm": 0.4870697855949402,
      "learning_rate": 1.6932787115405318e-05,
      "loss": 0.4683,
      "step": 68
    },
    {
      "epoch": 2.8527918781725887,
      "grad_norm": 0.49406981468200684,
      "learning_rate": 1.5830544419156223e-05,
      "loss": 0.4738,
      "step": 69
    },
    {
      "epoch": 2.8934010152284264,
      "grad_norm": 0.4873085021972656,
      "learning_rate": 1.47564822269579e-05,
      "loss": 0.4624,
      "step": 70
    },
    {
      "epoch": 2.934010152284264,
      "grad_norm": 0.851365864276886,
      "learning_rate": 1.3711852847281098e-05,
      "loss": 0.4654,
      "step": 71
    },
    {
      "epoch": 2.974619289340102,
      "grad_norm": 0.4922949969768524,
      "learning_rate": 1.2697874271253844e-05,
      "loss": 0.4671,
      "step": 72
    },
    {
      "epoch": 2.974619289340102,
      "eval_loss": 0.9503005743026733,
      "eval_runtime": 89.423,
      "eval_samples_per_second": 0.839,
      "eval_steps_per_second": 0.425,
      "step": 72
    }
  ],
  "logging_steps": 1,
  "max_steps": 96,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.342087885624443e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|
|