{
  "best_global_step": 60006,
  "best_metric": 0.7708992224677207,
  "best_model_checkpoint": "./nvidia_domain_model_multilingual-e5-small/checkpoint-60006",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 60006,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.024997500249975,
      "grad_norm": 7.94265604019165,
      "learning_rate": 4.9584208245842084e-05,
      "loss": 2.602,
      "step": 500
    },
    {
      "epoch": 0.04999500049995,
      "grad_norm": 5.469175815582275,
      "learning_rate": 4.9167583241675834e-05,
      "loss": 1.8965,
      "step": 1000
    },
    {
      "epoch": 0.074992500749925,
      "grad_norm": 7.713351726531982,
      "learning_rate": 4.875095823750958e-05,
      "loss": 1.604,
      "step": 1500
    },
    {
      "epoch": 0.0999900009999,
      "grad_norm": 11.510587692260742,
      "learning_rate": 4.833433323334334e-05,
      "loss": 1.3957,
      "step": 2000
    },
    {
      "epoch": 0.12498750124987501,
      "grad_norm": 12.626413345336914,
      "learning_rate": 4.791770822917708e-05,
      "loss": 1.322,
      "step": 2500
    },
    {
      "epoch": 0.14998500149985,
      "grad_norm": 7.150252342224121,
      "learning_rate": 4.750108322501083e-05,
      "loss": 1.2218,
      "step": 3000
    },
    {
      "epoch": 0.17498250174982502,
      "grad_norm": 16.782085418701172,
      "learning_rate": 4.708445822084459e-05,
      "loss": 1.195,
      "step": 3500
    },
    {
      "epoch": 0.1999800019998,
      "grad_norm": 13.529509544372559,
      "learning_rate": 4.666783321667834e-05,
      "loss": 1.1313,
      "step": 4000
    },
    {
      "epoch": 0.22497750224977503,
      "grad_norm": 19.662353515625,
      "learning_rate": 4.625120821251208e-05,
      "loss": 1.0902,
      "step": 4500
    },
    {
      "epoch": 0.24997500249975002,
      "grad_norm": 11.194819450378418,
      "learning_rate": 4.5834583208345836e-05,
      "loss": 1.0637,
      "step": 5000
    },
    {
      "epoch": 0.274972502749725,
      "grad_norm": 13.16511058807373,
      "learning_rate": 4.5417958204179585e-05,
      "loss": 1.0626,
      "step": 5500
    },
    {
      "epoch": 0.2999700029997,
      "grad_norm": 9.290426254272461,
      "learning_rate": 4.5001333200013335e-05,
      "loss": 1.0054,
      "step": 6000
    },
    {
      "epoch": 0.32496750324967505,
      "grad_norm": 17.698017120361328,
      "learning_rate": 4.4584708195847084e-05,
      "loss": 1.0253,
      "step": 6500
    },
    {
      "epoch": 0.34996500349965004,
      "grad_norm": 15.605792999267578,
      "learning_rate": 4.4168083191680834e-05,
      "loss": 1.0127,
      "step": 7000
    },
    {
      "epoch": 0.37496250374962503,
      "grad_norm": 33.41305923461914,
      "learning_rate": 4.375145818751458e-05,
      "loss": 0.9714,
      "step": 7500
    },
    {
      "epoch": 0.3999600039996,
      "grad_norm": 18.213973999023438,
      "learning_rate": 4.333483318334833e-05,
      "loss": 0.9589,
      "step": 8000
    },
    {
      "epoch": 0.42495750424957507,
      "grad_norm": 11.406991958618164,
      "learning_rate": 4.291820817918208e-05,
      "loss": 0.9808,
      "step": 8500
    },
    {
      "epoch": 0.44995500449955006,
      "grad_norm": 15.420747756958008,
      "learning_rate": 4.250158317501584e-05,
      "loss": 0.9392,
      "step": 9000
    },
    {
      "epoch": 0.47495250474952505,
      "grad_norm": 19.129817962646484,
      "learning_rate": 4.208495817084958e-05,
      "loss": 0.9304,
      "step": 9500
    },
    {
      "epoch": 0.49995000499950004,
      "grad_norm": 9.371217727661133,
      "learning_rate": 4.166833316668333e-05,
      "loss": 0.9369,
      "step": 10000
    },
    {
      "epoch": 0.5249475052494751,
      "grad_norm": 11.86233901977539,
      "learning_rate": 4.1251708162517086e-05,
      "loss": 0.9181,
      "step": 10500
    },
    {
      "epoch": 0.54994500549945,
      "grad_norm": 16.078561782836914,
      "learning_rate": 4.0835083158350836e-05,
      "loss": 0.8996,
      "step": 11000
    },
    {
      "epoch": 0.5749425057494251,
      "grad_norm": 8.514225006103516,
      "learning_rate": 4.0418458154184585e-05,
      "loss": 0.9111,
      "step": 11500
    },
    {
      "epoch": 0.5999400059994,
      "grad_norm": 7.778424263000488,
      "learning_rate": 4.000183315001833e-05,
      "loss": 0.9033,
      "step": 12000
    },
    {
      "epoch": 0.624937506249375,
      "grad_norm": 10.383719444274902,
      "learning_rate": 3.9585208145852084e-05,
      "loss": 0.917,
      "step": 12500
    },
    {
      "epoch": 0.6499350064993501,
      "grad_norm": 12.048624992370605,
      "learning_rate": 3.9168583141685834e-05,
      "loss": 0.8872,
      "step": 13000
    },
    {
      "epoch": 0.674932506749325,
      "grad_norm": 14.255531311035156,
      "learning_rate": 3.875195813751958e-05,
      "loss": 0.8604,
      "step": 13500
    },
    {
      "epoch": 0.6999300069993001,
      "grad_norm": 15.18703556060791,
      "learning_rate": 3.833533313335333e-05,
      "loss": 0.8628,
      "step": 14000
    },
    {
      "epoch": 0.7249275072492751,
      "grad_norm": 12.154521942138672,
      "learning_rate": 3.791870812918708e-05,
      "loss": 0.8929,
      "step": 14500
    },
    {
      "epoch": 0.7499250074992501,
      "grad_norm": 14.692411422729492,
      "learning_rate": 3.750208312502083e-05,
      "loss": 0.8585,
      "step": 15000
    },
    {
      "epoch": 0.7749225077492251,
      "grad_norm": 8.900308609008789,
      "learning_rate": 3.708545812085458e-05,
      "loss": 0.9014,
      "step": 15500
    },
    {
      "epoch": 0.7999200079992,
      "grad_norm": 18.15697479248047,
      "learning_rate": 3.666883311668834e-05,
      "loss": 0.8581,
      "step": 16000
    },
    {
      "epoch": 0.8249175082491751,
      "grad_norm": 14.366026878356934,
      "learning_rate": 3.6252208112522086e-05,
      "loss": 0.8622,
      "step": 16500
    },
    {
      "epoch": 0.8499150084991501,
      "grad_norm": 14.673120498657227,
      "learning_rate": 3.583558310835583e-05,
      "loss": 0.873,
      "step": 17000
    },
    {
      "epoch": 0.8749125087491251,
      "grad_norm": 9.87514877319336,
      "learning_rate": 3.541895810418958e-05,
      "loss": 0.8446,
      "step": 17500
    },
    {
      "epoch": 0.8999100089991001,
      "grad_norm": 20.0493221282959,
      "learning_rate": 3.5002333100023335e-05,
      "loss": 0.819,
      "step": 18000
    },
    {
      "epoch": 0.924907509249075,
      "grad_norm": 18.50018882751465,
      "learning_rate": 3.4585708095857084e-05,
      "loss": 0.8458,
      "step": 18500
    },
    {
      "epoch": 0.9499050094990501,
      "grad_norm": 16.332889556884766,
      "learning_rate": 3.4169083091690833e-05,
      "loss": 0.8458,
      "step": 19000
    },
    {
      "epoch": 0.9749025097490251,
      "grad_norm": 11.074434280395508,
      "learning_rate": 3.375245808752458e-05,
      "loss": 0.8497,
      "step": 19500
    },
    {
      "epoch": 0.9999000099990001,
      "grad_norm": 8.59486198425293,
      "learning_rate": 3.333583308335833e-05,
      "loss": 0.7989,
      "step": 20000
    },
    {
      "epoch": 1.0,
      "eval_f1": 0.7451762918283228,
      "eval_loss": 0.8514304757118225,
      "eval_runtime": 10.7214,
      "eval_samples_per_second": 1865.614,
      "eval_steps_per_second": 233.272,
      "step": 20002
    },
    {
      "epoch": 1.024897510248975,
      "grad_norm": 11.283440589904785,
      "learning_rate": 3.291920807919208e-05,
      "loss": 0.6034,
      "step": 20500
    },
    {
      "epoch": 1.0498950104989502,
      "grad_norm": 14.751864433288574,
      "learning_rate": 3.250258307502583e-05,
      "loss": 0.6148,
      "step": 21000
    },
    {
      "epoch": 1.0748925107489251,
      "grad_norm": 20.8693790435791,
      "learning_rate": 3.208595807085959e-05,
      "loss": 0.614,
      "step": 21500
    },
    {
      "epoch": 1.0998900109989,
      "grad_norm": 15.057612419128418,
      "learning_rate": 3.166933306669333e-05,
      "loss": 0.5895,
      "step": 22000
    },
    {
      "epoch": 1.1248875112488752,
      "grad_norm": 10.95419979095459,
      "learning_rate": 3.125270806252708e-05,
      "loss": 0.6483,
      "step": 22500
    },
    {
      "epoch": 1.1498850114988501,
      "grad_norm": 17.469892501831055,
      "learning_rate": 3.083608305836083e-05,
      "loss": 0.6331,
      "step": 23000
    },
    {
      "epoch": 1.174882511748825,
      "grad_norm": 20.316282272338867,
      "learning_rate": 3.041945805419458e-05,
      "loss": 0.5885,
      "step": 23500
    },
    {
      "epoch": 1.1998800119988,
      "grad_norm": 5.562185764312744,
      "learning_rate": 3.0002833050028334e-05,
      "loss": 0.6082,
      "step": 24000
    },
    {
      "epoch": 1.2248775122487752,
      "grad_norm": 17.523334503173828,
      "learning_rate": 2.958620804586208e-05,
      "loss": 0.6312,
      "step": 24500
    },
    {
      "epoch": 1.24987501249875,
      "grad_norm": 20.40757179260254,
      "learning_rate": 2.916958304169583e-05,
      "loss": 0.6033,
      "step": 25000
    },
    {
      "epoch": 1.274872512748725,
      "grad_norm": 18.183963775634766,
      "learning_rate": 2.8752958037529583e-05,
      "loss": 0.6006,
      "step": 25500
    },
    {
      "epoch": 1.2998700129987002,
      "grad_norm": 4.399472236633301,
      "learning_rate": 2.8336333033363332e-05,
      "loss": 0.6283,
      "step": 26000
    },
    {
      "epoch": 1.3248675132486751,
      "grad_norm": 17.38117027282715,
      "learning_rate": 2.7919708029197085e-05,
      "loss": 0.6319,
      "step": 26500
    },
    {
      "epoch": 1.34986501349865,
      "grad_norm": 9.839600563049316,
      "learning_rate": 2.7503083025030828e-05,
      "loss": 0.5913,
      "step": 27000
    },
    {
      "epoch": 1.3748625137486252,
      "grad_norm": 3.2011570930480957,
      "learning_rate": 2.708645802086458e-05,
      "loss": 0.6037,
      "step": 27500
    },
    {
      "epoch": 1.3998600139986002,
      "grad_norm": 9.335294723510742,
      "learning_rate": 2.666983301669833e-05,
      "loss": 0.6025,
      "step": 28000
    },
    {
      "epoch": 1.424857514248575,
      "grad_norm": 26.70831298828125,
      "learning_rate": 2.6253208012532083e-05,
      "loss": 0.6067,
      "step": 28500
    },
    {
      "epoch": 1.4498550144985503,
      "grad_norm": 16.662883758544922,
      "learning_rate": 2.5836583008365832e-05,
      "loss": 0.6075,
      "step": 29000
    },
    {
      "epoch": 1.4748525147485252,
      "grad_norm": 18.168540954589844,
      "learning_rate": 2.5419958004199578e-05,
      "loss": 0.6035,
      "step": 29500
    },
    {
      "epoch": 1.4998500149985001,
      "grad_norm": 49.09202575683594,
      "learning_rate": 2.500333300003333e-05,
      "loss": 0.5826,
      "step": 30000
    },
    {
      "epoch": 1.5248475152484753,
      "grad_norm": 18.314056396484375,
      "learning_rate": 2.458670799586708e-05,
      "loss": 0.5905,
      "step": 30500
    },
    {
      "epoch": 1.54984501549845,
      "grad_norm": 4.7171406745910645,
      "learning_rate": 2.4170082991700833e-05,
      "loss": 0.563,
      "step": 31000
    },
    {
      "epoch": 1.5748425157484252,
      "grad_norm": 17.988279342651367,
      "learning_rate": 2.375345798753458e-05,
      "loss": 0.5795,
      "step": 31500
    },
    {
      "epoch": 1.5998400159984003,
      "grad_norm": 15.996960639953613,
      "learning_rate": 2.3336832983368332e-05,
      "loss": 0.603,
      "step": 32000
    },
    {
      "epoch": 1.624837516248375,
      "grad_norm": 15.832610130310059,
      "learning_rate": 2.2920207979202078e-05,
      "loss": 0.5805,
      "step": 32500
    },
    {
      "epoch": 1.6498350164983502,
      "grad_norm": 33.191444396972656,
      "learning_rate": 2.250358297503583e-05,
      "loss": 0.6108,
      "step": 33000
    },
    {
      "epoch": 1.6748325167483251,
      "grad_norm": 8.741061210632324,
      "learning_rate": 2.208695797086958e-05,
      "loss": 0.6077,
      "step": 33500
    },
    {
      "epoch": 1.6998300169983,
      "grad_norm": 14.29039192199707,
      "learning_rate": 2.167033296670333e-05,
      "loss": 0.5751,
      "step": 34000
    },
    {
      "epoch": 1.7248275172482752,
      "grad_norm": 21.69901466369629,
      "learning_rate": 2.1253707962537083e-05,
      "loss": 0.5833,
      "step": 34500
    },
    {
      "epoch": 1.7498250174982501,
      "grad_norm": 32.595794677734375,
      "learning_rate": 2.083708295837083e-05,
      "loss": 0.5895,
      "step": 35000
    },
    {
      "epoch": 1.774822517748225,
      "grad_norm": 42.687721252441406,
      "learning_rate": 2.042045795420458e-05,
      "loss": 0.5541,
      "step": 35500
    },
    {
      "epoch": 1.7998200179982002,
      "grad_norm": 16.474918365478516,
      "learning_rate": 2.000383295003833e-05,
      "loss": 0.5423,
      "step": 36000
    },
    {
      "epoch": 1.8248175182481752,
      "grad_norm": 13.296688079833984,
      "learning_rate": 1.958720794587208e-05,
      "loss": 0.5566,
      "step": 36500
    },
    {
      "epoch": 1.84981501849815,
      "grad_norm": 18.645790100097656,
      "learning_rate": 1.917058294170583e-05,
      "loss": 0.5493,
      "step": 37000
    },
    {
      "epoch": 1.8748125187481253,
      "grad_norm": 12.576258659362793,
      "learning_rate": 1.875395793753958e-05,
      "loss": 0.5602,
      "step": 37500
    },
    {
      "epoch": 1.8998100189981002,
      "grad_norm": 21.95449447631836,
      "learning_rate": 1.833733293337333e-05,
      "loss": 0.5878,
      "step": 38000
    },
    {
      "epoch": 1.9248075192480751,
      "grad_norm": 9.17590618133545,
      "learning_rate": 1.792070792920708e-05,
      "loss": 0.5681,
      "step": 38500
    },
    {
      "epoch": 1.9498050194980503,
      "grad_norm": 12.517435073852539,
      "learning_rate": 1.750408292504083e-05,
      "loss": 0.5464,
      "step": 39000
    },
    {
      "epoch": 1.9748025197480252,
      "grad_norm": 15.346318244934082,
      "learning_rate": 1.708745792087458e-05,
      "loss": 0.5917,
      "step": 39500
    },
    {
      "epoch": 1.9998000199980002,
      "grad_norm": 29.321331024169922,
      "learning_rate": 1.667083291670833e-05,
      "loss": 0.5443,
      "step": 40000
    },
    {
      "epoch": 2.0,
      "eval_f1": 0.7651638193675152,
      "eval_loss": 0.8535689115524292,
      "eval_runtime": 10.748,
      "eval_samples_per_second": 1860.991,
      "eval_steps_per_second": 232.694,
      "step": 40004
    },
    {
      "epoch": 2.0247975202479753,
      "grad_norm": 24.93914794921875,
      "learning_rate": 1.625420791254208e-05,
      "loss": 0.3501,
      "step": 40500
    },
    {
      "epoch": 2.04979502049795,
      "grad_norm": 50.30072784423828,
      "learning_rate": 1.5837582908375832e-05,
      "loss": 0.3785,
      "step": 41000
    },
    {
      "epoch": 2.074792520747925,
      "grad_norm": 24.169206619262695,
      "learning_rate": 1.5420957904209578e-05,
      "loss": 0.4034,
      "step": 41500
    },
    {
      "epoch": 2.0997900209979004,
      "grad_norm": 43.043338775634766,
      "learning_rate": 1.500433290004333e-05,
      "loss": 0.385,
      "step": 42000
    },
    {
      "epoch": 2.124787521247875,
      "grad_norm": 1.601791262626648,
      "learning_rate": 1.4587707895877079e-05,
      "loss": 0.3758,
      "step": 42500
    },
    {
      "epoch": 2.1497850214978502,
      "grad_norm": 1.0921714305877686,
      "learning_rate": 1.417108289171083e-05,
      "loss": 0.3713,
      "step": 43000
    },
    {
      "epoch": 2.1747825217478254,
      "grad_norm": 23.122596740722656,
      "learning_rate": 1.375445788754458e-05,
      "loss": 0.413,
      "step": 43500
    },
    {
      "epoch": 2.1997800219978,
      "grad_norm": 7.090549468994141,
      "learning_rate": 1.3337832883378329e-05,
      "loss": 0.3787,
      "step": 44000
    },
    {
      "epoch": 2.2247775222477753,
      "grad_norm": 17.668933868408203,
      "learning_rate": 1.292120787921208e-05,
      "loss": 0.3805,
      "step": 44500
    },
    {
      "epoch": 2.2497750224977504,
      "grad_norm": 15.878674507141113,
      "learning_rate": 1.2504582875045829e-05,
      "loss": 0.3757,
      "step": 45000
    },
    {
      "epoch": 2.274772522747725,
      "grad_norm": 39.11751937866211,
      "learning_rate": 1.2087957870879578e-05,
      "loss": 0.3887,
      "step": 45500
    },
    {
      "epoch": 2.2997700229977003,
      "grad_norm": 4.333780288696289,
      "learning_rate": 1.167133286671333e-05,
      "loss": 0.3789,
      "step": 46000
    },
    {
      "epoch": 2.324767523247675,
      "grad_norm": 21.4094295501709,
      "learning_rate": 1.1254707862547079e-05,
      "loss": 0.3742,
      "step": 46500
    },
    {
      "epoch": 2.34976502349765,
      "grad_norm": 14.586631774902344,
      "learning_rate": 1.083808285838083e-05,
      "loss": 0.3805,
      "step": 47000
    },
    {
      "epoch": 2.3747625237476253,
      "grad_norm": 1.1548786163330078,
      "learning_rate": 1.042145785421458e-05,
      "loss": 0.3936,
      "step": 47500
    },
    {
      "epoch": 2.3997600239976,
      "grad_norm": 0.03682245686650276,
      "learning_rate": 1.0004832850048329e-05,
      "loss": 0.38,
      "step": 48000
    },
    {
      "epoch": 2.424757524247575,
      "grad_norm": 35.44232940673828,
      "learning_rate": 9.588207845882078e-06,
      "loss": 0.3941,
      "step": 48500
    },
    {
      "epoch": 2.4497550244975503,
      "grad_norm": 8.77474594116211,
      "learning_rate": 9.171582841715828e-06,
      "loss": 0.4054,
      "step": 49000
    },
    {
      "epoch": 2.474752524747525,
      "grad_norm": 13.013947486877441,
      "learning_rate": 8.754957837549579e-06,
      "loss": 0.3659,
      "step": 49500
    },
    {
      "epoch": 2.4997500249975,
      "grad_norm": 14.281270980834961,
      "learning_rate": 8.338332833383328e-06,
      "loss": 0.3917,
      "step": 50000
    },
    {
      "epoch": 2.5247475252474754,
      "grad_norm": 24.0106258392334,
      "learning_rate": 7.92170782921708e-06,
      "loss": 0.3876,
      "step": 50500
    },
    {
      "epoch": 2.54974502549745,
      "grad_norm": 20.56169319152832,
      "learning_rate": 7.505082825050828e-06,
      "loss": 0.3628,
      "step": 51000
    },
    {
      "epoch": 2.5747425257474252,
      "grad_norm": 0.7545715570449829,
      "learning_rate": 7.088457820884578e-06,
      "loss": 0.3918,
      "step": 51500
    },
    {
      "epoch": 2.5997400259974004,
      "grad_norm": 17.21295928955078,
      "learning_rate": 6.6718328167183295e-06,
      "loss": 0.359,
      "step": 52000
    },
    {
      "epoch": 2.624737526247375,
      "grad_norm": 9.85009479522705,
      "learning_rate": 6.255207812552079e-06,
      "loss": 0.3634,
      "step": 52500
    },
    {
      "epoch": 2.6497350264973503,
      "grad_norm": 21.24859046936035,
      "learning_rate": 5.838582808385828e-06,
      "loss": 0.3737,
      "step": 53000
    },
    {
      "epoch": 2.6747325267473254,
      "grad_norm": 13.614805221557617,
      "learning_rate": 5.421957804219578e-06,
      "loss": 0.4022,
      "step": 53500
    },
    {
      "epoch": 2.6997300269973,
      "grad_norm": 5.028663158416748,
      "learning_rate": 5.005332800053329e-06,
      "loss": 0.3562,
      "step": 54000
    },
    {
      "epoch": 2.7247275272472753,
      "grad_norm": 22.341398239135742,
      "learning_rate": 4.588707795887078e-06,
      "loss": 0.349,
      "step": 54500
    },
    {
      "epoch": 2.7497250274972505,
      "grad_norm": 9.66286849975586,
      "learning_rate": 4.172082791720828e-06,
      "loss": 0.3573,
      "step": 55000
    },
    {
      "epoch": 2.774722527747225,
      "grad_norm": 4.927464962005615,
      "learning_rate": 3.755457787554578e-06,
      "loss": 0.335,
      "step": 55500
    },
    {
      "epoch": 2.7997200279972003,
      "grad_norm": 4.33281135559082,
      "learning_rate": 3.338832783388328e-06,
      "loss": 0.3679,
      "step": 56000
    },
    {
      "epoch": 2.8247175282471755,
      "grad_norm": 0.29482612013816833,
      "learning_rate": 2.9222077792220777e-06,
      "loss": 0.3266,
      "step": 56500
    },
    {
      "epoch": 2.84971502849715,
      "grad_norm": 21.363672256469727,
      "learning_rate": 2.505582775055828e-06,
      "loss": 0.3453,
      "step": 57000
    },
    {
      "epoch": 2.8747125287471254,
      "grad_norm": 2.6021454334259033,
      "learning_rate": 2.088957770889578e-06,
      "loss": 0.3682,
      "step": 57500
    },
    {
      "epoch": 2.8997100289971005,
      "grad_norm": 4.7911577224731445,
      "learning_rate": 1.6723327667233275e-06,
      "loss": 0.3417,
      "step": 58000
    },
    {
      "epoch": 2.924707529247075,
      "grad_norm": 0.21428282558918,
      "learning_rate": 1.2557077625570776e-06,
      "loss": 0.3192,
      "step": 58500
    },
    {
      "epoch": 2.9497050294970504,
      "grad_norm": 1.2091667652130127,
      "learning_rate": 8.390827583908276e-07,
      "loss": 0.3375,
      "step": 59000
    },
    {
      "epoch": 2.9747025297470255,
      "grad_norm": 24.199045181274414,
      "learning_rate": 4.2245775422457754e-07,
      "loss": 0.3669,
      "step": 59500
    },
    {
      "epoch": 2.9997000299970003,
      "grad_norm": 5.163976669311523,
      "learning_rate": 5.832750058327501e-09,
      "loss": 0.332,
      "step": 60000
    },
    {
      "epoch": 3.0,
      "eval_f1": 0.7708992224677207,
      "eval_loss": 0.9973717331886292,
      "eval_runtime": 11.045,
      "eval_samples_per_second": 1810.963,
      "eval_steps_per_second": 226.438,
      "step": 60006
    },
    {
      "epoch": 3.0,
      "step": 60006,
      "total_flos": 7908628105405440.0,
      "train_loss": 0.6686365460569141,
      "train_runtime": 1639.5865,
      "train_samples_per_second": 292.775,
      "train_steps_per_second": 36.598
    }
  ],
  "logging_steps": 500,
  "max_steps": 60006,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7908628105405440.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}