| { |
| "best_metric": 0.78049123, |
| "best_model_checkpoint": "/global/D1/homes/sushant/SoccerNetExperiments/Soccer-Video-ChatGPT/November_xvars/swift/output/qwen2-vl-7b-instruct/v7-20241118-100959/checkpoint-5800", |
| "epoch": 5.0, |
| "eval_steps": 100, |
| "global_step": 7270, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "acc": 0.51318568, |
| "epoch": 0.000687757909215956, |
| "grad_norm": 0.8106947541236877, |
| "learning_rate": 0.0, |
| "loss": 2.20640945, |
| "memory(GiB)": 68.96, |
| "step": 1, |
| "train_speed(iter/s)": 0.018567 |
| }, |
| { |
| "acc": 0.522241, |
| "epoch": 0.0034387895460797797, |
| "grad_norm": 0.7490503191947937, |
| "learning_rate": 2.7291774109314122e-05, |
| "loss": 2.21162605, |
| "memory(GiB)": 68.96, |
| "step": 5, |
| "train_speed(iter/s)": 0.040393 |
| }, |
| { |
| "acc": 0.54119682, |
| "epoch": 0.0068775790921595595, |
| "grad_norm": 0.7276351451873779, |
| "learning_rate": 3.904570144643008e-05, |
| "loss": 2.13246613, |
| "memory(GiB)": 73.29, |
| "step": 10, |
| "train_speed(iter/s)": 0.052109 |
| }, |
| { |
| "acc": 0.5478312, |
| "epoch": 0.01031636863823934, |
| "grad_norm": 0.763149082660675, |
| "learning_rate": 4.5921308174844174e-05, |
| "loss": 2.00201836, |
| "memory(GiB)": 73.29, |
| "step": 15, |
| "train_speed(iter/s)": 0.056038 |
| }, |
| { |
| "acc": 0.56882777, |
| "epoch": 0.013755158184319119, |
| "grad_norm": 0.7898057103157043, |
| "learning_rate": 5.0799628783546016e-05, |
| "loss": 1.84855347, |
| "memory(GiB)": 73.29, |
| "step": 20, |
| "train_speed(iter/s)": 0.058389 |
| }, |
| { |
| "acc": 0.59600019, |
| "epoch": 0.0171939477303989, |
| "grad_norm": 0.9877901673316956, |
| "learning_rate": 5.4583548218628245e-05, |
| "loss": 1.73883362, |
| "memory(GiB)": 73.29, |
| "step": 25, |
| "train_speed(iter/s)": 0.060811 |
| }, |
| { |
| "acc": 0.60646133, |
| "epoch": 0.02063273727647868, |
| "grad_norm": 0.9179441332817078, |
| "learning_rate": 5.7675235511960126e-05, |
| "loss": 1.64034233, |
| "memory(GiB)": 73.29, |
| "step": 30, |
| "train_speed(iter/s)": 0.06211 |
| }, |
| { |
| "acc": 0.61271744, |
| "epoch": 0.024071526822558458, |
| "grad_norm": 0.9307955503463745, |
| "learning_rate": 6.028921987267401e-05, |
| "loss": 1.5906249, |
| "memory(GiB)": 73.29, |
| "step": 35, |
| "train_speed(iter/s)": 0.06288 |
| }, |
| { |
| "acc": 0.63797045, |
| "epoch": 0.027510316368638238, |
| "grad_norm": 1.0717326402664185, |
| "learning_rate": 6.255355612066197e-05, |
| "loss": 1.44913902, |
| "memory(GiB)": 73.29, |
| "step": 40, |
| "train_speed(iter/s)": 0.064 |
| }, |
| { |
| "acc": 0.64407902, |
| "epoch": 0.030949105914718018, |
| "grad_norm": 1.0732834339141846, |
| "learning_rate": 6.455084224037423e-05, |
| "loss": 1.44504213, |
| "memory(GiB)": 73.29, |
| "step": 45, |
| "train_speed(iter/s)": 0.064348 |
| }, |
| { |
| "acc": 0.64924326, |
| "epoch": 0.0343878954607978, |
| "grad_norm": 1.1162458658218384, |
| "learning_rate": 6.633747555574418e-05, |
| "loss": 1.36141453, |
| "memory(GiB)": 73.29, |
| "step": 50, |
| "train_speed(iter/s)": 0.064587 |
| }, |
| { |
| "acc": 0.6600091, |
| "epoch": 0.03782668500687758, |
| "grad_norm": 1.0633102655410767, |
| "learning_rate": 6.795368198249832e-05, |
| "loss": 1.3460659, |
| "memory(GiB)": 73.29, |
| "step": 55, |
| "train_speed(iter/s)": 0.064754 |
| }, |
| { |
| "acc": 0.65980716, |
| "epoch": 0.04126547455295736, |
| "grad_norm": 1.0164440870285034, |
| "learning_rate": 6.942916284907606e-05, |
| "loss": 1.33123722, |
| "memory(GiB)": 73.29, |
| "step": 60, |
| "train_speed(iter/s)": 0.065266 |
| }, |
| { |
| "acc": 0.68235178, |
| "epoch": 0.04470426409903714, |
| "grad_norm": 1.185117483139038, |
| "learning_rate": 7.078647367172232e-05, |
| "loss": 1.24153843, |
| "memory(GiB)": 73.29, |
| "step": 65, |
| "train_speed(iter/s)": 0.065105 |
| }, |
| { |
| "acc": 0.67718811, |
| "epoch": 0.048143053645116916, |
| "grad_norm": 1.1237530708312988, |
| "learning_rate": 7.204314720978996e-05, |
| "loss": 1.28077126, |
| "memory(GiB)": 73.29, |
| "step": 70, |
| "train_speed(iter/s)": 0.065778 |
| }, |
| { |
| "acc": 0.68681493, |
| "epoch": 0.0515818431911967, |
| "grad_norm": 1.2692396640777588, |
| "learning_rate": 7.321308228415829e-05, |
| "loss": 1.25220881, |
| "memory(GiB)": 73.29, |
| "step": 75, |
| "train_speed(iter/s)": 0.066254 |
| }, |
| { |
| "acc": 0.67568145, |
| "epoch": 0.055020632737276476, |
| "grad_norm": 1.118291974067688, |
| "learning_rate": 7.43074834577779e-05, |
| "loss": 1.2524622, |
| "memory(GiB)": 73.29, |
| "step": 80, |
| "train_speed(iter/s)": 0.066119 |
| }, |
| { |
| "acc": 0.67670813, |
| "epoch": 0.05845942228335626, |
| "grad_norm": 1.1989713907241821, |
| "learning_rate": 7.533551533853211e-05, |
| "loss": 1.24576015, |
| "memory(GiB)": 73.29, |
| "step": 85, |
| "train_speed(iter/s)": 0.066293 |
| }, |
| { |
| "acc": 0.69235625, |
| "epoch": 0.061898211829436035, |
| "grad_norm": 1.2158828973770142, |
| "learning_rate": 7.630476957749017e-05, |
| "loss": 1.20176125, |
| "memory(GiB)": 73.29, |
| "step": 90, |
| "train_speed(iter/s)": 0.066483 |
| }, |
| { |
| "acc": 0.68914762, |
| "epoch": 0.06533700137551582, |
| "grad_norm": 1.1737231016159058, |
| "learning_rate": 7.722160543566566e-05, |
| "loss": 1.21274147, |
| "memory(GiB)": 73.29, |
| "step": 95, |
| "train_speed(iter/s)": 0.066668 |
| }, |
| { |
| "acc": 0.69834042, |
| "epoch": 0.0687757909215956, |
| "grad_norm": 1.351590871810913, |
| "learning_rate": 7.809140289286016e-05, |
| "loss": 1.1592926, |
| "memory(GiB)": 73.29, |
| "step": 100, |
| "train_speed(iter/s)": 0.067074 |
| }, |
| { |
| "epoch": 0.0687757909215956, |
| "eval_acc": 0.6962927970486346, |
| "eval_loss": 1.1645218133926392, |
| "eval_runtime": 1212.4549, |
| "eval_samples_per_second": 3.533, |
| "eval_steps_per_second": 0.064, |
| "step": 100 |
| }, |
| { |
| "acc": 0.68415451, |
| "epoch": 0.07221458046767538, |
| "grad_norm": 1.3831959962844849, |
| "learning_rate": 7.891875393820406e-05, |
| "loss": 1.2015852, |
| "memory(GiB)": 73.29, |
| "step": 105, |
| "train_speed(iter/s)": 0.037875 |
| }, |
| { |
| "acc": 0.68617525, |
| "epoch": 0.07565337001375516, |
| "grad_norm": 1.1734046936035156, |
| "learning_rate": 7.970760931961428e-05, |
| "loss": 1.20146303, |
| "memory(GiB)": 67.64, |
| "step": 110, |
| "train_speed(iter/s)": 0.038604 |
| }, |
| { |
| "acc": 0.71534705, |
| "epoch": 0.07909215955983494, |
| "grad_norm": 1.1629948616027832, |
| "learning_rate": 8.046139264575035e-05, |
| "loss": 1.0837039, |
| "memory(GiB)": 67.64, |
| "step": 115, |
| "train_speed(iter/s)": 0.039388 |
| }, |
| { |
| "acc": 0.69408731, |
| "epoch": 0.08253094910591471, |
| "grad_norm": 1.3171385526657104, |
| "learning_rate": 8.118309018619202e-05, |
| "loss": 1.18307505, |
| "memory(GiB)": 67.64, |
| "step": 120, |
| "train_speed(iter/s)": 0.040077 |
| }, |
| { |
| "acc": 0.70501647, |
| "epoch": 0.0859697386519945, |
| "grad_norm": 1.3044822216033936, |
| "learning_rate": 8.187532232794237e-05, |
| "loss": 1.13398886, |
| "memory(GiB)": 67.64, |
| "step": 125, |
| "train_speed(iter/s)": 0.040793 |
| }, |
| { |
| "acc": 0.70435572, |
| "epoch": 0.08940852819807428, |
| "grad_norm": 1.329248070716858, |
| "learning_rate": 8.254040100883828e-05, |
| "loss": 1.12086363, |
| "memory(GiB)": 67.64, |
| "step": 130, |
| "train_speed(iter/s)": 0.041495 |
| }, |
| { |
| "acc": 0.69323554, |
| "epoch": 0.09284731774415406, |
| "grad_norm": 1.2392340898513794, |
| "learning_rate": 8.318037630590428e-05, |
| "loss": 1.17682867, |
| "memory(GiB)": 67.64, |
| "step": 135, |
| "train_speed(iter/s)": 0.042188 |
| }, |
| { |
| "acc": 0.68262854, |
| "epoch": 0.09628610729023383, |
| "grad_norm": 1.50913667678833, |
| "learning_rate": 8.379707454690589e-05, |
| "loss": 1.20728226, |
| "memory(GiB)": 67.64, |
| "step": 140, |
| "train_speed(iter/s)": 0.042814 |
| }, |
| { |
| "acc": 0.71855187, |
| "epoch": 0.09972489683631362, |
| "grad_norm": 1.3727279901504517, |
| "learning_rate": 8.439212973113382e-05, |
| "loss": 1.06742191, |
| "memory(GiB)": 67.64, |
| "step": 145, |
| "train_speed(iter/s)": 0.043424 |
| }, |
| { |
| "acc": 0.70871205, |
| "epoch": 0.1031636863823934, |
| "grad_norm": 1.3805909156799316, |
| "learning_rate": 8.496700962127424e-05, |
| "loss": 1.11401825, |
| "memory(GiB)": 67.64, |
| "step": 150, |
| "train_speed(iter/s)": 0.044042 |
| }, |
| { |
| "acc": 0.73342023, |
| "epoch": 0.10660247592847318, |
| "grad_norm": 1.2183469533920288, |
| "learning_rate": 8.552303755540939e-05, |
| "loss": 1.00700331, |
| "memory(GiB)": 67.64, |
| "step": 155, |
| "train_speed(iter/s)": 0.044601 |
| }, |
| { |
| "acc": 0.70797634, |
| "epoch": 0.11004126547455295, |
| "grad_norm": 1.4554626941680908, |
| "learning_rate": 8.606141079489386e-05, |
| "loss": 1.09950924, |
| "memory(GiB)": 67.64, |
| "step": 160, |
| "train_speed(iter/s)": 0.045018 |
| }, |
| { |
| "acc": 0.69420943, |
| "epoch": 0.11348005502063274, |
| "grad_norm": 1.3245141506195068, |
| "learning_rate": 8.658321604802837e-05, |
| "loss": 1.15192003, |
| "memory(GiB)": 67.64, |
| "step": 165, |
| "train_speed(iter/s)": 0.045554 |
| }, |
| { |
| "acc": 0.7062036, |
| "epoch": 0.11691884456671252, |
| "grad_norm": 1.271952748298645, |
| "learning_rate": 8.708944267564807e-05, |
| "loss": 1.08263731, |
| "memory(GiB)": 67.64, |
| "step": 170, |
| "train_speed(iter/s)": 0.045943 |
| }, |
| { |
| "acc": 0.69984941, |
| "epoch": 0.1203576341127923, |
| "grad_norm": 1.4456363916397095, |
| "learning_rate": 8.758099398198813e-05, |
| "loss": 1.12532272, |
| "memory(GiB)": 67.64, |
| "step": 175, |
| "train_speed(iter/s)": 0.046356 |
| }, |
| { |
| "acc": 0.71651649, |
| "epoch": 0.12379642365887207, |
| "grad_norm": 1.44161057472229, |
| "learning_rate": 8.805869691460613e-05, |
| "loss": 1.07470217, |
| "memory(GiB)": 67.64, |
| "step": 180, |
| "train_speed(iter/s)": 0.04681 |
| }, |
| { |
| "acc": 0.71227612, |
| "epoch": 0.12723521320495185, |
| "grad_norm": 1.3441652059555054, |
| "learning_rate": 8.852331043501091e-05, |
| "loss": 1.09072762, |
| "memory(GiB)": 67.64, |
| "step": 185, |
| "train_speed(iter/s)": 0.047226 |
| }, |
| { |
| "acc": 0.72131248, |
| "epoch": 0.13067400275103164, |
| "grad_norm": 1.6152911186218262, |
| "learning_rate": 8.897553277278162e-05, |
| "loss": 1.05218563, |
| "memory(GiB)": 67.64, |
| "step": 190, |
| "train_speed(iter/s)": 0.047678 |
| }, |
| { |
| "acc": 0.71243434, |
| "epoch": 0.13411279229711143, |
| "grad_norm": 1.45099937915802, |
| "learning_rate": 8.94160077372524e-05, |
| "loss": 1.08823862, |
| "memory(GiB)": 67.64, |
| "step": 195, |
| "train_speed(iter/s)": 0.048064 |
| }, |
| { |
| "acc": 0.71744361, |
| "epoch": 0.1375515818431912, |
| "grad_norm": 1.4195399284362793, |
| "learning_rate": 8.984533022997609e-05, |
| "loss": 1.05846539, |
| "memory(GiB)": 67.64, |
| "step": 200, |
| "train_speed(iter/s)": 0.048352 |
| }, |
| { |
| "epoch": 0.1375515818431912, |
| "eval_acc": 0.7119831736176722, |
| "eval_loss": 1.0829237699508667, |
| "eval_runtime": 1085.6657, |
| "eval_samples_per_second": 3.945, |
| "eval_steps_per_second": 0.071, |
| "step": 200 |
| }, |
| { |
| "acc": 0.69938354, |
| "epoch": 0.14099037138927098, |
| "grad_norm": 1.3801318407058716, |
| "learning_rate": 9.026405107641496e-05, |
| "loss": 1.1244791, |
| "memory(GiB)": 67.64, |
| "step": 205, |
| "train_speed(iter/s)": 0.038724 |
| }, |
| { |
| "acc": 0.70936947, |
| "epoch": 0.14442916093535077, |
| "grad_norm": 1.3959752321243286, |
| "learning_rate": 9.067268127532e-05, |
| "loss": 1.09259109, |
| "memory(GiB)": 67.64, |
| "step": 210, |
| "train_speed(iter/s)": 0.039167 |
| }, |
| { |
| "acc": 0.70823245, |
| "epoch": 0.14786795048143053, |
| "grad_norm": 1.4662190675735474, |
| "learning_rate": 9.107169574803587e-05, |
| "loss": 1.11423931, |
| "memory(GiB)": 67.64, |
| "step": 215, |
| "train_speed(iter/s)": 0.039589 |
| }, |
| { |
| "acc": 0.71795692, |
| "epoch": 0.15130674002751032, |
| "grad_norm": 1.2458115816116333, |
| "learning_rate": 9.146153665673023e-05, |
| "loss": 1.08119087, |
| "memory(GiB)": 67.64, |
| "step": 220, |
| "train_speed(iter/s)": 0.040035 |
| }, |
| { |
| "acc": 0.70050411, |
| "epoch": 0.15474552957359008, |
| "grad_norm": 1.3731013536453247, |
| "learning_rate": 9.184261634968835e-05, |
| "loss": 1.12374535, |
| "memory(GiB)": 67.64, |
| "step": 225, |
| "train_speed(iter/s)": 0.040423 |
| }, |
| { |
| "acc": 0.7144835, |
| "epoch": 0.15818431911966988, |
| "grad_norm": 1.4678212404251099, |
| "learning_rate": 9.221531998286629e-05, |
| "loss": 1.05655756, |
| "memory(GiB)": 67.64, |
| "step": 230, |
| "train_speed(iter/s)": 0.04076 |
| }, |
| { |
| "acc": 0.70189781, |
| "epoch": 0.16162310866574967, |
| "grad_norm": 1.3910584449768066, |
| "learning_rate": 9.258000785948739e-05, |
| "loss": 1.12144871, |
| "memory(GiB)": 67.64, |
| "step": 235, |
| "train_speed(iter/s)": 0.04114 |
| }, |
| { |
| "acc": 0.73604274, |
| "epoch": 0.16506189821182943, |
| "grad_norm": 1.3721731901168823, |
| "learning_rate": 9.293701752330797e-05, |
| "loss": 0.98678083, |
| "memory(GiB)": 67.64, |
| "step": 240, |
| "train_speed(iter/s)": 0.041584 |
| }, |
| { |
| "acc": 0.71869593, |
| "epoch": 0.16850068775790922, |
| "grad_norm": 1.6655057668685913, |
| "learning_rate": 9.32866656360339e-05, |
| "loss": 1.07134695, |
| "memory(GiB)": 67.64, |
| "step": 245, |
| "train_speed(iter/s)": 0.042014 |
| }, |
| { |
| "acc": 0.71307015, |
| "epoch": 0.171939477303989, |
| "grad_norm": 1.3052812814712524, |
| "learning_rate": 9.36292496650583e-05, |
| "loss": 1.06437588, |
| "memory(GiB)": 67.64, |
| "step": 250, |
| "train_speed(iter/s)": 0.042373 |
| }, |
| { |
| "acc": 0.71391711, |
| "epoch": 0.17537826685006877, |
| "grad_norm": 1.5577940940856934, |
| "learning_rate": 9.396504940406217e-05, |
| "loss": 1.07451763, |
| "memory(GiB)": 67.64, |
| "step": 255, |
| "train_speed(iter/s)": 0.042705 |
| }, |
| { |
| "acc": 0.73166742, |
| "epoch": 0.17881705639614856, |
| "grad_norm": 1.33721923828125, |
| "learning_rate": 9.429432834595424e-05, |
| "loss": 0.99717045, |
| "memory(GiB)": 67.64, |
| "step": 260, |
| "train_speed(iter/s)": 0.043055 |
| }, |
| { |
| "acc": 0.71127567, |
| "epoch": 0.18225584594222832, |
| "grad_norm": 1.549870252609253, |
| "learning_rate": 9.461733492503013e-05, |
| "loss": 1.10144587, |
| "memory(GiB)": 67.64, |
| "step": 265, |
| "train_speed(iter/s)": 0.043386 |
| }, |
| { |
| "acc": 0.72763004, |
| "epoch": 0.1856946354883081, |
| "grad_norm": 1.4047890901565552, |
| "learning_rate": 9.493430364302024e-05, |
| "loss": 1.01531572, |
| "memory(GiB)": 67.64, |
| "step": 270, |
| "train_speed(iter/s)": 0.043699 |
| }, |
| { |
| "acc": 0.72718954, |
| "epoch": 0.1891334250343879, |
| "grad_norm": 1.3470264673233032, |
| "learning_rate": 9.524545609181246e-05, |
| "loss": 1.01689529, |
| "memory(GiB)": 67.64, |
| "step": 275, |
| "train_speed(iter/s)": 0.043985 |
| }, |
| { |
| "acc": 0.72714009, |
| "epoch": 0.19257221458046767, |
| "grad_norm": 1.3996589183807373, |
| "learning_rate": 9.555100188402185e-05, |
| "loss": 1.01372051, |
| "memory(GiB)": 67.64, |
| "step": 280, |
| "train_speed(iter/s)": 0.044244 |
| }, |
| { |
| "acc": 0.71828256, |
| "epoch": 0.19601100412654746, |
| "grad_norm": 1.5369681119918823, |
| "learning_rate": 9.585113950119573e-05, |
| "loss": 1.06217566, |
| "memory(GiB)": 67.64, |
| "step": 285, |
| "train_speed(iter/s)": 0.044532 |
| }, |
| { |
| "acc": 0.72461739, |
| "epoch": 0.19944979367262725, |
| "grad_norm": 1.3398535251617432, |
| "learning_rate": 9.614605706824978e-05, |
| "loss": 1.03975096, |
| "memory(GiB)": 67.64, |
| "step": 290, |
| "train_speed(iter/s)": 0.044835 |
| }, |
| { |
| "acc": 0.71793423, |
| "epoch": 0.202888583218707, |
| "grad_norm": 1.4092602729797363, |
| "learning_rate": 9.64359330617034e-05, |
| "loss": 1.05028229, |
| "memory(GiB)": 67.64, |
| "step": 295, |
| "train_speed(iter/s)": 0.045175 |
| }, |
| { |
| "acc": 0.72302713, |
| "epoch": 0.2063273727647868, |
| "grad_norm": 1.2952080965042114, |
| "learning_rate": 9.67209369583902e-05, |
| "loss": 1.01765738, |
| "memory(GiB)": 67.64, |
| "step": 300, |
| "train_speed(iter/s)": 0.045401 |
| }, |
| { |
| "epoch": 0.2063273727647868, |
| "eval_acc": 0.7200083232105098, |
| "eval_loss": 1.0458483695983887, |
| "eval_runtime": 1091.4981, |
| "eval_samples_per_second": 3.924, |
| "eval_steps_per_second": 0.071, |
| "step": 300 |
| }, |
| { |
| "acc": 0.709624, |
| "epoch": 0.2097661623108666, |
| "grad_norm": 1.5634573698043823, |
| "learning_rate": 9.700122983054879e-05, |
| "loss": 1.07294426, |
| "memory(GiB)": 67.64, |
| "step": 305, |
| "train_speed(iter/s)": 0.039263 |
| }, |
| { |
| "acc": 0.70980182, |
| "epoch": 0.21320495185694635, |
| "grad_norm": 1.372841477394104, |
| "learning_rate": 9.727696489252533e-05, |
| "loss": 1.10122662, |
| "memory(GiB)": 67.64, |
| "step": 310, |
| "train_speed(iter/s)": 0.039598 |
| }, |
| { |
| "acc": 0.73576632, |
| "epoch": 0.21664374140302614, |
| "grad_norm": 1.539969801902771, |
| "learning_rate": 9.754828800373411e-05, |
| "loss": 0.98867779, |
| "memory(GiB)": 67.64, |
| "step": 315, |
| "train_speed(iter/s)": 0.039872 |
| }, |
| { |
| "acc": 0.7245533, |
| "epoch": 0.2200825309491059, |
| "grad_norm": 1.2448300123214722, |
| "learning_rate": 9.781533813200982e-05, |
| "loss": 1.01700201, |
| "memory(GiB)": 67.64, |
| "step": 320, |
| "train_speed(iter/s)": 0.040166 |
| }, |
| { |
| "acc": 0.72238054, |
| "epoch": 0.2235213204951857, |
| "grad_norm": 1.2697371244430542, |
| "learning_rate": 9.807824778103646e-05, |
| "loss": 1.0503273, |
| "memory(GiB)": 67.64, |
| "step": 325, |
| "train_speed(iter/s)": 0.040431 |
| }, |
| { |
| "acc": 0.7078352, |
| "epoch": 0.22696011004126548, |
| "grad_norm": 1.3228161334991455, |
| "learning_rate": 9.833714338514432e-05, |
| "loss": 1.10422878, |
| "memory(GiB)": 67.64, |
| "step": 330, |
| "train_speed(iter/s)": 0.040712 |
| }, |
| { |
| "acc": 0.71083031, |
| "epoch": 0.23039889958734525, |
| "grad_norm": 1.2554104328155518, |
| "learning_rate": 9.859214567441929e-05, |
| "loss": 1.06728878, |
| "memory(GiB)": 67.64, |
| "step": 335, |
| "train_speed(iter/s)": 0.040973 |
| }, |
| { |
| "acc": 0.73246231, |
| "epoch": 0.23383768913342504, |
| "grad_norm": 1.601881742477417, |
| "learning_rate": 9.884337001276401e-05, |
| "loss": 0.99594593, |
| "memory(GiB)": 67.64, |
| "step": 340, |
| "train_speed(iter/s)": 0.041255 |
| }, |
| { |
| "acc": 0.72822175, |
| "epoch": 0.23727647867950483, |
| "grad_norm": 1.374062418937683, |
| "learning_rate": 9.90909267112804e-05, |
| "loss": 0.9949461, |
| "memory(GiB)": 67.64, |
| "step": 345, |
| "train_speed(iter/s)": 0.041529 |
| }, |
| { |
| "acc": 0.72813654, |
| "epoch": 0.2407152682255846, |
| "grad_norm": 1.4039307832717896, |
| "learning_rate": 9.933492131910406e-05, |
| "loss": 1.00009727, |
| "memory(GiB)": 67.64, |
| "step": 350, |
| "train_speed(iter/s)": 0.041803 |
| }, |
| { |
| "acc": 0.71051707, |
| "epoch": 0.24415405777166438, |
| "grad_norm": 1.4029077291488647, |
| "learning_rate": 9.957545489361027e-05, |
| "loss": 1.05340385, |
| "memory(GiB)": 67.64, |
| "step": 355, |
| "train_speed(iter/s)": 0.042061 |
| }, |
| { |
| "acc": 0.72410893, |
| "epoch": 0.24759284731774414, |
| "grad_norm": 1.379601001739502, |
| "learning_rate": 9.981262425172208e-05, |
| "loss": 1.03275814, |
| "memory(GiB)": 67.64, |
| "step": 360, |
| "train_speed(iter/s)": 0.042262 |
| }, |
| { |
| "acc": 0.72450876, |
| "epoch": 0.25103163686382396, |
| "grad_norm": 1.2809425592422485, |
| "learning_rate": 9.999999482699181e-05, |
| "loss": 1.02212152, |
| "memory(GiB)": 67.64, |
| "step": 365, |
| "train_speed(iter/s)": 0.042465 |
| }, |
| { |
| "acc": 0.72661881, |
| "epoch": 0.2544704264099037, |
| "grad_norm": 1.5250205993652344, |
| "learning_rate": 9.999981377181717e-05, |
| "loss": 1.03086433, |
| "memory(GiB)": 67.64, |
| "step": 370, |
| "train_speed(iter/s)": 0.042736 |
| }, |
| { |
| "acc": 0.72812705, |
| "epoch": 0.2579092159559835, |
| "grad_norm": 1.2832344770431519, |
| "learning_rate": 9.999937406730297e-05, |
| "loss": 1.00952168, |
| "memory(GiB)": 67.64, |
| "step": 375, |
| "train_speed(iter/s)": 0.042979 |
| }, |
| { |
| "acc": 0.69843874, |
| "epoch": 0.2613480055020633, |
| "grad_norm": 1.3689916133880615, |
| "learning_rate": 9.999867571572407e-05, |
| "loss": 1.1430685, |
| "memory(GiB)": 67.64, |
| "step": 380, |
| "train_speed(iter/s)": 0.043186 |
| }, |
| { |
| "acc": 0.71121368, |
| "epoch": 0.26478679504814306, |
| "grad_norm": 1.5115655660629272, |
| "learning_rate": 9.999771872069336e-05, |
| "loss": 1.06673965, |
| "memory(GiB)": 67.64, |
| "step": 385, |
| "train_speed(iter/s)": 0.043399 |
| }, |
| { |
| "acc": 0.71244879, |
| "epoch": 0.26822558459422285, |
| "grad_norm": 1.2644624710083008, |
| "learning_rate": 9.999650308716193e-05, |
| "loss": 1.0759717, |
| "memory(GiB)": 67.64, |
| "step": 390, |
| "train_speed(iter/s)": 0.043596 |
| }, |
| { |
| "acc": 0.71011033, |
| "epoch": 0.2716643741403026, |
| "grad_norm": 1.425584077835083, |
| "learning_rate": 9.999502882141882e-05, |
| "loss": 1.08612566, |
| "memory(GiB)": 67.64, |
| "step": 395, |
| "train_speed(iter/s)": 0.043813 |
| }, |
| { |
| "acc": 0.71973572, |
| "epoch": 0.2751031636863824, |
| "grad_norm": 1.281044840812683, |
| "learning_rate": 9.999329593109124e-05, |
| "loss": 1.04273968, |
| "memory(GiB)": 67.64, |
| "step": 400, |
| "train_speed(iter/s)": 0.04406 |
| }, |
| { |
| "epoch": 0.2751031636863824, |
| "eval_acc": 0.723596301795114, |
| "eval_loss": 1.0237661600112915, |
| "eval_runtime": 1145.744, |
| "eval_samples_per_second": 3.738, |
| "eval_steps_per_second": 0.067, |
| "step": 400 |
| }, |
| { |
| "acc": 0.72366686, |
| "epoch": 0.27854195323246217, |
| "grad_norm": 1.4894949197769165, |
| "learning_rate": 9.999130442514431e-05, |
| "loss": 1.02950411, |
| "memory(GiB)": 67.64, |
| "step": 405, |
| "train_speed(iter/s)": 0.03932 |
| }, |
| { |
| "acc": 0.74041648, |
| "epoch": 0.28198074277854196, |
| "grad_norm": 1.2302844524383545, |
| "learning_rate": 9.998905431388113e-05, |
| "loss": 0.95937977, |
| "memory(GiB)": 67.64, |
| "step": 410, |
| "train_speed(iter/s)": 0.039555 |
| }, |
| { |
| "acc": 0.72292333, |
| "epoch": 0.28541953232462175, |
| "grad_norm": 1.1821825504302979, |
| "learning_rate": 9.998654560894271e-05, |
| "loss": 1.02365704, |
| "memory(GiB)": 67.64, |
| "step": 415, |
| "train_speed(iter/s)": 0.039766 |
| }, |
| { |
| "acc": 0.70973835, |
| "epoch": 0.28885832187070154, |
| "grad_norm": 1.2947014570236206, |
| "learning_rate": 9.998377832330788e-05, |
| "loss": 1.07417269, |
| "memory(GiB)": 67.64, |
| "step": 420, |
| "train_speed(iter/s)": 0.039992 |
| }, |
| { |
| "acc": 0.73527951, |
| "epoch": 0.2922971114167813, |
| "grad_norm": 1.2616949081420898, |
| "learning_rate": 9.99807524712933e-05, |
| "loss": 0.98149738, |
| "memory(GiB)": 67.64, |
| "step": 425, |
| "train_speed(iter/s)": 0.040206 |
| }, |
| { |
| "acc": 0.71251645, |
| "epoch": 0.29573590096286106, |
| "grad_norm": 1.2349984645843506, |
| "learning_rate": 9.997746806855323e-05, |
| "loss": 1.07718506, |
| "memory(GiB)": 67.64, |
| "step": 430, |
| "train_speed(iter/s)": 0.040404 |
| }, |
| { |
| "acc": 0.72872591, |
| "epoch": 0.29917469050894085, |
| "grad_norm": 1.128265619277954, |
| "learning_rate": 9.997392513207963e-05, |
| "loss": 1.00703831, |
| "memory(GiB)": 67.64, |
| "step": 435, |
| "train_speed(iter/s)": 0.040581 |
| }, |
| { |
| "acc": 0.72117209, |
| "epoch": 0.30261348005502064, |
| "grad_norm": 1.249985933303833, |
| "learning_rate": 9.997012368020198e-05, |
| "loss": 1.01667709, |
| "memory(GiB)": 67.64, |
| "step": 440, |
| "train_speed(iter/s)": 0.040799 |
| }, |
| { |
| "acc": 0.72518797, |
| "epoch": 0.30605226960110044, |
| "grad_norm": 1.3999882936477661, |
| "learning_rate": 9.996606373258716e-05, |
| "loss": 1.04834728, |
| "memory(GiB)": 67.64, |
| "step": 445, |
| "train_speed(iter/s)": 0.041 |
| }, |
| { |
| "acc": 0.72560539, |
| "epoch": 0.30949105914718017, |
| "grad_norm": 1.3446978330612183, |
| "learning_rate": 9.99617453102394e-05, |
| "loss": 1.01653395, |
| "memory(GiB)": 67.64, |
| "step": 450, |
| "train_speed(iter/s)": 0.041207 |
| }, |
| { |
| "acc": 0.72350621, |
| "epoch": 0.31292984869325996, |
| "grad_norm": 1.2894266843795776, |
| "learning_rate": 9.99571684355002e-05, |
| "loss": 0.99579372, |
| "memory(GiB)": 67.64, |
| "step": 455, |
| "train_speed(iter/s)": 0.041368 |
| }, |
| { |
| "acc": 0.70948811, |
| "epoch": 0.31636863823933975, |
| "grad_norm": 1.4214539527893066, |
| "learning_rate": 9.995233313204806e-05, |
| "loss": 1.09332161, |
| "memory(GiB)": 67.64, |
| "step": 460, |
| "train_speed(iter/s)": 0.041539 |
| }, |
| { |
| "acc": 0.74661293, |
| "epoch": 0.31980742778541954, |
| "grad_norm": 1.2697914838790894, |
| "learning_rate": 9.994723942489859e-05, |
| "loss": 0.93414135, |
| "memory(GiB)": 67.64, |
| "step": 465, |
| "train_speed(iter/s)": 0.041724 |
| }, |
| { |
| "acc": 0.72276139, |
| "epoch": 0.32324621733149933, |
| "grad_norm": 1.2612886428833008, |
| "learning_rate": 9.99418873404042e-05, |
| "loss": 1.04514399, |
| "memory(GiB)": 67.64, |
| "step": 470, |
| "train_speed(iter/s)": 0.041901 |
| }, |
| { |
| "acc": 0.72859631, |
| "epoch": 0.32668500687757906, |
| "grad_norm": 1.2637856006622314, |
| "learning_rate": 9.993627690625399e-05, |
| "loss": 0.99566994, |
| "memory(GiB)": 67.64, |
| "step": 475, |
| "train_speed(iter/s)": 0.042059 |
| }, |
| { |
| "acc": 0.72311392, |
| "epoch": 0.33012379642365886, |
| "grad_norm": 1.2103707790374756, |
| "learning_rate": 9.993040815147369e-05, |
| "loss": 1.02551346, |
| "memory(GiB)": 67.64, |
| "step": 480, |
| "train_speed(iter/s)": 0.042237 |
| }, |
| { |
| "acc": 0.7304266, |
| "epoch": 0.33356258596973865, |
| "grad_norm": 1.4478263854980469, |
| "learning_rate": 9.992428110642546e-05, |
| "loss": 1.00502892, |
| "memory(GiB)": 67.64, |
| "step": 485, |
| "train_speed(iter/s)": 0.042429 |
| }, |
| { |
| "acc": 0.72812204, |
| "epoch": 0.33700137551581844, |
| "grad_norm": 1.28928542137146, |
| "learning_rate": 9.991789580280768e-05, |
| "loss": 0.99270744, |
| "memory(GiB)": 67.64, |
| "step": 490, |
| "train_speed(iter/s)": 0.042611 |
| }, |
| { |
| "acc": 0.73110504, |
| "epoch": 0.3404401650618982, |
| "grad_norm": 1.277113914489746, |
| "learning_rate": 9.991125227365489e-05, |
| "loss": 0.9932848, |
| "memory(GiB)": 67.71, |
| "step": 495, |
| "train_speed(iter/s)": 0.042803 |
| }, |
| { |
| "acc": 0.73536983, |
| "epoch": 0.343878954607978, |
| "grad_norm": 1.4031190872192383, |
| "learning_rate": 9.990435055333755e-05, |
| "loss": 1.00407228, |
| "memory(GiB)": 67.71, |
| "step": 500, |
| "train_speed(iter/s)": 0.042997 |
| }, |
| { |
| "epoch": 0.343878954607978, |
| "eval_acc": 0.7273923606424618, |
| "eval_loss": 1.006140112876892, |
| "eval_runtime": 1123.2925, |
| "eval_samples_per_second": 3.813, |
| "eval_steps_per_second": 0.069, |
| "step": 500 |
| }, |
| { |
| "acc": 0.7310411, |
| "epoch": 0.34731774415405775, |
| "grad_norm": 1.1264581680297852, |
| "learning_rate": 9.989719067756184e-05, |
| "loss": 0.97913218, |
| "memory(GiB)": 67.71, |
| "step": 505, |
| "train_speed(iter/s)": 0.039389 |
| }, |
| { |
| "acc": 0.72247181, |
| "epoch": 0.35075653370013754, |
| "grad_norm": 1.2322190999984741, |
| "learning_rate": 9.988977268336956e-05, |
| "loss": 1.04118223, |
| "memory(GiB)": 67.71, |
| "step": 510, |
| "train_speed(iter/s)": 0.039571 |
| }, |
| { |
| "acc": 0.7294539, |
| "epoch": 0.35419532324621733, |
| "grad_norm": 1.1988883018493652, |
| "learning_rate": 9.988209660913789e-05, |
| "loss": 0.96120787, |
| "memory(GiB)": 67.71, |
| "step": 515, |
| "train_speed(iter/s)": 0.039729 |
| }, |
| { |
| "acc": 0.72807951, |
| "epoch": 0.3576341127922971, |
| "grad_norm": 1.4514073133468628, |
| "learning_rate": 9.987416249457917e-05, |
| "loss": 1.00832357, |
| "memory(GiB)": 67.71, |
| "step": 520, |
| "train_speed(iter/s)": 0.039869 |
| }, |
| { |
| "acc": 0.72818184, |
| "epoch": 0.3610729023383769, |
| "grad_norm": 1.2781667709350586, |
| "learning_rate": 9.986597038074072e-05, |
| "loss": 1.00557394, |
| "memory(GiB)": 67.71, |
| "step": 525, |
| "train_speed(iter/s)": 0.040019 |
| }, |
| { |
| "acc": 0.7372427, |
| "epoch": 0.36451169188445665, |
| "grad_norm": 1.196447491645813, |
| "learning_rate": 9.985752031000465e-05, |
| "loss": 0.97588711, |
| "memory(GiB)": 67.71, |
| "step": 530, |
| "train_speed(iter/s)": 0.040179 |
| }, |
| { |
| "acc": 0.73485746, |
| "epoch": 0.36795048143053644, |
| "grad_norm": 1.2713799476623535, |
| "learning_rate": 9.984881232608758e-05, |
| "loss": 0.99121141, |
| "memory(GiB)": 67.71, |
| "step": 535, |
| "train_speed(iter/s)": 0.040356 |
| }, |
| { |
| "acc": 0.7316514, |
| "epoch": 0.3713892709766162, |
| "grad_norm": 1.388735055923462, |
| "learning_rate": 9.983984647404047e-05, |
| "loss": 0.97529774, |
| "memory(GiB)": 67.71, |
| "step": 540, |
| "train_speed(iter/s)": 0.040533 |
| }, |
| { |
| "acc": 0.73824301, |
| "epoch": 0.374828060522696, |
| "grad_norm": 1.263832926750183, |
| "learning_rate": 9.983062280024837e-05, |
| "loss": 0.95761375, |
| "memory(GiB)": 67.71, |
| "step": 545, |
| "train_speed(iter/s)": 0.040707 |
| }, |
| { |
| "acc": 0.72791233, |
| "epoch": 0.3782668500687758, |
| "grad_norm": 1.3154568672180176, |
| "learning_rate": 9.982114135243019e-05, |
| "loss": 1.00505419, |
| "memory(GiB)": 67.71, |
| "step": 550, |
| "train_speed(iter/s)": 0.040862 |
| }, |
| { |
| "acc": 0.73077579, |
| "epoch": 0.3817056396148556, |
| "grad_norm": 1.2996647357940674, |
| "learning_rate": 9.981140217963838e-05, |
| "loss": 0.98154631, |
| "memory(GiB)": 67.71, |
| "step": 555, |
| "train_speed(iter/s)": 0.041008 |
| }, |
| { |
| "acc": 0.7352643, |
| "epoch": 0.38514442916093533, |
| "grad_norm": 1.3090369701385498, |
| "learning_rate": 9.980140533225882e-05, |
| "loss": 0.9830574, |
| "memory(GiB)": 67.71, |
| "step": 560, |
| "train_speed(iter/s)": 0.041146 |
| }, |
| { |
| "acc": 0.7195425, |
| "epoch": 0.3885832187070151, |
| "grad_norm": 1.655612587928772, |
| "learning_rate": 9.979115086201042e-05, |
| "loss": 1.05448446, |
| "memory(GiB)": 67.71, |
| "step": 565, |
| "train_speed(iter/s)": 0.041304 |
| }, |
| { |
| "acc": 0.73759327, |
| "epoch": 0.3920220082530949, |
| "grad_norm": 1.183268427848816, |
| "learning_rate": 9.978063882194492e-05, |
| "loss": 0.96683788, |
| "memory(GiB)": 67.71, |
| "step": 570, |
| "train_speed(iter/s)": 0.041468 |
| }, |
| { |
| "acc": 0.73216171, |
| "epoch": 0.3954607977991747, |
| "grad_norm": 1.2590916156768799, |
| "learning_rate": 9.976986926644662e-05, |
| "loss": 0.97658138, |
| "memory(GiB)": 67.71, |
| "step": 575, |
| "train_speed(iter/s)": 0.04163 |
| }, |
| { |
| "acc": 0.72127271, |
| "epoch": 0.3988995873452545, |
| "grad_norm": 1.1548501253128052, |
| "learning_rate": 9.975884225123204e-05, |
| "loss": 1.00985394, |
| "memory(GiB)": 67.71, |
| "step": 580, |
| "train_speed(iter/s)": 0.041797 |
| }, |
| { |
| "acc": 0.74563594, |
| "epoch": 0.4023383768913342, |
| "grad_norm": 1.0580244064331055, |
| "learning_rate": 9.974755783334972e-05, |
| "loss": 0.94991455, |
| "memory(GiB)": 67.71, |
| "step": 585, |
| "train_speed(iter/s)": 0.041937 |
| }, |
| { |
| "acc": 0.72397938, |
| "epoch": 0.405777166437414, |
| "grad_norm": 1.2799969911575317, |
| "learning_rate": 9.973601607117985e-05, |
| "loss": 1.04541121, |
| "memory(GiB)": 67.71, |
| "step": 590, |
| "train_speed(iter/s)": 0.042103 |
| }, |
| { |
| "acc": 0.75536423, |
| "epoch": 0.4092159559834938, |
| "grad_norm": 1.2122467756271362, |
| "learning_rate": 9.972421702443402e-05, |
| "loss": 0.91661882, |
| "memory(GiB)": 67.71, |
| "step": 595, |
| "train_speed(iter/s)": 0.042263 |
| }, |
| { |
| "acc": 0.72923999, |
| "epoch": 0.4126547455295736, |
| "grad_norm": 1.3098151683807373, |
| "learning_rate": 9.971216075415486e-05, |
| "loss": 0.99268637, |
| "memory(GiB)": 67.71, |
| "step": 600, |
| "train_speed(iter/s)": 0.042394 |
| }, |
| { |
| "epoch": 0.4126547455295736, |
| "eval_acc": 0.7299005713771539, |
| "eval_loss": 0.9898082613945007, |
| "eval_runtime": 1136.3836, |
| "eval_samples_per_second": 3.769, |
| "eval_steps_per_second": 0.068, |
| "step": 600 |
| }, |
| { |
| "acc": 0.73311081, |
| "epoch": 0.4160935350756534, |
| "grad_norm": 1.149190902709961, |
| "learning_rate": 9.969984732271578e-05, |
| "loss": 0.98028679, |
| "memory(GiB)": 67.71, |
| "step": 605, |
| "train_speed(iter/s)": 0.039392 |
| }, |
| { |
| "acc": 0.7316927, |
| "epoch": 0.4195323246217332, |
| "grad_norm": 1.3081296682357788, |
| "learning_rate": 9.96872767938206e-05, |
| "loss": 0.98179483, |
| "memory(GiB)": 67.71, |
| "step": 610, |
| "train_speed(iter/s)": 0.039559 |
| }, |
| { |
| "acc": 0.73893361, |
| "epoch": 0.4229711141678129, |
| "grad_norm": 1.1731023788452148, |
| "learning_rate": 9.967444923250323e-05, |
| "loss": 0.94215651, |
| "memory(GiB)": 67.71, |
| "step": 615, |
| "train_speed(iter/s)": 0.039695 |
| }, |
| { |
| "acc": 0.72336564, |
| "epoch": 0.4264099037138927, |
| "grad_norm": 1.2004274129867554, |
| "learning_rate": 9.966136470512739e-05, |
| "loss": 1.01167727, |
| "memory(GiB)": 67.71, |
| "step": 620, |
| "train_speed(iter/s)": 0.03985 |
| }, |
| { |
| "acc": 0.73260341, |
| "epoch": 0.4298486932599725, |
| "grad_norm": 1.1863032579421997, |
| "learning_rate": 9.964802327938616e-05, |
| "loss": 0.98780212, |
| "memory(GiB)": 67.71, |
| "step": 625, |
| "train_speed(iter/s)": 0.039998 |
| }, |
| { |
| "acc": 0.72430835, |
| "epoch": 0.4332874828060523, |
| "grad_norm": 1.2297348976135254, |
| "learning_rate": 9.963442502430173e-05, |
| "loss": 1.02258396, |
| "memory(GiB)": 67.71, |
| "step": 630, |
| "train_speed(iter/s)": 0.0401 |
| }, |
| { |
| "acc": 0.73400669, |
| "epoch": 0.43672627235213207, |
| "grad_norm": 1.1201564073562622, |
| "learning_rate": 9.962057001022499e-05, |
| "loss": 0.95277481, |
| "memory(GiB)": 67.71, |
| "step": 635, |
| "train_speed(iter/s)": 0.040238 |
| }, |
| { |
| "acc": 0.72435627, |
| "epoch": 0.4401650618982118, |
| "grad_norm": 1.2594115734100342, |
| "learning_rate": 9.96064583088352e-05, |
| "loss": 1.01793871, |
| "memory(GiB)": 67.71, |
| "step": 640, |
| "train_speed(iter/s)": 0.040389 |
| }, |
| { |
| "acc": 0.74932237, |
| "epoch": 0.4436038514442916, |
| "grad_norm": 1.0871134996414185, |
| "learning_rate": 9.959208999313953e-05, |
| "loss": 0.92056198, |
| "memory(GiB)": 67.71, |
| "step": 645, |
| "train_speed(iter/s)": 0.040522 |
| }, |
| { |
| "acc": 0.74172649, |
| "epoch": 0.4470426409903714, |
| "grad_norm": 1.0481441020965576, |
| "learning_rate": 9.957746513747285e-05, |
| "loss": 0.94307327, |
| "memory(GiB)": 67.71, |
| "step": 650, |
| "train_speed(iter/s)": 0.040673 |
| }, |
| { |
| "acc": 0.73418083, |
| "epoch": 0.4504814305364512, |
| "grad_norm": 1.2039026021957397, |
| "learning_rate": 9.956258381749717e-05, |
| "loss": 0.96942959, |
| "memory(GiB)": 67.71, |
| "step": 655, |
| "train_speed(iter/s)": 0.04079 |
| }, |
| { |
| "acc": 0.73663011, |
| "epoch": 0.45392022008253097, |
| "grad_norm": 1.2746825218200684, |
| "learning_rate": 9.954744611020134e-05, |
| "loss": 0.96783085, |
| "memory(GiB)": 67.71, |
| "step": 660, |
| "train_speed(iter/s)": 0.040931 |
| }, |
| { |
| "acc": 0.75085382, |
| "epoch": 0.4573590096286107, |
| "grad_norm": 1.1864688396453857, |
| "learning_rate": 9.953205209390065e-05, |
| "loss": 0.93258324, |
| "memory(GiB)": 67.71, |
| "step": 665, |
| "train_speed(iter/s)": 0.041065 |
| }, |
| { |
| "acc": 0.74181981, |
| "epoch": 0.4607977991746905, |
| "grad_norm": 1.2284380197525024, |
| "learning_rate": 9.95164018482364e-05, |
| "loss": 0.94610729, |
| "memory(GiB)": 67.71, |
| "step": 670, |
| "train_speed(iter/s)": 0.041186 |
| }, |
| { |
| "acc": 0.73316283, |
| "epoch": 0.4642365887207703, |
| "grad_norm": 1.0974282026290894, |
| "learning_rate": 9.950049545417551e-05, |
| "loss": 0.97180891, |
| "memory(GiB)": 67.71, |
| "step": 675, |
| "train_speed(iter/s)": 0.041284 |
| }, |
| { |
| "acc": 0.73497968, |
| "epoch": 0.4676753782668501, |
| "grad_norm": 1.1195545196533203, |
| "learning_rate": 9.948433299401008e-05, |
| "loss": 0.96802521, |
| "memory(GiB)": 67.71, |
| "step": 680, |
| "train_speed(iter/s)": 0.041406 |
| }, |
| { |
| "acc": 0.71404638, |
| "epoch": 0.47111416781292986, |
| "grad_norm": 1.2557018995285034, |
| "learning_rate": 9.946791455135697e-05, |
| "loss": 1.04876156, |
| "memory(GiB)": 67.71, |
| "step": 685, |
| "train_speed(iter/s)": 0.041511 |
| }, |
| { |
| "acc": 0.73286834, |
| "epoch": 0.47455295735900965, |
| "grad_norm": 1.2220708131790161, |
| "learning_rate": 9.945124021115738e-05, |
| "loss": 0.96964302, |
| "memory(GiB)": 67.71, |
| "step": 690, |
| "train_speed(iter/s)": 0.041631 |
| }, |
| { |
| "acc": 0.73684483, |
| "epoch": 0.4779917469050894, |
| "grad_norm": 1.2621607780456543, |
| "learning_rate": 9.94343100596764e-05, |
| "loss": 0.95697803, |
| "memory(GiB)": 67.71, |
| "step": 695, |
| "train_speed(iter/s)": 0.041775 |
| }, |
| { |
| "acc": 0.73987064, |
| "epoch": 0.4814305364511692, |
| "grad_norm": 1.1854294538497925, |
| "learning_rate": 9.941712418450258e-05, |
| "loss": 0.94488659, |
| "memory(GiB)": 67.71, |
| "step": 700, |
| "train_speed(iter/s)": 0.041901 |
| }, |
| { |
| "epoch": 0.4814305364511692, |
| "eval_acc": 0.7335672830341476, |
| "eval_loss": 0.9757564663887024, |
| "eval_runtime": 1129.274, |
| "eval_samples_per_second": 3.793, |
| "eval_steps_per_second": 0.068, |
| "step": 700 |
| }, |
| { |
| "acc": 0.73576145, |
| "epoch": 0.48486932599724897, |
| "grad_norm": 1.1550548076629639, |
| "learning_rate": 9.939968267454743e-05, |
| "loss": 0.95160465, |
| "memory(GiB)": 67.71, |
| "step": 705, |
| "train_speed(iter/s)": 0.03937 |
| }, |
| { |
| "acc": 0.71119275, |
| "epoch": 0.48830811554332876, |
| "grad_norm": 1.2182416915893555, |
| "learning_rate": 9.938198562004501e-05, |
| "loss": 1.04482851, |
| "memory(GiB)": 67.71, |
| "step": 710, |
| "train_speed(iter/s)": 0.039477 |
| }, |
| { |
| "acc": 0.74570274, |
| "epoch": 0.49174690508940855, |
| "grad_norm": 1.1353340148925781, |
| "learning_rate": 9.936403311255144e-05, |
| "loss": 0.92555218, |
| "memory(GiB)": 67.71, |
| "step": 715, |
| "train_speed(iter/s)": 0.039603 |
| }, |
| { |
| "acc": 0.74782338, |
| "epoch": 0.4951856946354883, |
| "grad_norm": 1.2046043872833252, |
| "learning_rate": 9.934582524494446e-05, |
| "loss": 0.92999516, |
| "memory(GiB)": 67.71, |
| "step": 720, |
| "train_speed(iter/s)": 0.039731 |
| }, |
| { |
| "acc": 0.73299646, |
| "epoch": 0.4986244841815681, |
| "grad_norm": 1.102347731590271, |
| "learning_rate": 9.932736211142291e-05, |
| "loss": 0.97149315, |
| "memory(GiB)": 67.71, |
| "step": 725, |
| "train_speed(iter/s)": 0.03984 |
| }, |
| { |
| "acc": 0.72648382, |
| "epoch": 0.5020632737276479, |
| "grad_norm": 1.0632636547088623, |
| "learning_rate": 9.930864380750617e-05, |
| "loss": 1.01790123, |
| "memory(GiB)": 67.71, |
| "step": 730, |
| "train_speed(iter/s)": 0.039945 |
| }, |
| { |
| "acc": 0.71636868, |
| "epoch": 0.5055020632737276, |
| "grad_norm": 1.1830312013626099, |
| "learning_rate": 9.928967043003391e-05, |
| "loss": 1.01803741, |
| "memory(GiB)": 67.71, |
| "step": 735, |
| "train_speed(iter/s)": 0.040055 |
| }, |
| { |
| "acc": 0.73447638, |
| "epoch": 0.5089408528198074, |
| "grad_norm": 1.1544054746627808, |
| "learning_rate": 9.92704420771653e-05, |
| "loss": 0.97713757, |
| "memory(GiB)": 67.71, |
| "step": 740, |
| "train_speed(iter/s)": 0.040173 |
| }, |
| { |
| "acc": 0.73799992, |
| "epoch": 0.5123796423658872, |
| "grad_norm": 1.0744158029556274, |
| "learning_rate": 9.925095884837867e-05, |
| "loss": 0.95858746, |
| "memory(GiB)": 67.71, |
| "step": 745, |
| "train_speed(iter/s)": 0.040296 |
| }, |
| { |
| "acc": 0.74002094, |
| "epoch": 0.515818431911967, |
| "grad_norm": 1.086005687713623, |
| "learning_rate": 9.923122084447098e-05, |
| "loss": 0.95759525, |
| "memory(GiB)": 67.71, |
| "step": 750, |
| "train_speed(iter/s)": 0.040432 |
| }, |
| { |
| "acc": 0.73197713, |
| "epoch": 0.5192572214580468, |
| "grad_norm": 1.177945852279663, |
| "learning_rate": 9.921122816755725e-05, |
| "loss": 0.98773813, |
| "memory(GiB)": 67.71, |
| "step": 755, |
| "train_speed(iter/s)": 0.040536 |
| }, |
| { |
| "acc": 0.71955528, |
| "epoch": 0.5226960110041265, |
| "grad_norm": 1.1270967721939087, |
| "learning_rate": 9.919098092107003e-05, |
| "loss": 1.0065423, |
| "memory(GiB)": 67.71, |
| "step": 760, |
| "train_speed(iter/s)": 0.040641 |
| }, |
| { |
| "acc": 0.72435188, |
| "epoch": 0.5261348005502063, |
| "grad_norm": 1.1566613912582397, |
| "learning_rate": 9.917047920975897e-05, |
| "loss": 1.00753899, |
| "memory(GiB)": 67.71, |
| "step": 765, |
| "train_speed(iter/s)": 0.040761 |
| }, |
| { |
| "acc": 0.72682076, |
| "epoch": 0.5295735900962861, |
| "grad_norm": 1.0998412370681763, |
| "learning_rate": 9.914972313969015e-05, |
| "loss": 0.99639912, |
| "memory(GiB)": 67.71, |
| "step": 770, |
| "train_speed(iter/s)": 0.040857 |
| }, |
| { |
| "acc": 0.73786283, |
| "epoch": 0.5330123796423659, |
| "grad_norm": 1.0717042684555054, |
| "learning_rate": 9.912871281824555e-05, |
| "loss": 0.95036526, |
| "memory(GiB)": 67.71, |
| "step": 775, |
| "train_speed(iter/s)": 0.040955 |
| }, |
| { |
| "acc": 0.72474022, |
| "epoch": 0.5364511691884457, |
| "grad_norm": 1.1307621002197266, |
| "learning_rate": 9.910744835412258e-05, |
| "loss": 1.00282173, |
| "memory(GiB)": 67.71, |
| "step": 780, |
| "train_speed(iter/s)": 0.041067 |
| }, |
| { |
| "acc": 0.73896732, |
| "epoch": 0.5398899587345255, |
| "grad_norm": 1.0760217905044556, |
| "learning_rate": 9.908592985733346e-05, |
| "loss": 0.95014591, |
| "memory(GiB)": 67.71, |
| "step": 785, |
| "train_speed(iter/s)": 0.041189 |
| }, |
| { |
| "acc": 0.73375082, |
| "epoch": 0.5433287482806052, |
| "grad_norm": 1.1228985786437988, |
| "learning_rate": 9.90641574392046e-05, |
| "loss": 0.97449379, |
| "memory(GiB)": 67.71, |
| "step": 790, |
| "train_speed(iter/s)": 0.041296 |
| }, |
| { |
| "acc": 0.73906136, |
| "epoch": 0.546767537826685, |
| "grad_norm": 1.0855998992919922, |
| "learning_rate": 9.904213121237616e-05, |
| "loss": 0.9437438, |
| "memory(GiB)": 67.71, |
| "step": 795, |
| "train_speed(iter/s)": 0.041409 |
| }, |
| { |
| "acc": 0.7277792, |
| "epoch": 0.5502063273727648, |
| "grad_norm": 1.24734365940094, |
| "learning_rate": 9.90198512908013e-05, |
| "loss": 1.01125345, |
| "memory(GiB)": 67.71, |
| "step": 800, |
| "train_speed(iter/s)": 0.041532 |
| }, |
| { |
| "epoch": 0.5502063273727648, |
| "eval_acc": 0.736024879650875, |
| "eval_loss": 0.9637655019760132, |
| "eval_runtime": 1126.4376, |
| "eval_samples_per_second": 3.802, |
| "eval_steps_per_second": 0.068, |
| "step": 800 |
| }, |
| { |
| "acc": 0.75724821, |
| "epoch": 0.5536451169188445, |
| "grad_norm": 1.1258316040039062, |
| "learning_rate": 9.899731778974572e-05, |
| "loss": 0.87265921, |
| "memory(GiB)": 67.71, |
| "step": 805, |
| "train_speed(iter/s)": 0.039349 |
| }, |
| { |
| "acc": 0.74204683, |
| "epoch": 0.5570839064649243, |
| "grad_norm": 0.9689936637878418, |
| "learning_rate": 9.897453082578703e-05, |
| "loss": 0.91779423, |
| "memory(GiB)": 67.71, |
| "step": 810, |
| "train_speed(iter/s)": 0.039466 |
| }, |
| { |
| "acc": 0.73968034, |
| "epoch": 0.5605226960110041, |
| "grad_norm": 1.1123220920562744, |
| "learning_rate": 9.895149051681413e-05, |
| "loss": 0.97357388, |
| "memory(GiB)": 67.71, |
| "step": 815, |
| "train_speed(iter/s)": 0.039574 |
| }, |
| { |
| "acc": 0.73935227, |
| "epoch": 0.5639614855570839, |
| "grad_norm": 1.0451692342758179, |
| "learning_rate": 9.892819698202658e-05, |
| "loss": 0.93994102, |
| "memory(GiB)": 67.71, |
| "step": 820, |
| "train_speed(iter/s)": 0.039675 |
| }, |
| { |
| "acc": 0.73578658, |
| "epoch": 0.5674002751031637, |
| "grad_norm": 1.0823888778686523, |
| "learning_rate": 9.890465034193403e-05, |
| "loss": 0.92713509, |
| "memory(GiB)": 67.71, |
| "step": 825, |
| "train_speed(iter/s)": 0.039784 |
| }, |
| { |
| "acc": 0.7370616, |
| "epoch": 0.5708390646492435, |
| "grad_norm": 1.1076163053512573, |
| "learning_rate": 9.888085071835557e-05, |
| "loss": 0.96277084, |
| "memory(GiB)": 67.71, |
| "step": 830, |
| "train_speed(iter/s)": 0.03987 |
| }, |
| { |
| "acc": 0.74359312, |
| "epoch": 0.5742778541953233, |
| "grad_norm": 0.9995237588882446, |
| "learning_rate": 9.885679823441913e-05, |
| "loss": 0.92473927, |
| "memory(GiB)": 67.71, |
| "step": 835, |
| "train_speed(iter/s)": 0.039986 |
| }, |
| { |
| "acc": 0.73567324, |
| "epoch": 0.5777166437414031, |
| "grad_norm": 1.1980810165405273, |
| "learning_rate": 9.883249301456078e-05, |
| "loss": 0.97589169, |
| "memory(GiB)": 67.71, |
| "step": 840, |
| "train_speed(iter/s)": 0.040091 |
| }, |
| { |
| "acc": 0.72378907, |
| "epoch": 0.5811554332874828, |
| "grad_norm": 1.059746503829956, |
| "learning_rate": 9.880793518452414e-05, |
| "loss": 1.01202221, |
| "memory(GiB)": 67.71, |
| "step": 845, |
| "train_speed(iter/s)": 0.040196 |
| }, |
| { |
| "acc": 0.72781639, |
| "epoch": 0.5845942228335625, |
| "grad_norm": 1.1578445434570312, |
| "learning_rate": 9.878312487135973e-05, |
| "loss": 0.98674173, |
| "memory(GiB)": 67.71, |
| "step": 850, |
| "train_speed(iter/s)": 0.040293 |
| }, |
| { |
| "acc": 0.7325696, |
| "epoch": 0.5880330123796423, |
| "grad_norm": 1.1622587442398071, |
| "learning_rate": 9.87580622034243e-05, |
| "loss": 0.96467819, |
| "memory(GiB)": 67.71, |
| "step": 855, |
| "train_speed(iter/s)": 0.040397 |
| }, |
| { |
| "acc": 0.74238405, |
| "epoch": 0.5914718019257221, |
| "grad_norm": 1.221163034439087, |
| "learning_rate": 9.873274731038013e-05, |
| "loss": 0.94902515, |
| "memory(GiB)": 67.71, |
| "step": 860, |
| "train_speed(iter/s)": 0.040497 |
| }, |
| { |
| "acc": 0.73676643, |
| "epoch": 0.5949105914718019, |
| "grad_norm": 1.0908128023147583, |
| "learning_rate": 9.87071803231944e-05, |
| "loss": 0.94923353, |
| "memory(GiB)": 67.71, |
| "step": 865, |
| "train_speed(iter/s)": 0.040613 |
| }, |
| { |
| "acc": 0.7285512, |
| "epoch": 0.5983493810178817, |
| "grad_norm": 0.9778567552566528, |
| "learning_rate": 9.868136137413854e-05, |
| "loss": 0.99065866, |
| "memory(GiB)": 67.71, |
| "step": 870, |
| "train_speed(iter/s)": 0.040705 |
| }, |
| { |
| "acc": 0.75390539, |
| "epoch": 0.6017881705639615, |
| "grad_norm": 1.1204711198806763, |
| "learning_rate": 9.865529059678749e-05, |
| "loss": 0.89114456, |
| "memory(GiB)": 67.71, |
| "step": 875, |
| "train_speed(iter/s)": 0.040815 |
| }, |
| { |
| "acc": 0.72871351, |
| "epoch": 0.6052269601100413, |
| "grad_norm": 1.1295973062515259, |
| "learning_rate": 9.8628968126019e-05, |
| "loss": 0.97484636, |
| "memory(GiB)": 67.71, |
| "step": 880, |
| "train_speed(iter/s)": 0.04091 |
| }, |
| { |
| "acc": 0.75279789, |
| "epoch": 0.6086657496561211, |
| "grad_norm": 1.276840090751648, |
| "learning_rate": 9.8602394098013e-05, |
| "loss": 0.9101244, |
| "memory(GiB)": 67.71, |
| "step": 885, |
| "train_speed(iter/s)": 0.041017 |
| }, |
| { |
| "acc": 0.72960396, |
| "epoch": 0.6121045392022009, |
| "grad_norm": 1.1485203504562378, |
| "learning_rate": 9.857556865025087e-05, |
| "loss": 0.9954258, |
| "memory(GiB)": 67.71, |
| "step": 890, |
| "train_speed(iter/s)": 0.041116 |
| }, |
| { |
| "acc": 0.73271265, |
| "epoch": 0.6155433287482807, |
| "grad_norm": 1.2299952507019043, |
| "learning_rate": 9.854849192151468e-05, |
| "loss": 0.97523527, |
| "memory(GiB)": 67.71, |
| "step": 895, |
| "train_speed(iter/s)": 0.041212 |
| }, |
| { |
| "acc": 0.72924538, |
| "epoch": 0.6189821182943603, |
| "grad_norm": 1.1494402885437012, |
| "learning_rate": 9.852116405188648e-05, |
| "loss": 0.98907299, |
| "memory(GiB)": 67.71, |
| "step": 900, |
| "train_speed(iter/s)": 0.041323 |
| }, |
| { |
| "epoch": 0.6189821182943603, |
| "eval_acc": 0.7370371620101678, |
| "eval_loss": 0.9578044414520264, |
| "eval_runtime": 1104.9055, |
| "eval_samples_per_second": 3.876, |
| "eval_steps_per_second": 0.07, |
| "step": 900 |
| }, |
| { |
| "acc": 0.72602391, |
| "epoch": 0.6224209078404401, |
| "grad_norm": 1.0728832483291626, |
| "learning_rate": 9.849358518274771e-05, |
| "loss": 1.01037588, |
| "memory(GiB)": 67.71, |
| "step": 905, |
| "train_speed(iter/s)": 0.039428 |
| }, |
| { |
| "acc": 0.7520565, |
| "epoch": 0.6258596973865199, |
| "grad_norm": 1.0786807537078857, |
| "learning_rate": 9.846575545677823e-05, |
| "loss": 0.92040062, |
| "memory(GiB)": 67.71, |
| "step": 910, |
| "train_speed(iter/s)": 0.039534 |
| }, |
| { |
| "acc": 0.74691858, |
| "epoch": 0.6292984869325997, |
| "grad_norm": 1.0437581539154053, |
| "learning_rate": 9.843767501795583e-05, |
| "loss": 0.9074029, |
| "memory(GiB)": 67.71, |
| "step": 915, |
| "train_speed(iter/s)": 0.039631 |
| }, |
| { |
| "acc": 0.73221941, |
| "epoch": 0.6327372764786795, |
| "grad_norm": 1.1795591115951538, |
| "learning_rate": 9.840934401155528e-05, |
| "loss": 0.988484, |
| "memory(GiB)": 67.71, |
| "step": 920, |
| "train_speed(iter/s)": 0.039722 |
| }, |
| { |
| "acc": 0.72777405, |
| "epoch": 0.6361760660247593, |
| "grad_norm": 1.1894828081130981, |
| "learning_rate": 9.838076258414776e-05, |
| "loss": 1.01051292, |
| "memory(GiB)": 67.71, |
| "step": 925, |
| "train_speed(iter/s)": 0.039818 |
| }, |
| { |
| "acc": 0.75026011, |
| "epoch": 0.6396148555708391, |
| "grad_norm": 0.9834104180335999, |
| "learning_rate": 9.835193088359988e-05, |
| "loss": 0.90967407, |
| "memory(GiB)": 67.71, |
| "step": 930, |
| "train_speed(iter/s)": 0.0399 |
| }, |
| { |
| "acc": 0.74026661, |
| "epoch": 0.6430536451169189, |
| "grad_norm": 1.2417614459991455, |
| "learning_rate": 9.832284905907318e-05, |
| "loss": 0.92580471, |
| "memory(GiB)": 67.71, |
| "step": 935, |
| "train_speed(iter/s)": 0.03999 |
| }, |
| { |
| "acc": 0.73846035, |
| "epoch": 0.6464924346629987, |
| "grad_norm": 1.1710271835327148, |
| "learning_rate": 9.829351726102313e-05, |
| "loss": 0.95107613, |
| "memory(GiB)": 67.71, |
| "step": 940, |
| "train_speed(iter/s)": 0.040081 |
| }, |
| { |
| "acc": 0.7366385, |
| "epoch": 0.6499312242090785, |
| "grad_norm": 1.0618470907211304, |
| "learning_rate": 9.826393564119847e-05, |
| "loss": 0.94500179, |
| "memory(GiB)": 67.71, |
| "step": 945, |
| "train_speed(iter/s)": 0.040159 |
| }, |
| { |
| "acc": 0.74606085, |
| "epoch": 0.6533700137551581, |
| "grad_norm": 1.0151257514953613, |
| "learning_rate": 9.823410435264042e-05, |
| "loss": 0.90975704, |
| "memory(GiB)": 67.71, |
| "step": 950, |
| "train_speed(iter/s)": 0.040245 |
| }, |
| { |
| "acc": 0.73123455, |
| "epoch": 0.6568088033012379, |
| "grad_norm": 1.1929761171340942, |
| "learning_rate": 9.820402354968183e-05, |
| "loss": 0.95826616, |
| "memory(GiB)": 67.71, |
| "step": 955, |
| "train_speed(iter/s)": 0.040333 |
| }, |
| { |
| "acc": 0.73816185, |
| "epoch": 0.6602475928473177, |
| "grad_norm": 1.240237832069397, |
| "learning_rate": 9.817369338794646e-05, |
| "loss": 0.94996367, |
| "memory(GiB)": 67.71, |
| "step": 960, |
| "train_speed(iter/s)": 0.040436 |
| }, |
| { |
| "acc": 0.74816332, |
| "epoch": 0.6636863823933975, |
| "grad_norm": 0.9286736845970154, |
| "learning_rate": 9.81431140243481e-05, |
| "loss": 0.90342827, |
| "memory(GiB)": 67.71, |
| "step": 965, |
| "train_speed(iter/s)": 0.040521 |
| }, |
| { |
| "acc": 0.74362345, |
| "epoch": 0.6671251719394773, |
| "grad_norm": 1.1020361185073853, |
| "learning_rate": 9.811228561708979e-05, |
| "loss": 0.92705402, |
| "memory(GiB)": 67.71, |
| "step": 970, |
| "train_speed(iter/s)": 0.040625 |
| }, |
| { |
| "acc": 0.73400373, |
| "epoch": 0.6705639614855571, |
| "grad_norm": 1.0580672025680542, |
| "learning_rate": 9.808120832566306e-05, |
| "loss": 0.98702965, |
| "memory(GiB)": 67.71, |
| "step": 975, |
| "train_speed(iter/s)": 0.040726 |
| }, |
| { |
| "acc": 0.71981792, |
| "epoch": 0.6740027510316369, |
| "grad_norm": 1.054178237915039, |
| "learning_rate": 9.804988231084695e-05, |
| "loss": 1.02396307, |
| "memory(GiB)": 67.71, |
| "step": 980, |
| "train_speed(iter/s)": 0.040822 |
| }, |
| { |
| "acc": 0.7412406, |
| "epoch": 0.6774415405777167, |
| "grad_norm": 1.064276933670044, |
| "learning_rate": 9.801830773470738e-05, |
| "loss": 0.92902575, |
| "memory(GiB)": 67.71, |
| "step": 985, |
| "train_speed(iter/s)": 0.040901 |
| }, |
| { |
| "acc": 0.73765955, |
| "epoch": 0.6808803301237965, |
| "grad_norm": 1.02224862575531, |
| "learning_rate": 9.798648476059612e-05, |
| "loss": 0.96069899, |
| "memory(GiB)": 67.71, |
| "step": 990, |
| "train_speed(iter/s)": 0.040989 |
| }, |
| { |
| "acc": 0.72184877, |
| "epoch": 0.6843191196698762, |
| "grad_norm": 1.10880446434021, |
| "learning_rate": 9.795441355315009e-05, |
| "loss": 1.00857792, |
| "memory(GiB)": 67.71, |
| "step": 995, |
| "train_speed(iter/s)": 0.041069 |
| }, |
| { |
| "acc": 0.75687084, |
| "epoch": 0.687757909215956, |
| "grad_norm": 1.0748587846755981, |
| "learning_rate": 9.792209427829044e-05, |
| "loss": 0.89921093, |
| "memory(GiB)": 67.71, |
| "step": 1000, |
| "train_speed(iter/s)": 0.041172 |
| }, |
| { |
| "epoch": 0.687757909215956, |
| "eval_acc": 0.739815314707338, |
| "eval_loss": 0.9472519159317017, |
| "eval_runtime": 1138.6289, |
| "eval_samples_per_second": 3.762, |
| "eval_steps_per_second": 0.068, |
| "step": 1000 |
| }, |
| { |
| "acc": 0.74216719, |
| "epoch": 0.6911966987620357, |
| "grad_norm": 0.9975650906562805, |
| "learning_rate": 9.788952710322168e-05, |
| "loss": 0.92038422, |
| "memory(GiB)": 67.71, |
| "step": 1005, |
| "train_speed(iter/s)": 0.039418 |
| }, |
| { |
| "acc": 0.73918667, |
| "epoch": 0.6946354883081155, |
| "grad_norm": 1.167277455329895, |
| "learning_rate": 9.785671219643086e-05, |
| "loss": 0.95244484, |
| "memory(GiB)": 67.71, |
| "step": 1010, |
| "train_speed(iter/s)": 0.039509 |
| }, |
| { |
| "acc": 0.74078741, |
| "epoch": 0.6980742778541953, |
| "grad_norm": 1.1248480081558228, |
| "learning_rate": 9.782364972768667e-05, |
| "loss": 0.95239239, |
| "memory(GiB)": 67.71, |
| "step": 1015, |
| "train_speed(iter/s)": 0.039611 |
| }, |
| { |
| "acc": 0.73078346, |
| "epoch": 0.7015130674002751, |
| "grad_norm": 1.1177655458450317, |
| "learning_rate": 9.779033986803856e-05, |
| "loss": 0.97850962, |
| "memory(GiB)": 67.71, |
| "step": 1020, |
| "train_speed(iter/s)": 0.039689 |
| }, |
| { |
| "acc": 0.72922001, |
| "epoch": 0.7049518569463549, |
| "grad_norm": 1.025723934173584, |
| "learning_rate": 9.775678278981587e-05, |
| "loss": 0.97461071, |
| "memory(GiB)": 67.71, |
| "step": 1025, |
| "train_speed(iter/s)": 0.039762 |
| }, |
| { |
| "acc": 0.74167843, |
| "epoch": 0.7083906464924347, |
| "grad_norm": 1.0677716732025146, |
| "learning_rate": 9.772297866662694e-05, |
| "loss": 0.93668747, |
| "memory(GiB)": 67.71, |
| "step": 1030, |
| "train_speed(iter/s)": 0.039851 |
| }, |
| { |
| "acc": 0.73580718, |
| "epoch": 0.7118294360385145, |
| "grad_norm": 1.071346402168274, |
| "learning_rate": 9.768892767335818e-05, |
| "loss": 0.94455872, |
| "memory(GiB)": 67.71, |
| "step": 1035, |
| "train_speed(iter/s)": 0.03994 |
| }, |
| { |
| "acc": 0.73037386, |
| "epoch": 0.7152682255845942, |
| "grad_norm": 1.1164538860321045, |
| "learning_rate": 9.76546299861732e-05, |
| "loss": 0.96749563, |
| "memory(GiB)": 67.71, |
| "step": 1040, |
| "train_speed(iter/s)": 0.040026 |
| }, |
| { |
| "acc": 0.73348866, |
| "epoch": 0.718707015130674, |
| "grad_norm": 1.016825556755066, |
| "learning_rate": 9.76200857825119e-05, |
| "loss": 0.97527409, |
| "memory(GiB)": 67.71, |
| "step": 1045, |
| "train_speed(iter/s)": 0.040097 |
| }, |
| { |
| "acc": 0.74810896, |
| "epoch": 0.7221458046767538, |
| "grad_norm": 1.0394419431686401, |
| "learning_rate": 9.758529524108952e-05, |
| "loss": 0.91727133, |
| "memory(GiB)": 67.71, |
| "step": 1050, |
| "train_speed(iter/s)": 0.040188 |
| }, |
| { |
| "acc": 0.7377789, |
| "epoch": 0.7255845942228336, |
| "grad_norm": 1.2021335363388062, |
| "learning_rate": 9.755025854189574e-05, |
| "loss": 0.96904411, |
| "memory(GiB)": 67.71, |
| "step": 1055, |
| "train_speed(iter/s)": 0.040268 |
| }, |
| { |
| "acc": 0.7272027, |
| "epoch": 0.7290233837689133, |
| "grad_norm": 1.012821912765503, |
| "learning_rate": 9.751497586619374e-05, |
| "loss": 0.9858429, |
| "memory(GiB)": 67.71, |
| "step": 1060, |
| "train_speed(iter/s)": 0.040336 |
| }, |
| { |
| "acc": 0.73458595, |
| "epoch": 0.7324621733149931, |
| "grad_norm": 1.1023552417755127, |
| "learning_rate": 9.747944739651928e-05, |
| "loss": 0.95475712, |
| "memory(GiB)": 67.71, |
| "step": 1065, |
| "train_speed(iter/s)": 0.04042 |
| }, |
| { |
| "acc": 0.74384351, |
| "epoch": 0.7359009628610729, |
| "grad_norm": 1.1670334339141846, |
| "learning_rate": 9.744367331667972e-05, |
| "loss": 0.90986481, |
| "memory(GiB)": 67.71, |
| "step": 1070, |
| "train_speed(iter/s)": 0.040504 |
| }, |
| { |
| "acc": 0.74164953, |
| "epoch": 0.7393397524071527, |
| "grad_norm": 1.1003512144088745, |
| "learning_rate": 9.740765381175308e-05, |
| "loss": 0.95252619, |
| "memory(GiB)": 67.71, |
| "step": 1075, |
| "train_speed(iter/s)": 0.040585 |
| }, |
| { |
| "acc": 0.72832394, |
| "epoch": 0.7427785419532325, |
| "grad_norm": 1.141493320465088, |
| "learning_rate": 9.737138906808716e-05, |
| "loss": 0.9896246, |
| "memory(GiB)": 67.71, |
| "step": 1080, |
| "train_speed(iter/s)": 0.040669 |
| }, |
| { |
| "acc": 0.75672712, |
| "epoch": 0.7462173314993122, |
| "grad_norm": 1.0548261404037476, |
| "learning_rate": 9.733487927329842e-05, |
| "loss": 0.89013748, |
| "memory(GiB)": 67.71, |
| "step": 1085, |
| "train_speed(iter/s)": 0.040758 |
| }, |
| { |
| "acc": 0.74028645, |
| "epoch": 0.749656121045392, |
| "grad_norm": 1.058765172958374, |
| "learning_rate": 9.729812461627116e-05, |
| "loss": 0.9446207, |
| "memory(GiB)": 67.71, |
| "step": 1090, |
| "train_speed(iter/s)": 0.040824 |
| }, |
| { |
| "acc": 0.74290891, |
| "epoch": 0.7530949105914718, |
| "grad_norm": 1.0750882625579834, |
| "learning_rate": 9.726112528715645e-05, |
| "loss": 0.93429804, |
| "memory(GiB)": 67.71, |
| "step": 1095, |
| "train_speed(iter/s)": 0.040904 |
| }, |
| { |
| "acc": 0.74171824, |
| "epoch": 0.7565337001375516, |
| "grad_norm": 1.1232870817184448, |
| "learning_rate": 9.722388147737117e-05, |
| "loss": 0.9356823, |
| "memory(GiB)": 67.71, |
| "step": 1100, |
| "train_speed(iter/s)": 0.040982 |
| }, |
| { |
| "epoch": 0.7565337001375516, |
| "eval_acc": 0.7418005128897287, |
| "eval_loss": 0.9385226964950562, |
| "eval_runtime": 1120.0428, |
| "eval_samples_per_second": 3.824, |
| "eval_steps_per_second": 0.069, |
| "step": 1100 |
| }, |
| { |
| "acc": 0.73683257, |
| "epoch": 0.7599724896836314, |
| "grad_norm": 1.0508232116699219, |
| "learning_rate": 9.718639337959709e-05, |
| "loss": 0.95805416, |
| "memory(GiB)": 67.71, |
| "step": 1105, |
| "train_speed(iter/s)": 0.03942 |
| }, |
| { |
| "acc": 0.74061327, |
| "epoch": 0.7634112792297112, |
| "grad_norm": 1.0770542621612549, |
| "learning_rate": 9.714866118777971e-05, |
| "loss": 0.92782459, |
| "memory(GiB)": 67.71, |
| "step": 1110, |
| "train_speed(iter/s)": 0.039509 |
| }, |
| { |
| "acc": 0.72901726, |
| "epoch": 0.7668500687757909, |
| "grad_norm": 1.104008674621582, |
| "learning_rate": 9.711068509712744e-05, |
| "loss": 0.99297533, |
| "memory(GiB)": 67.71, |
| "step": 1115, |
| "train_speed(iter/s)": 0.039593 |
| }, |
| { |
| "acc": 0.73747034, |
| "epoch": 0.7702888583218707, |
| "grad_norm": 1.1213022470474243, |
| "learning_rate": 9.707246530411045e-05, |
| "loss": 0.96422043, |
| "memory(GiB)": 67.71, |
| "step": 1120, |
| "train_speed(iter/s)": 0.039669 |
| }, |
| { |
| "acc": 0.75132704, |
| "epoch": 0.7737276478679505, |
| "grad_norm": 0.9887475967407227, |
| "learning_rate": 9.703400200645976e-05, |
| "loss": 0.90485935, |
| "memory(GiB)": 67.71, |
| "step": 1125, |
| "train_speed(iter/s)": 0.039747 |
| }, |
| { |
| "acc": 0.74963489, |
| "epoch": 0.7771664374140302, |
| "grad_norm": 1.105952501296997, |
| "learning_rate": 9.69952954031661e-05, |
| "loss": 0.89224911, |
| "memory(GiB)": 67.71, |
| "step": 1130, |
| "train_speed(iter/s)": 0.039821 |
| }, |
| { |
| "acc": 0.73749495, |
| "epoch": 0.78060522696011, |
| "grad_norm": 1.140572428703308, |
| "learning_rate": 9.695634569447904e-05, |
| "loss": 0.9487175, |
| "memory(GiB)": 67.71, |
| "step": 1135, |
| "train_speed(iter/s)": 0.039888 |
| }, |
| { |
| "acc": 0.7312088, |
| "epoch": 0.7840440165061898, |
| "grad_norm": 1.1275548934936523, |
| "learning_rate": 9.691715308190576e-05, |
| "loss": 0.96534138, |
| "memory(GiB)": 67.71, |
| "step": 1140, |
| "train_speed(iter/s)": 0.039959 |
| }, |
| { |
| "acc": 0.73048372, |
| "epoch": 0.7874828060522696, |
| "grad_norm": 1.0315409898757935, |
| "learning_rate": 9.68777177682102e-05, |
| "loss": 0.97743053, |
| "memory(GiB)": 67.71, |
| "step": 1145, |
| "train_speed(iter/s)": 0.040036 |
| }, |
| { |
| "acc": 0.72779579, |
| "epoch": 0.7909215955983494, |
| "grad_norm": 0.9368631839752197, |
| "learning_rate": 9.683803995741186e-05, |
| "loss": 0.98139448, |
| "memory(GiB)": 67.71, |
| "step": 1150, |
| "train_speed(iter/s)": 0.040117 |
| }, |
| { |
| "acc": 0.74332333, |
| "epoch": 0.7943603851444292, |
| "grad_norm": 1.1103096008300781, |
| "learning_rate": 9.679811985478483e-05, |
| "loss": 0.9456337, |
| "memory(GiB)": 67.71, |
| "step": 1155, |
| "train_speed(iter/s)": 0.040199 |
| }, |
| { |
| "acc": 0.73605175, |
| "epoch": 0.797799174690509, |
| "grad_norm": 0.9985005259513855, |
| "learning_rate": 9.675795766685669e-05, |
| "loss": 0.94118538, |
| "memory(GiB)": 67.71, |
| "step": 1160, |
| "train_speed(iter/s)": 0.040265 |
| }, |
| { |
| "acc": 0.7368608, |
| "epoch": 0.8012379642365888, |
| "grad_norm": 1.086758017539978, |
| "learning_rate": 9.671755360140746e-05, |
| "loss": 0.94844141, |
| "memory(GiB)": 67.71, |
| "step": 1165, |
| "train_speed(iter/s)": 0.040343 |
| }, |
| { |
| "acc": 0.74641371, |
| "epoch": 0.8046767537826685, |
| "grad_norm": 0.9669944643974304, |
| "learning_rate": 9.667690786746852e-05, |
| "loss": 0.91691303, |
| "memory(GiB)": 67.71, |
| "step": 1170, |
| "train_speed(iter/s)": 0.040416 |
| }, |
| { |
| "acc": 0.74651995, |
| "epoch": 0.8081155433287482, |
| "grad_norm": 1.0597587823867798, |
| "learning_rate": 9.663602067532151e-05, |
| "loss": 0.91813259, |
| "memory(GiB)": 67.71, |
| "step": 1175, |
| "train_speed(iter/s)": 0.040502 |
| }, |
| { |
| "acc": 0.73420897, |
| "epoch": 0.811554332874828, |
| "grad_norm": 1.1257351636886597, |
| "learning_rate": 9.659489223649731e-05, |
| "loss": 0.97081699, |
| "memory(GiB)": 67.71, |
| "step": 1180, |
| "train_speed(iter/s)": 0.040572 |
| }, |
| { |
| "acc": 0.76278071, |
| "epoch": 0.8149931224209078, |
| "grad_norm": 1.0399693250656128, |
| "learning_rate": 9.655352276377484e-05, |
| "loss": 0.85249825, |
| "memory(GiB)": 67.71, |
| "step": 1185, |
| "train_speed(iter/s)": 0.040662 |
| }, |
| { |
| "acc": 0.74080434, |
| "epoch": 0.8184319119669876, |
| "grad_norm": 1.104978322982788, |
| "learning_rate": 9.651191247118003e-05, |
| "loss": 0.93528318, |
| "memory(GiB)": 67.71, |
| "step": 1190, |
| "train_speed(iter/s)": 0.040735 |
| }, |
| { |
| "acc": 0.7570159, |
| "epoch": 0.8218707015130674, |
| "grad_norm": 1.1633975505828857, |
| "learning_rate": 9.647006157398471e-05, |
| "loss": 0.8937582, |
| "memory(GiB)": 67.71, |
| "step": 1195, |
| "train_speed(iter/s)": 0.040819 |
| }, |
| { |
| "acc": 0.75301266, |
| "epoch": 0.8253094910591472, |
| "grad_norm": 1.0719282627105713, |
| "learning_rate": 9.642797028870549e-05, |
| "loss": 0.87665348, |
| "memory(GiB)": 67.71, |
| "step": 1200, |
| "train_speed(iter/s)": 0.040893 |
| }, |
| { |
| "epoch": 0.8253094910591472, |
| "eval_acc": 0.7415305709272506, |
| "eval_loss": 0.9332711100578308, |
| "eval_runtime": 1102.5228, |
| "eval_samples_per_second": 3.885, |
| "eval_steps_per_second": 0.07, |
| "step": 1200 |
| }, |
| { |
| "acc": 0.73832102, |
| "epoch": 0.828748280605227, |
| "grad_norm": 1.1222566366195679, |
| "learning_rate": 9.63856388331026e-05, |
| "loss": 0.92794905, |
| "memory(GiB)": 67.71, |
| "step": 1205, |
| "train_speed(iter/s)": 0.039487 |
| }, |
| { |
| "acc": 0.74549799, |
| "epoch": 0.8321870701513068, |
| "grad_norm": 1.0469160079956055, |
| "learning_rate": 9.634306742617881e-05, |
| "loss": 0.91989012, |
| "memory(GiB)": 67.71, |
| "step": 1210, |
| "train_speed(iter/s)": 0.039561 |
| }, |
| { |
| "acc": 0.73400946, |
| "epoch": 0.8356258596973866, |
| "grad_norm": 1.1092973947525024, |
| "learning_rate": 9.630025628817833e-05, |
| "loss": 0.96797295, |
| "memory(GiB)": 67.71, |
| "step": 1215, |
| "train_speed(iter/s)": 0.039633 |
| }, |
| { |
| "acc": 0.74465179, |
| "epoch": 0.8390646492434664, |
| "grad_norm": 1.0476914644241333, |
| "learning_rate": 9.625720564058553e-05, |
| "loss": 0.9328536, |
| "memory(GiB)": 67.71, |
| "step": 1220, |
| "train_speed(iter/s)": 0.039708 |
| }, |
| { |
| "acc": 0.7472661, |
| "epoch": 0.842503438789546, |
| "grad_norm": 1.002954363822937, |
| "learning_rate": 9.6213915706124e-05, |
| "loss": 0.90329132, |
| "memory(GiB)": 67.71, |
| "step": 1225, |
| "train_speed(iter/s)": 0.039779 |
| }, |
| { |
| "acc": 0.73477154, |
| "epoch": 0.8459422283356258, |
| "grad_norm": 1.1124727725982666, |
| "learning_rate": 9.617038670875518e-05, |
| "loss": 0.97662973, |
| "memory(GiB)": 67.71, |
| "step": 1230, |
| "train_speed(iter/s)": 0.039851 |
| }, |
| { |
| "acc": 0.7584034, |
| "epoch": 0.8493810178817056, |
| "grad_norm": 0.9963657855987549, |
| "learning_rate": 9.612661887367738e-05, |
| "loss": 0.87994137, |
| "memory(GiB)": 67.71, |
| "step": 1235, |
| "train_speed(iter/s)": 0.03993 |
| }, |
| { |
| "acc": 0.74326572, |
| "epoch": 0.8528198074277854, |
| "grad_norm": 1.0891412496566772, |
| "learning_rate": 9.608261242732453e-05, |
| "loss": 0.92397137, |
| "memory(GiB)": 67.71, |
| "step": 1240, |
| "train_speed(iter/s)": 0.039998 |
| }, |
| { |
| "acc": 0.74097652, |
| "epoch": 0.8562585969738652, |
| "grad_norm": 0.9601296186447144, |
| "learning_rate": 9.603836759736501e-05, |
| "loss": 0.92762499, |
| "memory(GiB)": 67.71, |
| "step": 1245, |
| "train_speed(iter/s)": 0.040067 |
| }, |
| { |
| "acc": 0.74741158, |
| "epoch": 0.859697386519945, |
| "grad_norm": 1.0737489461898804, |
| "learning_rate": 9.599388461270046e-05, |
| "loss": 0.89353437, |
| "memory(GiB)": 67.71, |
| "step": 1250, |
| "train_speed(iter/s)": 0.040137 |
| }, |
| { |
| "acc": 0.7489872, |
| "epoch": 0.8631361760660248, |
| "grad_norm": 0.8971010446548462, |
| "learning_rate": 9.594916370346464e-05, |
| "loss": 0.9029624, |
| "memory(GiB)": 67.71, |
| "step": 1255, |
| "train_speed(iter/s)": 0.040204 |
| }, |
| { |
| "acc": 0.76023664, |
| "epoch": 0.8665749656121046, |
| "grad_norm": 1.1241250038146973, |
| "learning_rate": 9.590420510102226e-05, |
| "loss": 0.87794628, |
| "memory(GiB)": 67.71, |
| "step": 1260, |
| "train_speed(iter/s)": 0.040279 |
| }, |
| { |
| "acc": 0.73114996, |
| "epoch": 0.8700137551581844, |
| "grad_norm": 1.2173177003860474, |
| "learning_rate": 9.585900903796766e-05, |
| "loss": 0.97089109, |
| "memory(GiB)": 67.71, |
| "step": 1265, |
| "train_speed(iter/s)": 0.040357 |
| }, |
| { |
| "acc": 0.74563522, |
| "epoch": 0.8734525447042641, |
| "grad_norm": 1.1517419815063477, |
| "learning_rate": 9.581357574812375e-05, |
| "loss": 0.89781647, |
| "memory(GiB)": 67.71, |
| "step": 1270, |
| "train_speed(iter/s)": 0.040428 |
| }, |
| { |
| "acc": 0.77028093, |
| "epoch": 0.8768913342503438, |
| "grad_norm": 1.0377655029296875, |
| "learning_rate": 9.576790546654071e-05, |
| "loss": 0.85278912, |
| "memory(GiB)": 67.71, |
| "step": 1275, |
| "train_speed(iter/s)": 0.040497 |
| }, |
| { |
| "acc": 0.73771534, |
| "epoch": 0.8803301237964236, |
| "grad_norm": 1.3595341444015503, |
| "learning_rate": 9.572199842949484e-05, |
| "loss": 0.94212608, |
| "memory(GiB)": 67.71, |
| "step": 1280, |
| "train_speed(iter/s)": 0.04057 |
| }, |
| { |
| "acc": 0.73818164, |
| "epoch": 0.8837689133425034, |
| "grad_norm": 1.0683890581130981, |
| "learning_rate": 9.567585487448723e-05, |
| "loss": 0.94818478, |
| "memory(GiB)": 67.71, |
| "step": 1285, |
| "train_speed(iter/s)": 0.040642 |
| }, |
| { |
| "acc": 0.73784003, |
| "epoch": 0.8872077028885832, |
| "grad_norm": 1.0474903583526611, |
| "learning_rate": 9.562947504024267e-05, |
| "loss": 0.93362265, |
| "memory(GiB)": 67.71, |
| "step": 1290, |
| "train_speed(iter/s)": 0.040707 |
| }, |
| { |
| "acc": 0.73629189, |
| "epoch": 0.890646492434663, |
| "grad_norm": 0.9906838536262512, |
| "learning_rate": 9.558285916670833e-05, |
| "loss": 0.96513948, |
| "memory(GiB)": 67.71, |
| "step": 1295, |
| "train_speed(iter/s)": 0.04077 |
| }, |
| { |
| "acc": 0.74767346, |
| "epoch": 0.8940852819807428, |
| "grad_norm": 0.937610924243927, |
| "learning_rate": 9.553600749505249e-05, |
| "loss": 0.91039581, |
| "memory(GiB)": 67.71, |
| "step": 1300, |
| "train_speed(iter/s)": 0.040841 |
| }, |
| { |
| "epoch": 0.8940852819807428, |
| "eval_acc": 0.7442806046699959, |
| "eval_loss": 0.9251495003700256, |
| "eval_runtime": 1163.3237, |
| "eval_samples_per_second": 3.682, |
| "eval_steps_per_second": 0.066, |
| "step": 1300 |
| }, |
| { |
| "acc": 0.75481806, |
| "epoch": 0.8975240715268226, |
| "grad_norm": 1.093235969543457, |
| "learning_rate": 9.548892026766336e-05, |
| "loss": 0.89359856, |
| "memory(GiB)": 67.71, |
| "step": 1305, |
| "train_speed(iter/s)": 0.039469 |
| }, |
| { |
| "acc": 0.7461483, |
| "epoch": 0.9009628610729024, |
| "grad_norm": 1.1600829362869263, |
| "learning_rate": 9.544159772814784e-05, |
| "loss": 0.92318993, |
| "memory(GiB)": 67.71, |
| "step": 1310, |
| "train_speed(iter/s)": 0.039531 |
| }, |
| { |
| "acc": 0.74466972, |
| "epoch": 0.9044016506189821, |
| "grad_norm": 1.0172392129898071, |
| "learning_rate": 9.539404012133022e-05, |
| "loss": 0.92656469, |
| "memory(GiB)": 67.71, |
| "step": 1315, |
| "train_speed(iter/s)": 0.039601 |
| }, |
| { |
| "acc": 0.74887996, |
| "epoch": 0.9078404401650619, |
| "grad_norm": 1.0804096460342407, |
| "learning_rate": 9.534624769325086e-05, |
| "loss": 0.9098074, |
| "memory(GiB)": 67.71, |
| "step": 1320, |
| "train_speed(iter/s)": 0.039666 |
| }, |
| { |
| "acc": 0.74121346, |
| "epoch": 0.9112792297111417, |
| "grad_norm": 0.9664003849029541, |
| "learning_rate": 9.529822069116499e-05, |
| "loss": 0.9316514, |
| "memory(GiB)": 67.71, |
| "step": 1325, |
| "train_speed(iter/s)": 0.039723 |
| }, |
| { |
| "acc": 0.75105286, |
| "epoch": 0.9147180192572214, |
| "grad_norm": 0.9869258999824524, |
| "learning_rate": 9.524995936354147e-05, |
| "loss": 0.88554888, |
| "memory(GiB)": 67.71, |
| "step": 1330, |
| "train_speed(iter/s)": 0.039787 |
| }, |
| { |
| "acc": 0.73669834, |
| "epoch": 0.9181568088033012, |
| "grad_norm": 1.0221189260482788, |
| "learning_rate": 9.520146396006138e-05, |
| "loss": 0.96149244, |
| "memory(GiB)": 67.71, |
| "step": 1335, |
| "train_speed(iter/s)": 0.03984 |
| }, |
| { |
| "acc": 0.73676386, |
| "epoch": 0.921595598349381, |
| "grad_norm": 1.1528207063674927, |
| "learning_rate": 9.515273473161683e-05, |
| "loss": 0.96016941, |
| "memory(GiB)": 67.71, |
| "step": 1340, |
| "train_speed(iter/s)": 0.039908 |
| }, |
| { |
| "acc": 0.74311166, |
| "epoch": 0.9250343878954608, |
| "grad_norm": 1.161664366722107, |
| "learning_rate": 9.510377193030963e-05, |
| "loss": 0.93340931, |
| "memory(GiB)": 67.71, |
| "step": 1345, |
| "train_speed(iter/s)": 0.039969 |
| }, |
| { |
| "acc": 0.7412838, |
| "epoch": 0.9284731774415406, |
| "grad_norm": 1.1524734497070312, |
| "learning_rate": 9.505457580944998e-05, |
| "loss": 0.94830551, |
| "memory(GiB)": 67.71, |
| "step": 1350, |
| "train_speed(iter/s)": 0.040033 |
| }, |
| { |
| "acc": 0.7353076, |
| "epoch": 0.9319119669876204, |
| "grad_norm": 0.991431474685669, |
| "learning_rate": 9.500514662355515e-05, |
| "loss": 0.94869709, |
| "memory(GiB)": 67.71, |
| "step": 1355, |
| "train_speed(iter/s)": 0.040099 |
| }, |
| { |
| "acc": 0.74592419, |
| "epoch": 0.9353507565337001, |
| "grad_norm": 0.9828781485557556, |
| "learning_rate": 9.495548462834822e-05, |
| "loss": 0.91297379, |
| "memory(GiB)": 67.71, |
| "step": 1360, |
| "train_speed(iter/s)": 0.040166 |
| }, |
| { |
| "acc": 0.72816801, |
| "epoch": 0.9387895460797799, |
| "grad_norm": 0.9917466640472412, |
| "learning_rate": 9.490559008075665e-05, |
| "loss": 0.97318478, |
| "memory(GiB)": 67.71, |
| "step": 1365, |
| "train_speed(iter/s)": 0.040224 |
| }, |
| { |
| "acc": 0.75423832, |
| "epoch": 0.9422283356258597, |
| "grad_norm": 1.021081566810608, |
| "learning_rate": 9.485546323891107e-05, |
| "loss": 0.88315115, |
| "memory(GiB)": 67.71, |
| "step": 1370, |
| "train_speed(iter/s)": 0.040286 |
| }, |
| { |
| "acc": 0.74885693, |
| "epoch": 0.9456671251719395, |
| "grad_norm": 1.1856554746627808, |
| "learning_rate": 9.480510436214387e-05, |
| "loss": 0.91739559, |
| "memory(GiB)": 67.71, |
| "step": 1375, |
| "train_speed(iter/s)": 0.040345 |
| }, |
| { |
| "acc": 0.7300355, |
| "epoch": 0.9491059147180193, |
| "grad_norm": 0.9983332753181458, |
| "learning_rate": 9.475451371098787e-05, |
| "loss": 0.96374044, |
| "memory(GiB)": 67.71, |
| "step": 1380, |
| "train_speed(iter/s)": 0.040398 |
| }, |
| { |
| "acc": 0.76302462, |
| "epoch": 0.952544704264099, |
| "grad_norm": 1.0011341571807861, |
| "learning_rate": 9.470369154717498e-05, |
| "loss": 0.86735439, |
| "memory(GiB)": 67.71, |
| "step": 1385, |
| "train_speed(iter/s)": 0.040463 |
| }, |
| { |
| "acc": 0.74582882, |
| "epoch": 0.9559834938101788, |
| "grad_norm": 1.051133394241333, |
| "learning_rate": 9.465263813363488e-05, |
| "loss": 0.90945959, |
| "memory(GiB)": 67.71, |
| "step": 1390, |
| "train_speed(iter/s)": 0.040529 |
| }, |
| { |
| "acc": 0.74545488, |
| "epoch": 0.9594222833562586, |
| "grad_norm": 1.0635967254638672, |
| "learning_rate": 9.460135373449359e-05, |
| "loss": 0.92096958, |
| "memory(GiB)": 67.71, |
| "step": 1395, |
| "train_speed(iter/s)": 0.0406 |
| }, |
| { |
| "acc": 0.7286293, |
| "epoch": 0.9628610729023384, |
| "grad_norm": 1.146252155303955, |
| "learning_rate": 9.454983861507213e-05, |
| "loss": 0.98950424, |
| "memory(GiB)": 67.71, |
| "step": 1400, |
| "train_speed(iter/s)": 0.040657 |
| }, |
| { |
| "epoch": 0.9628610729023384, |
| "eval_acc": 0.7448486075493769, |
| "eval_loss": 0.9200888872146606, |
| "eval_runtime": 1140.3609, |
| "eval_samples_per_second": 3.756, |
| "eval_steps_per_second": 0.068, |
| "step": 1400 |
| }, |
| { |
| "acc": 0.73903141, |
| "epoch": 0.9662998624484181, |
| "grad_norm": 1.1544698476791382, |
| "learning_rate": 9.44980930418852e-05, |
| "loss": 0.94653835, |
| "memory(GiB)": 67.71, |
| "step": 1405, |
| "train_speed(iter/s)": 0.039418 |
| }, |
| { |
| "acc": 0.73803802, |
| "epoch": 0.9697386519944979, |
| "grad_norm": 1.1627522706985474, |
| "learning_rate": 9.444611728263972e-05, |
| "loss": 0.92657709, |
| "memory(GiB)": 67.71, |
| "step": 1410, |
| "train_speed(iter/s)": 0.039486 |
| }, |
| { |
| "acc": 0.74716005, |
| "epoch": 0.9731774415405777, |
| "grad_norm": 1.0238428115844727, |
| "learning_rate": 9.439391160623352e-05, |
| "loss": 0.91622248, |
| "memory(GiB)": 67.71, |
| "step": 1415, |
| "train_speed(iter/s)": 0.039543 |
| }, |
| { |
| "acc": 0.72584734, |
| "epoch": 0.9766162310866575, |
| "grad_norm": 1.0079649686813354, |
| "learning_rate": 9.434147628275387e-05, |
| "loss": 0.99349623, |
| "memory(GiB)": 67.71, |
| "step": 1420, |
| "train_speed(iter/s)": 0.039598 |
| }, |
| { |
| "acc": 0.74595861, |
| "epoch": 0.9800550206327373, |
| "grad_norm": 0.93181973695755, |
| "learning_rate": 9.428881158347614e-05, |
| "loss": 0.90428505, |
| "memory(GiB)": 67.71, |
| "step": 1425, |
| "train_speed(iter/s)": 0.039658 |
| }, |
| { |
| "acc": 0.74024305, |
| "epoch": 0.9834938101788171, |
| "grad_norm": 1.0997734069824219, |
| "learning_rate": 9.42359177808624e-05, |
| "loss": 0.92796974, |
| "memory(GiB)": 67.71, |
| "step": 1430, |
| "train_speed(iter/s)": 0.039723 |
| }, |
| { |
| "acc": 0.75488276, |
| "epoch": 0.9869325997248969, |
| "grad_norm": 1.075714111328125, |
| "learning_rate": 9.418279514855995e-05, |
| "loss": 0.88083801, |
| "memory(GiB)": 67.71, |
| "step": 1435, |
| "train_speed(iter/s)": 0.039792 |
| }, |
| { |
| "acc": 0.75168705, |
| "epoch": 0.9903713892709766, |
| "grad_norm": 1.039860486984253, |
| "learning_rate": 9.412944396139998e-05, |
| "loss": 0.89997187, |
| "memory(GiB)": 67.71, |
| "step": 1440, |
| "train_speed(iter/s)": 0.039851 |
| }, |
| { |
| "acc": 0.74686685, |
| "epoch": 0.9938101788170564, |
| "grad_norm": 0.9597694277763367, |
| "learning_rate": 9.407586449539616e-05, |
| "loss": 0.90008278, |
| "memory(GiB)": 67.71, |
| "step": 1445, |
| "train_speed(iter/s)": 0.039911 |
| }, |
| { |
| "acc": 0.74714336, |
| "epoch": 0.9972489683631361, |
| "grad_norm": 1.0538160800933838, |
| "learning_rate": 9.402205702774304e-05, |
| "loss": 0.89391537, |
| "memory(GiB)": 67.71, |
| "step": 1450, |
| "train_speed(iter/s)": 0.039977 |
| }, |
| { |
| "acc": 0.74310694, |
| "epoch": 1.000687757909216, |
| "grad_norm": 1.170095443725586, |
| "learning_rate": 9.396802183681483e-05, |
| "loss": 0.9227499, |
| "memory(GiB)": 67.71, |
| "step": 1455, |
| "train_speed(iter/s)": 0.039992 |
| }, |
| { |
| "acc": 0.75078964, |
| "epoch": 1.0041265474552958, |
| "grad_norm": 0.9855571985244751, |
| "learning_rate": 9.391375920216388e-05, |
| "loss": 0.86350327, |
| "memory(GiB)": 67.71, |
| "step": 1460, |
| "train_speed(iter/s)": 0.040045 |
| }, |
| { |
| "acc": 0.76349792, |
| "epoch": 1.0075653370013755, |
| "grad_norm": 1.0560338497161865, |
| "learning_rate": 9.38592694045192e-05, |
| "loss": 0.8352499, |
| "memory(GiB)": 67.71, |
| "step": 1465, |
| "train_speed(iter/s)": 0.040102 |
| }, |
| { |
| "acc": 0.75685053, |
| "epoch": 1.0110041265474552, |
| "grad_norm": 1.2401360273361206, |
| "learning_rate": 9.380455272578501e-05, |
| "loss": 0.86346865, |
| "memory(GiB)": 67.71, |
| "step": 1470, |
| "train_speed(iter/s)": 0.040151 |
| }, |
| { |
| "acc": 0.74811668, |
| "epoch": 1.014442916093535, |
| "grad_norm": 1.0885376930236816, |
| "learning_rate": 9.374960944903933e-05, |
| "loss": 0.90040436, |
| "memory(GiB)": 67.71, |
| "step": 1475, |
| "train_speed(iter/s)": 0.04021 |
| }, |
| { |
| "acc": 0.76131544, |
| "epoch": 1.0178817056396148, |
| "grad_norm": 1.10344660282135, |
| "learning_rate": 9.36944398585325e-05, |
| "loss": 0.84312658, |
| "memory(GiB)": 67.71, |
| "step": 1480, |
| "train_speed(iter/s)": 0.040266 |
| }, |
| { |
| "acc": 0.76296844, |
| "epoch": 1.0213204951856947, |
| "grad_norm": 0.9839646816253662, |
| "learning_rate": 9.36390442396857e-05, |
| "loss": 0.83021584, |
| "memory(GiB)": 67.71, |
| "step": 1485, |
| "train_speed(iter/s)": 0.040327 |
| }, |
| { |
| "acc": 0.75224285, |
| "epoch": 1.0247592847317744, |
| "grad_norm": 1.2059285640716553, |
| "learning_rate": 9.358342287908944e-05, |
| "loss": 0.90571365, |
| "memory(GiB)": 67.71, |
| "step": 1490, |
| "train_speed(iter/s)": 0.040386 |
| }, |
| { |
| "acc": 0.75849729, |
| "epoch": 1.0281980742778543, |
| "grad_norm": 1.134822130203247, |
| "learning_rate": 9.352757606450213e-05, |
| "loss": 0.86388903, |
| "memory(GiB)": 67.71, |
| "step": 1495, |
| "train_speed(iter/s)": 0.040446 |
| }, |
| { |
| "acc": 0.757271, |
| "epoch": 1.031636863823934, |
| "grad_norm": 1.153064489364624, |
| "learning_rate": 9.34715040848486e-05, |
| "loss": 0.86167965, |
| "memory(GiB)": 67.71, |
| "step": 1500, |
| "train_speed(iter/s)": 0.040505 |
| }, |
| { |
| "epoch": 1.031636863823934, |
| "eval_acc": 0.7464288927880506, |
| "eval_loss": 0.9147906303405762, |
| "eval_runtime": 1155.4137, |
| "eval_samples_per_second": 3.707, |
| "eval_steps_per_second": 0.067, |
| "step": 1500 |
| }, |
| { |
| "acc": 0.74669247, |
| "epoch": 1.0350756533700138, |
| "grad_norm": 1.0673768520355225, |
| "learning_rate": 9.341520723021853e-05, |
| "loss": 0.8943819, |
| "memory(GiB)": 67.71, |
| "step": 1505, |
| "train_speed(iter/s)": 0.03933 |
| }, |
| { |
| "acc": 0.76012006, |
| "epoch": 1.0385144429160935, |
| "grad_norm": 1.1268237829208374, |
| "learning_rate": 9.3358685791865e-05, |
| "loss": 0.82661228, |
| "memory(GiB)": 67.71, |
| "step": 1510, |
| "train_speed(iter/s)": 0.039388 |
| }, |
| { |
| "acc": 0.75681725, |
| "epoch": 1.0419532324621734, |
| "grad_norm": 0.9775263667106628, |
| "learning_rate": 9.330194006220301e-05, |
| "loss": 0.85321465, |
| "memory(GiB)": 67.71, |
| "step": 1515, |
| "train_speed(iter/s)": 0.039438 |
| }, |
| { |
| "acc": 0.75653033, |
| "epoch": 1.045392022008253, |
| "grad_norm": 1.0308629274368286, |
| "learning_rate": 9.324497033480792e-05, |
| "loss": 0.85134258, |
| "memory(GiB)": 67.71, |
| "step": 1520, |
| "train_speed(iter/s)": 0.039499 |
| }, |
| { |
| "acc": 0.75386848, |
| "epoch": 1.0488308115543328, |
| "grad_norm": 1.1134449243545532, |
| "learning_rate": 9.318777690441397e-05, |
| "loss": 0.87145538, |
| "memory(GiB)": 67.71, |
| "step": 1525, |
| "train_speed(iter/s)": 0.039557 |
| }, |
| { |
| "acc": 0.75666504, |
| "epoch": 1.0522696011004127, |
| "grad_norm": 1.0742757320404053, |
| "learning_rate": 9.31303600669127e-05, |
| "loss": 0.86683607, |
| "memory(GiB)": 67.71, |
| "step": 1530, |
| "train_speed(iter/s)": 0.03961 |
| }, |
| { |
| "acc": 0.75246024, |
| "epoch": 1.0557083906464924, |
| "grad_norm": 1.33464515209198, |
| "learning_rate": 9.30727201193514e-05, |
| "loss": 0.85711727, |
| "memory(GiB)": 67.71, |
| "step": 1535, |
| "train_speed(iter/s)": 0.039666 |
| }, |
| { |
| "acc": 0.7529563, |
| "epoch": 1.0591471801925723, |
| "grad_norm": 1.165124535560608, |
| "learning_rate": 9.301485735993179e-05, |
| "loss": 0.86484051, |
| "memory(GiB)": 67.71, |
| "step": 1540, |
| "train_speed(iter/s)": 0.039727 |
| }, |
| { |
| "acc": 0.76439376, |
| "epoch": 1.062585969738652, |
| "grad_norm": 0.9591624736785889, |
| "learning_rate": 9.295677208800816e-05, |
| "loss": 0.82861805, |
| "memory(GiB)": 67.71, |
| "step": 1545, |
| "train_speed(iter/s)": 0.039774 |
| }, |
| { |
| "acc": 0.75873203, |
| "epoch": 1.0660247592847318, |
| "grad_norm": 1.1774530410766602, |
| "learning_rate": 9.289846460408602e-05, |
| "loss": 0.85537472, |
| "memory(GiB)": 67.71, |
| "step": 1550, |
| "train_speed(iter/s)": 0.039836 |
| }, |
| { |
| "acc": 0.75072222, |
| "epoch": 1.0694635488308115, |
| "grad_norm": 1.0823148488998413, |
| "learning_rate": 9.283993520982051e-05, |
| "loss": 0.87792244, |
| "memory(GiB)": 67.71, |
| "step": 1555, |
| "train_speed(iter/s)": 0.039888 |
| }, |
| { |
| "acc": 0.755092, |
| "epoch": 1.0729023383768914, |
| "grad_norm": 1.084096908569336, |
| "learning_rate": 9.278118420801481e-05, |
| "loss": 0.8602149, |
| "memory(GiB)": 67.71, |
| "step": 1560, |
| "train_speed(iter/s)": 0.03995 |
| }, |
| { |
| "acc": 0.76771908, |
| "epoch": 1.076341127922971, |
| "grad_norm": 1.157706379890442, |
| "learning_rate": 9.272221190261863e-05, |
| "loss": 0.83946896, |
| "memory(GiB)": 67.71, |
| "step": 1565, |
| "train_speed(iter/s)": 0.040014 |
| }, |
| { |
| "acc": 0.75533552, |
| "epoch": 1.0797799174690508, |
| "grad_norm": 1.1175612211227417, |
| "learning_rate": 9.266301859872657e-05, |
| "loss": 0.86314983, |
| "memory(GiB)": 67.71, |
| "step": 1570, |
| "train_speed(iter/s)": 0.040069 |
| }, |
| { |
| "acc": 0.74409065, |
| "epoch": 1.0832187070151307, |
| "grad_norm": 1.0851186513900757, |
| "learning_rate": 9.260360460257653e-05, |
| "loss": 0.91000662, |
| "memory(GiB)": 67.71, |
| "step": 1575, |
| "train_speed(iter/s)": 0.040117 |
| }, |
| { |
| "acc": 0.76165962, |
| "epoch": 1.0866574965612104, |
| "grad_norm": 1.208783507347107, |
| "learning_rate": 9.254397022154828e-05, |
| "loss": 0.86310711, |
| "memory(GiB)": 67.71, |
| "step": 1580, |
| "train_speed(iter/s)": 0.040175 |
| }, |
| { |
| "acc": 0.75226078, |
| "epoch": 1.0900962861072903, |
| "grad_norm": 1.2301445007324219, |
| "learning_rate": 9.248411576416162e-05, |
| "loss": 0.87202549, |
| "memory(GiB)": 67.71, |
| "step": 1585, |
| "train_speed(iter/s)": 0.04023 |
| }, |
| { |
| "acc": 0.74834862, |
| "epoch": 1.09353507565337, |
| "grad_norm": 1.0916322469711304, |
| "learning_rate": 9.242404154007502e-05, |
| "loss": 0.89558239, |
| "memory(GiB)": 67.71, |
| "step": 1590, |
| "train_speed(iter/s)": 0.040284 |
| }, |
| { |
| "acc": 0.75830355, |
| "epoch": 1.0969738651994498, |
| "grad_norm": 1.077378749847412, |
| "learning_rate": 9.236374786008389e-05, |
| "loss": 0.85708294, |
| "memory(GiB)": 67.71, |
| "step": 1595, |
| "train_speed(iter/s)": 0.04034 |
| }, |
| { |
| "acc": 0.76014338, |
| "epoch": 1.1004126547455295, |
| "grad_norm": 1.2474371194839478, |
| "learning_rate": 9.230323503611897e-05, |
| "loss": 0.85164671, |
| "memory(GiB)": 67.71, |
| "step": 1600, |
| "train_speed(iter/s)": 0.040404 |
| }, |
| { |
| "epoch": 1.1004126547455295, |
| "eval_acc": 0.7467494488684933, |
| "eval_loss": 0.9101867079734802, |
| "eval_runtime": 1124.3275, |
| "eval_samples_per_second": 3.809, |
| "eval_steps_per_second": 0.068, |
| "step": 1600 |
| }, |
| { |
| "acc": 0.75647812, |
| "epoch": 1.1038514442916094, |
| "grad_norm": 1.179998517036438, |
| "learning_rate": 9.224250338124481e-05, |
| "loss": 0.86214447, |
| "memory(GiB)": 67.71, |
| "step": 1605, |
| "train_speed(iter/s)": 0.039344 |
| }, |
| { |
| "acc": 0.76110773, |
| "epoch": 1.107290233837689, |
| "grad_norm": 1.1358368396759033, |
| "learning_rate": 9.2181553209658e-05, |
| "loss": 0.82860346, |
| "memory(GiB)": 67.71, |
| "step": 1610, |
| "train_speed(iter/s)": 0.039399 |
| }, |
| { |
| "acc": 0.7529737, |
| "epoch": 1.110729023383769, |
| "grad_norm": 1.1643849611282349, |
| "learning_rate": 9.212038483668572e-05, |
| "loss": 0.88976746, |
| "memory(GiB)": 67.71, |
| "step": 1615, |
| "train_speed(iter/s)": 0.039457 |
| }, |
| { |
| "acc": 0.75971909, |
| "epoch": 1.1141678129298487, |
| "grad_norm": 1.1216496229171753, |
| "learning_rate": 9.205899857878396e-05, |
| "loss": 0.85760059, |
| "memory(GiB)": 67.71, |
| "step": 1620, |
| "train_speed(iter/s)": 0.039517 |
| }, |
| { |
| "acc": 0.76570654, |
| "epoch": 1.1176066024759286, |
| "grad_norm": 1.2371224164962769, |
| "learning_rate": 9.199739475353596e-05, |
| "loss": 0.82434063, |
| "memory(GiB)": 67.71, |
| "step": 1625, |
| "train_speed(iter/s)": 0.039574 |
| }, |
| { |
| "acc": 0.75779676, |
| "epoch": 1.1210453920220083, |
| "grad_norm": 1.1810933351516724, |
| "learning_rate": 9.193557367965056e-05, |
| "loss": 0.85758648, |
| "memory(GiB)": 67.71, |
| "step": 1630, |
| "train_speed(iter/s)": 0.03963 |
| }, |
| { |
| "acc": 0.76221857, |
| "epoch": 1.124484181568088, |
| "grad_norm": 1.1857250928878784, |
| "learning_rate": 9.187353567696055e-05, |
| "loss": 0.84511681, |
| "memory(GiB)": 67.71, |
| "step": 1635, |
| "train_speed(iter/s)": 0.039679 |
| }, |
| { |
| "acc": 0.74748664, |
| "epoch": 1.1279229711141678, |
| "grad_norm": 1.026563286781311, |
| "learning_rate": 9.181128106642096e-05, |
| "loss": 0.9065136, |
| "memory(GiB)": 67.71, |
| "step": 1640, |
| "train_speed(iter/s)": 0.039732 |
| }, |
| { |
| "acc": 0.75156937, |
| "epoch": 1.1313617606602475, |
| "grad_norm": 1.0305781364440918, |
| "learning_rate": 9.174881017010746e-05, |
| "loss": 0.86748962, |
| "memory(GiB)": 67.71, |
| "step": 1645, |
| "train_speed(iter/s)": 0.039783 |
| }, |
| { |
| "acc": 0.75970831, |
| "epoch": 1.1348005502063274, |
| "grad_norm": 1.2061082124710083, |
| "learning_rate": 9.168612331121477e-05, |
| "loss": 0.84413948, |
| "memory(GiB)": 67.71, |
| "step": 1650, |
| "train_speed(iter/s)": 0.039833 |
| }, |
| { |
| "acc": 0.75250425, |
| "epoch": 1.138239339752407, |
| "grad_norm": 1.2730051279067993, |
| "learning_rate": 9.162322081405473e-05, |
| "loss": 0.86202583, |
| "memory(GiB)": 67.71, |
| "step": 1655, |
| "train_speed(iter/s)": 0.039887 |
| }, |
| { |
| "acc": 0.7535017, |
| "epoch": 1.141678129298487, |
| "grad_norm": 1.0208563804626465, |
| "learning_rate": 9.156010300405495e-05, |
| "loss": 0.86017208, |
| "memory(GiB)": 67.71, |
| "step": 1660, |
| "train_speed(iter/s)": 0.03994 |
| }, |
| { |
| "acc": 0.7593123, |
| "epoch": 1.1451169188445667, |
| "grad_norm": 1.2210179567337036, |
| "learning_rate": 9.149677020775686e-05, |
| "loss": 0.8386488, |
| "memory(GiB)": 67.71, |
| "step": 1665, |
| "train_speed(iter/s)": 0.039998 |
| }, |
| { |
| "acc": 0.76598496, |
| "epoch": 1.1485557083906466, |
| "grad_norm": 1.1266486644744873, |
| "learning_rate": 9.143322275281419e-05, |
| "loss": 0.84045124, |
| "memory(GiB)": 67.71, |
| "step": 1670, |
| "train_speed(iter/s)": 0.040053 |
| }, |
| { |
| "acc": 0.7449192, |
| "epoch": 1.1519944979367263, |
| "grad_norm": 1.2747905254364014, |
| "learning_rate": 9.136946096799117e-05, |
| "loss": 0.89558125, |
| "memory(GiB)": 67.71, |
| "step": 1675, |
| "train_speed(iter/s)": 0.040103 |
| }, |
| { |
| "acc": 0.77260947, |
| "epoch": 1.155433287482806, |
| "grad_norm": 1.1446512937545776, |
| "learning_rate": 9.13054851831609e-05, |
| "loss": 0.79779301, |
| "memory(GiB)": 67.71, |
| "step": 1680, |
| "train_speed(iter/s)": 0.040158 |
| }, |
| { |
| "acc": 0.73968267, |
| "epoch": 1.1588720770288858, |
| "grad_norm": 1.0520663261413574, |
| "learning_rate": 9.124129572930356e-05, |
| "loss": 0.91217728, |
| "memory(GiB)": 67.71, |
| "step": 1685, |
| "train_speed(iter/s)": 0.040209 |
| }, |
| { |
| "acc": 0.76177702, |
| "epoch": 1.1623108665749655, |
| "grad_norm": 1.0818169116973877, |
| "learning_rate": 9.117689293850484e-05, |
| "loss": 0.84482117, |
| "memory(GiB)": 67.71, |
| "step": 1690, |
| "train_speed(iter/s)": 0.040254 |
| }, |
| { |
| "acc": 0.75831223, |
| "epoch": 1.1657496561210454, |
| "grad_norm": 1.1914788484573364, |
| "learning_rate": 9.111227714395406e-05, |
| "loss": 0.85761623, |
| "memory(GiB)": 67.71, |
| "step": 1695, |
| "train_speed(iter/s)": 0.040302 |
| }, |
| { |
| "acc": 0.75756545, |
| "epoch": 1.169188445667125, |
| "grad_norm": 1.1335783004760742, |
| "learning_rate": 9.104744867994258e-05, |
| "loss": 0.85422668, |
| "memory(GiB)": 67.71, |
| "step": 1700, |
| "train_speed(iter/s)": 0.040347 |
| }, |
| { |
| "epoch": 1.169188445667125, |
| "eval_acc": 0.7492295406487605, |
| "eval_loss": 0.9045791625976562, |
| "eval_runtime": 1125.5138, |
| "eval_samples_per_second": 3.805, |
| "eval_steps_per_second": 0.068, |
| "step": 1700 |
| }, |
| { |
| "acc": 0.75732212, |
| "epoch": 1.172627235213205, |
| "grad_norm": 0.9720064997673035, |
| "learning_rate": 9.098240788186192e-05, |
| "loss": 0.85368481, |
| "memory(GiB)": 67.71, |
| "step": 1705, |
| "train_speed(iter/s)": 0.03935 |
| }, |
| { |
| "acc": 0.76147232, |
| "epoch": 1.1760660247592847, |
| "grad_norm": 1.2705514430999756, |
| "learning_rate": 9.091715508620222e-05, |
| "loss": 0.85527439, |
| "memory(GiB)": 67.71, |
| "step": 1710, |
| "train_speed(iter/s)": 0.039404 |
| }, |
| { |
| "acc": 0.74866266, |
| "epoch": 1.1795048143053646, |
| "grad_norm": 1.1010618209838867, |
| "learning_rate": 9.085169063055032e-05, |
| "loss": 0.8962719, |
| "memory(GiB)": 67.71, |
| "step": 1715, |
| "train_speed(iter/s)": 0.039457 |
| }, |
| { |
| "acc": 0.76777854, |
| "epoch": 1.1829436038514443, |
| "grad_norm": 1.0222831964492798, |
| "learning_rate": 9.078601485358813e-05, |
| "loss": 0.81568956, |
| "memory(GiB)": 67.71, |
| "step": 1720, |
| "train_speed(iter/s)": 0.03951 |
| }, |
| { |
| "acc": 0.74900856, |
| "epoch": 1.1863823933975242, |
| "grad_norm": 1.1607588529586792, |
| "learning_rate": 9.072012809509081e-05, |
| "loss": 0.88696823, |
| "memory(GiB)": 67.71, |
| "step": 1725, |
| "train_speed(iter/s)": 0.039555 |
| }, |
| { |
| "acc": 0.75043535, |
| "epoch": 1.1898211829436038, |
| "grad_norm": 1.1782574653625488, |
| "learning_rate": 9.065403069592505e-05, |
| "loss": 0.86962795, |
| "memory(GiB)": 67.71, |
| "step": 1730, |
| "train_speed(iter/s)": 0.039599 |
| }, |
| { |
| "acc": 0.74629622, |
| "epoch": 1.1932599724896837, |
| "grad_norm": 1.1644479036331177, |
| "learning_rate": 9.058772299804731e-05, |
| "loss": 0.88353643, |
| "memory(GiB)": 67.71, |
| "step": 1735, |
| "train_speed(iter/s)": 0.039653 |
| }, |
| { |
| "acc": 0.76667023, |
| "epoch": 1.1966987620357634, |
| "grad_norm": 1.176121711730957, |
| "learning_rate": 9.052120534450196e-05, |
| "loss": 0.82560787, |
| "memory(GiB)": 67.71, |
| "step": 1740, |
| "train_speed(iter/s)": 0.039705 |
| }, |
| { |
| "acc": 0.7706706, |
| "epoch": 1.200137551581843, |
| "grad_norm": 1.2071737051010132, |
| "learning_rate": 9.045447807941972e-05, |
| "loss": 0.82129135, |
| "memory(GiB)": 67.71, |
| "step": 1745, |
| "train_speed(iter/s)": 0.039758 |
| }, |
| { |
| "acc": 0.76220055, |
| "epoch": 1.203576341127923, |
| "grad_norm": 1.161576509475708, |
| "learning_rate": 9.038754154801559e-05, |
| "loss": 0.84442816, |
| "memory(GiB)": 67.71, |
| "step": 1750, |
| "train_speed(iter/s)": 0.039809 |
| }, |
| { |
| "acc": 0.76516528, |
| "epoch": 1.2070151306740027, |
| "grad_norm": 1.0194506645202637, |
| "learning_rate": 9.032039609658732e-05, |
| "loss": 0.82462807, |
| "memory(GiB)": 67.71, |
| "step": 1755, |
| "train_speed(iter/s)": 0.03985 |
| }, |
| { |
| "acc": 0.76512585, |
| "epoch": 1.2104539202200826, |
| "grad_norm": 1.123105764389038, |
| "learning_rate": 9.025304207251346e-05, |
| "loss": 0.84622154, |
| "memory(GiB)": 67.71, |
| "step": 1760, |
| "train_speed(iter/s)": 0.039905 |
| }, |
| { |
| "acc": 0.75925913, |
| "epoch": 1.2138927097661623, |
| "grad_norm": 1.0418940782546997, |
| "learning_rate": 9.018547982425164e-05, |
| "loss": 0.84370403, |
| "memory(GiB)": 67.71, |
| "step": 1765, |
| "train_speed(iter/s)": 0.03995 |
| }, |
| { |
| "acc": 0.76256437, |
| "epoch": 1.2173314993122422, |
| "grad_norm": 1.133818507194519, |
| "learning_rate": 9.011770970133671e-05, |
| "loss": 0.84478779, |
| "memory(GiB)": 67.71, |
| "step": 1770, |
| "train_speed(iter/s)": 0.040002 |
| }, |
| { |
| "acc": 0.75265675, |
| "epoch": 1.2207702888583218, |
| "grad_norm": 1.3675616979599, |
| "learning_rate": 9.0049732054379e-05, |
| "loss": 0.86621552, |
| "memory(GiB)": 67.71, |
| "step": 1775, |
| "train_speed(iter/s)": 0.040062 |
| }, |
| { |
| "acc": 0.75733051, |
| "epoch": 1.2242090784044017, |
| "grad_norm": 1.2875425815582275, |
| "learning_rate": 8.998154723506249e-05, |
| "loss": 0.88228512, |
| "memory(GiB)": 67.71, |
| "step": 1780, |
| "train_speed(iter/s)": 0.040108 |
| }, |
| { |
| "acc": 0.74635658, |
| "epoch": 1.2276478679504814, |
| "grad_norm": 1.2586891651153564, |
| "learning_rate": 8.991315559614288e-05, |
| "loss": 0.90037432, |
| "memory(GiB)": 67.71, |
| "step": 1785, |
| "train_speed(iter/s)": 0.040152 |
| }, |
| { |
| "acc": 0.7586679, |
| "epoch": 1.231086657496561, |
| "grad_norm": 1.1891663074493408, |
| "learning_rate": 8.984455749144597e-05, |
| "loss": 0.84769564, |
| "memory(GiB)": 67.71, |
| "step": 1790, |
| "train_speed(iter/s)": 0.0402 |
| }, |
| { |
| "acc": 0.74606829, |
| "epoch": 1.234525447042641, |
| "grad_norm": 1.154038667678833, |
| "learning_rate": 8.977575327586563e-05, |
| "loss": 0.88660145, |
| "memory(GiB)": 67.71, |
| "step": 1795, |
| "train_speed(iter/s)": 0.040243 |
| }, |
| { |
| "acc": 0.77012577, |
| "epoch": 1.2379642365887207, |
| "grad_norm": 1.2006701231002808, |
| "learning_rate": 8.97067433053621e-05, |
| "loss": 0.8128231, |
| "memory(GiB)": 67.71, |
| "step": 1800, |
| "train_speed(iter/s)": 0.040293 |
| }, |
| { |
| "epoch": 1.2379642365887207, |
| "eval_acc": 0.7502980609169029, |
| "eval_loss": 0.9007091522216797, |
| "eval_runtime": 1174.5762, |
| "eval_samples_per_second": 3.646, |
| "eval_steps_per_second": 0.066, |
| "step": 1800 |
| }, |
| { |
| "acc": 0.74588566, |
| "epoch": 1.2414030261348006, |
| "grad_norm": 1.2025572061538696, |
| "learning_rate": 8.963752793696004e-05, |
| "loss": 0.89730377, |
| "memory(GiB)": 67.71, |
| "step": 1805, |
| "train_speed(iter/s)": 0.039311 |
| }, |
| { |
| "acc": 0.76906261, |
| "epoch": 1.2448418156808803, |
| "grad_norm": 1.0686986446380615, |
| "learning_rate": 8.956810752874682e-05, |
| "loss": 0.81423302, |
| "memory(GiB)": 67.71, |
| "step": 1810, |
| "train_speed(iter/s)": 0.039359 |
| }, |
| { |
| "acc": 0.77615113, |
| "epoch": 1.2482806052269602, |
| "grad_norm": 1.2386928796768188, |
| "learning_rate": 8.949848243987054e-05, |
| "loss": 0.79887466, |
| "memory(GiB)": 67.71, |
| "step": 1815, |
| "train_speed(iter/s)": 0.039407 |
| }, |
| { |
| "acc": 0.75191274, |
| "epoch": 1.2517193947730398, |
| "grad_norm": 1.180568814277649, |
| "learning_rate": 8.94286530305382e-05, |
| "loss": 0.85600204, |
| "memory(GiB)": 67.71, |
| "step": 1820, |
| "train_speed(iter/s)": 0.039452 |
| }, |
| { |
| "acc": 0.76613312, |
| "epoch": 1.2551581843191197, |
| "grad_norm": 1.1538622379302979, |
| "learning_rate": 8.935861966201393e-05, |
| "loss": 0.82688131, |
| "memory(GiB)": 67.71, |
| "step": 1825, |
| "train_speed(iter/s)": 0.039499 |
| }, |
| { |
| "acc": 0.77081518, |
| "epoch": 1.2585969738651994, |
| "grad_norm": 1.0973575115203857, |
| "learning_rate": 8.928838269661694e-05, |
| "loss": 0.80709963, |
| "memory(GiB)": 67.71, |
| "step": 1830, |
| "train_speed(iter/s)": 0.039543 |
| }, |
| { |
| "acc": 0.74893703, |
| "epoch": 1.262035763411279, |
| "grad_norm": 1.1516822576522827, |
| "learning_rate": 8.921794249771987e-05, |
| "loss": 0.87887421, |
| "memory(GiB)": 67.71, |
| "step": 1835, |
| "train_speed(iter/s)": 0.039584 |
| }, |
| { |
| "acc": 0.74806905, |
| "epoch": 1.265474552957359, |
| "grad_norm": 1.1790939569473267, |
| "learning_rate": 8.914729942974674e-05, |
| "loss": 0.88099899, |
| "memory(GiB)": 67.71, |
| "step": 1840, |
| "train_speed(iter/s)": 0.03963 |
| }, |
| { |
| "acc": 0.77447009, |
| "epoch": 1.268913342503439, |
| "grad_norm": 1.009238600730896, |
| "learning_rate": 8.907645385817104e-05, |
| "loss": 0.7905911, |
| "memory(GiB)": 67.71, |
| "step": 1845, |
| "train_speed(iter/s)": 0.039686 |
| }, |
| { |
| "acc": 0.75110741, |
| "epoch": 1.2723521320495186, |
| "grad_norm": 1.2757585048675537, |
| "learning_rate": 8.900540614951409e-05, |
| "loss": 0.87034512, |
| "memory(GiB)": 67.71, |
| "step": 1850, |
| "train_speed(iter/s)": 0.03973 |
| }, |
| { |
| "acc": 0.74727058, |
| "epoch": 1.2757909215955983, |
| "grad_norm": 1.0743454694747925, |
| "learning_rate": 8.893415667134281e-05, |
| "loss": 0.88521938, |
| "memory(GiB)": 67.71, |
| "step": 1855, |
| "train_speed(iter/s)": 0.039772 |
| }, |
| { |
| "acc": 0.76257467, |
| "epoch": 1.2792297111416782, |
| "grad_norm": 1.0623903274536133, |
| "learning_rate": 8.886270579226807e-05, |
| "loss": 0.84139423, |
| "memory(GiB)": 67.71, |
| "step": 1860, |
| "train_speed(iter/s)": 0.03982 |
| }, |
| { |
| "acc": 0.76310492, |
| "epoch": 1.2826685006877578, |
| "grad_norm": 1.0730196237564087, |
| "learning_rate": 8.879105388194267e-05, |
| "loss": 0.84801579, |
| "memory(GiB)": 67.71, |
| "step": 1865, |
| "train_speed(iter/s)": 0.039868 |
| }, |
| { |
| "acc": 0.76296768, |
| "epoch": 1.2861072902338377, |
| "grad_norm": 1.0681921243667603, |
| "learning_rate": 8.871920131105943e-05, |
| "loss": 0.82966671, |
| "memory(GiB)": 67.71, |
| "step": 1870, |
| "train_speed(iter/s)": 0.039919 |
| }, |
| { |
| "acc": 0.7662539, |
| "epoch": 1.2895460797799174, |
| "grad_norm": 1.1676512956619263, |
| "learning_rate": 8.864714845134931e-05, |
| "loss": 0.82158031, |
| "memory(GiB)": 67.71, |
| "step": 1875, |
| "train_speed(iter/s)": 0.039968 |
| }, |
| { |
| "acc": 0.76386523, |
| "epoch": 1.2929848693259973, |
| "grad_norm": 1.2241677045822144, |
| "learning_rate": 8.857489567557949e-05, |
| "loss": 0.8327158, |
| "memory(GiB)": 67.71, |
| "step": 1880, |
| "train_speed(iter/s)": 0.040021 |
| }, |
| { |
| "acc": 0.77355728, |
| "epoch": 1.296423658872077, |
| "grad_norm": 1.0751720666885376, |
| "learning_rate": 8.850244335755136e-05, |
| "loss": 0.803335, |
| "memory(GiB)": 67.71, |
| "step": 1885, |
| "train_speed(iter/s)": 0.040073 |
| }, |
| { |
| "acc": 0.76320724, |
| "epoch": 1.299862448418157, |
| "grad_norm": 1.292360544204712, |
| "learning_rate": 8.84297918720987e-05, |
| "loss": 0.85545721, |
| "memory(GiB)": 67.71, |
| "step": 1890, |
| "train_speed(iter/s)": 0.040122 |
| }, |
| { |
| "acc": 0.76533775, |
| "epoch": 1.3033012379642366, |
| "grad_norm": 1.27505624294281, |
| "learning_rate": 8.835694159508568e-05, |
| "loss": 0.83456764, |
| "memory(GiB)": 67.71, |
| "step": 1895, |
| "train_speed(iter/s)": 0.040172 |
| }, |
| { |
| "acc": 0.77199011, |
| "epoch": 1.3067400275103163, |
| "grad_norm": 1.0612465143203735, |
| "learning_rate": 8.82838929034049e-05, |
| "loss": 0.81219292, |
| "memory(GiB)": 67.71, |
| "step": 1900, |
| "train_speed(iter/s)": 0.040222 |
| }, |
| { |
| "epoch": 1.3067400275103163, |
| "eval_acc": 0.7521145453727449, |
| "eval_loss": 0.8923233151435852, |
| "eval_runtime": 1131.335, |
| "eval_samples_per_second": 3.786, |
| "eval_steps_per_second": 0.068, |
| "step": 1900 |
| }, |
| { |
| "acc": 0.75053563, |
| "epoch": 1.3101788170563962, |
| "grad_norm": 1.2158348560333252, |
| "learning_rate": 8.821064617497549e-05, |
| "loss": 0.87764034, |
| "memory(GiB)": 67.71, |
| "step": 1905, |
| "train_speed(iter/s)": 0.039328 |
| }, |
| { |
| "acc": 0.76767535, |
| "epoch": 1.313617606602476, |
| "grad_norm": 1.0964173078536987, |
| "learning_rate": 8.81372017887411e-05, |
| "loss": 0.83279819, |
| "memory(GiB)": 67.71, |
| "step": 1910, |
| "train_speed(iter/s)": 0.039378 |
| }, |
| { |
| "acc": 0.75541239, |
| "epoch": 1.3170563961485557, |
| "grad_norm": 1.2945960760116577, |
| "learning_rate": 8.806356012466799e-05, |
| "loss": 0.8567975, |
| "memory(GiB)": 67.71, |
| "step": 1915, |
| "train_speed(iter/s)": 0.03942 |
| }, |
| { |
| "acc": 0.75376849, |
| "epoch": 1.3204951856946354, |
| "grad_norm": 1.2059944868087769, |
| "learning_rate": 8.798972156374303e-05, |
| "loss": 0.86053438, |
| "memory(GiB)": 67.71, |
| "step": 1920, |
| "train_speed(iter/s)": 0.039461 |
| }, |
| { |
| "acc": 0.75244598, |
| "epoch": 1.3239339752407153, |
| "grad_norm": 1.2470142841339111, |
| "learning_rate": 8.791568648797175e-05, |
| "loss": 0.84860821, |
| "memory(GiB)": 67.71, |
| "step": 1925, |
| "train_speed(iter/s)": 0.039509 |
| }, |
| { |
| "acc": 0.76857953, |
| "epoch": 1.327372764786795, |
| "grad_norm": 1.074821949005127, |
| "learning_rate": 8.784145528037633e-05, |
| "loss": 0.81543255, |
| "memory(GiB)": 67.71, |
| "step": 1930, |
| "train_speed(iter/s)": 0.039548 |
| }, |
| { |
| "acc": 0.75690975, |
| "epoch": 1.330811554332875, |
| "grad_norm": 1.2594019174575806, |
| "learning_rate": 8.776702832499369e-05, |
| "loss": 0.85649605, |
| "memory(GiB)": 67.71, |
| "step": 1935, |
| "train_speed(iter/s)": 0.039588 |
| }, |
| { |
| "acc": 0.75640688, |
| "epoch": 1.3342503438789546, |
| "grad_norm": 1.2854877710342407, |
| "learning_rate": 8.769240600687341e-05, |
| "loss": 0.85886908, |
| "memory(GiB)": 67.71, |
| "step": 1940, |
| "train_speed(iter/s)": 0.03963 |
| }, |
| { |
| "acc": 0.76101456, |
| "epoch": 1.3376891334250343, |
| "grad_norm": 1.2323275804519653, |
| "learning_rate": 8.761758871207578e-05, |
| "loss": 0.85549269, |
| "memory(GiB)": 67.71, |
| "step": 1945, |
| "train_speed(iter/s)": 0.039677 |
| }, |
| { |
| "acc": 0.75675645, |
| "epoch": 1.3411279229711142, |
| "grad_norm": 1.3912837505340576, |
| "learning_rate": 8.754257682766987e-05, |
| "loss": 0.86173325, |
| "memory(GiB)": 67.71, |
| "step": 1950, |
| "train_speed(iter/s)": 0.039723 |
| }, |
| { |
| "acc": 0.75836124, |
| "epoch": 1.344566712517194, |
| "grad_norm": 1.325785517692566, |
| "learning_rate": 8.746737074173139e-05, |
| "loss": 0.85381556, |
| "memory(GiB)": 67.71, |
| "step": 1955, |
| "train_speed(iter/s)": 0.039767 |
| }, |
| { |
| "acc": 0.75378246, |
| "epoch": 1.3480055020632737, |
| "grad_norm": 1.3383103609085083, |
| "learning_rate": 8.739197084334078e-05, |
| "loss": 0.85643635, |
| "memory(GiB)": 67.71, |
| "step": 1960, |
| "train_speed(iter/s)": 0.039815 |
| }, |
| { |
| "acc": 0.74990363, |
| "epoch": 1.3514442916093534, |
| "grad_norm": 1.0907026529312134, |
| "learning_rate": 8.731637752258122e-05, |
| "loss": 0.8558506, |
| "memory(GiB)": 67.71, |
| "step": 1965, |
| "train_speed(iter/s)": 0.039861 |
| }, |
| { |
| "acc": 0.75551319, |
| "epoch": 1.3548830811554333, |
| "grad_norm": 1.1416265964508057, |
| "learning_rate": 8.724059117053647e-05, |
| "loss": 0.86469622, |
| "memory(GiB)": 67.71, |
| "step": 1970, |
| "train_speed(iter/s)": 0.039902 |
| }, |
| { |
| "acc": 0.7511488, |
| "epoch": 1.358321870701513, |
| "grad_norm": 1.1393564939498901, |
| "learning_rate": 8.716461217928903e-05, |
| "loss": 0.85416451, |
| "memory(GiB)": 67.71, |
| "step": 1975, |
| "train_speed(iter/s)": 0.039948 |
| }, |
| { |
| "acc": 0.76492167, |
| "epoch": 1.361760660247593, |
| "grad_norm": 1.0601388216018677, |
| "learning_rate": 8.708844094191798e-05, |
| "loss": 0.82022047, |
| "memory(GiB)": 67.71, |
| "step": 1980, |
| "train_speed(iter/s)": 0.039988 |
| }, |
| { |
| "acc": 0.75548849, |
| "epoch": 1.3651994497936726, |
| "grad_norm": 1.1647326946258545, |
| "learning_rate": 8.701207785249703e-05, |
| "loss": 0.8785594, |
| "memory(GiB)": 67.71, |
| "step": 1985, |
| "train_speed(iter/s)": 0.040031 |
| }, |
| { |
| "acc": 0.76257005, |
| "epoch": 1.3686382393397525, |
| "grad_norm": 1.208771824836731, |
| "learning_rate": 8.693552330609235e-05, |
| "loss": 0.82169209, |
| "memory(GiB)": 67.71, |
| "step": 1990, |
| "train_speed(iter/s)": 0.040075 |
| }, |
| { |
| "acc": 0.7662899, |
| "epoch": 1.3720770288858322, |
| "grad_norm": 1.0375357866287231, |
| "learning_rate": 8.685877769876074e-05, |
| "loss": 0.82175579, |
| "memory(GiB)": 67.71, |
| "step": 1995, |
| "train_speed(iter/s)": 0.040117 |
| }, |
| { |
| "acc": 0.75507236, |
| "epoch": 1.375515818431912, |
| "grad_norm": 1.070656180381775, |
| "learning_rate": 8.678184142754736e-05, |
| "loss": 0.84867239, |
| "memory(GiB)": 67.71, |
| "step": 2000, |
| "train_speed(iter/s)": 0.040168 |
| }, |
| { |
| "epoch": 1.375515818431912, |
| "eval_acc": 0.7514059477212399, |
| "eval_loss": 0.890434980392456, |
| "eval_runtime": 1140.0174, |
| "eval_samples_per_second": 3.757, |
| "eval_steps_per_second": 0.068, |
| "step": 2000 |
| }, |
| { |
| "acc": 0.75042534, |
| "epoch": 1.3789546079779917, |
| "grad_norm": 1.181110143661499, |
| "learning_rate": 8.670471489048382e-05, |
| "loss": 0.90365086, |
| "memory(GiB)": 67.71, |
| "step": 2005, |
| "train_speed(iter/s)": 0.039309 |
| }, |
| { |
| "acc": 0.75193415, |
| "epoch": 1.3823933975240714, |
| "grad_norm": 1.0542738437652588, |
| "learning_rate": 8.662739848658605e-05, |
| "loss": 0.86276369, |
| "memory(GiB)": 67.71, |
| "step": 2010, |
| "train_speed(iter/s)": 0.039351 |
| }, |
| { |
| "acc": 0.76266041, |
| "epoch": 1.3858321870701513, |
| "grad_norm": 1.1464662551879883, |
| "learning_rate": 8.654989261585231e-05, |
| "loss": 0.83303232, |
| "memory(GiB)": 67.71, |
| "step": 2015, |
| "train_speed(iter/s)": 0.039396 |
| }, |
| { |
| "acc": 0.7555974, |
| "epoch": 1.3892709766162312, |
| "grad_norm": 1.137511134147644, |
| "learning_rate": 8.6472197679261e-05, |
| "loss": 0.87258329, |
| "memory(GiB)": 67.71, |
| "step": 2020, |
| "train_speed(iter/s)": 0.039441 |
| }, |
| { |
| "acc": 0.75746002, |
| "epoch": 1.392709766162311, |
| "grad_norm": 1.1067372560501099, |
| "learning_rate": 8.639431407876873e-05, |
| "loss": 0.8575942, |
| "memory(GiB)": 67.71, |
| "step": 2025, |
| "train_speed(iter/s)": 0.039486 |
| }, |
| { |
| "acc": 0.74920359, |
| "epoch": 1.3961485557083906, |
| "grad_norm": 1.1339222192764282, |
| "learning_rate": 8.631624221730809e-05, |
| "loss": 0.89333057, |
| "memory(GiB)": 67.71, |
| "step": 2030, |
| "train_speed(iter/s)": 0.039533 |
| }, |
| { |
| "acc": 0.75785513, |
| "epoch": 1.3995873452544705, |
| "grad_norm": 1.193408489227295, |
| "learning_rate": 8.623798249878573e-05, |
| "loss": 0.85004654, |
| "memory(GiB)": 67.71, |
| "step": 2035, |
| "train_speed(iter/s)": 0.039579 |
| }, |
| { |
| "acc": 0.76322355, |
| "epoch": 1.4030261348005502, |
| "grad_norm": 1.4497336149215698, |
| "learning_rate": 8.615953532808008e-05, |
| "loss": 0.85098343, |
| "memory(GiB)": 67.71, |
| "step": 2040, |
| "train_speed(iter/s)": 0.039624 |
| }, |
| { |
| "acc": 0.75989523, |
| "epoch": 1.40646492434663, |
| "grad_norm": 1.424786925315857, |
| "learning_rate": 8.608090111103948e-05, |
| "loss": 0.86450672, |
| "memory(GiB)": 67.71, |
| "step": 2045, |
| "train_speed(iter/s)": 0.039665 |
| }, |
| { |
| "acc": 0.75607204, |
| "epoch": 1.4099037138927097, |
| "grad_norm": 1.1818575859069824, |
| "learning_rate": 8.600208025447983e-05, |
| "loss": 0.859338, |
| "memory(GiB)": 67.71, |
| "step": 2050, |
| "train_speed(iter/s)": 0.039707 |
| }, |
| { |
| "acc": 0.74912252, |
| "epoch": 1.4133425034387894, |
| "grad_norm": 1.0915964841842651, |
| "learning_rate": 8.592307316618272e-05, |
| "loss": 0.88583393, |
| "memory(GiB)": 67.71, |
| "step": 2055, |
| "train_speed(iter/s)": 0.039753 |
| }, |
| { |
| "acc": 0.7610446, |
| "epoch": 1.4167812929848693, |
| "grad_norm": 1.3371332883834839, |
| "learning_rate": 8.584388025489314e-05, |
| "loss": 0.83794365, |
| "memory(GiB)": 67.71, |
| "step": 2060, |
| "train_speed(iter/s)": 0.0398 |
| }, |
| { |
| "acc": 0.76503677, |
| "epoch": 1.4202200825309492, |
| "grad_norm": 1.0140537023544312, |
| "learning_rate": 8.57645019303175e-05, |
| "loss": 0.81193466, |
| "memory(GiB)": 67.71, |
| "step": 2065, |
| "train_speed(iter/s)": 0.039843 |
| }, |
| { |
| "acc": 0.75801926, |
| "epoch": 1.423658872077029, |
| "grad_norm": 1.2762821912765503, |
| "learning_rate": 8.568493860312142e-05, |
| "loss": 0.85571671, |
| "memory(GiB)": 67.71, |
| "step": 2070, |
| "train_speed(iter/s)": 0.039884 |
| }, |
| { |
| "acc": 0.75672455, |
| "epoch": 1.4270976616231086, |
| "grad_norm": 1.1539915800094604, |
| "learning_rate": 8.56051906849276e-05, |
| "loss": 0.86462698, |
| "memory(GiB)": 67.71, |
| "step": 2075, |
| "train_speed(iter/s)": 0.039921 |
| }, |
| { |
| "acc": 0.75165954, |
| "epoch": 1.4305364511691885, |
| "grad_norm": 1.306449055671692, |
| "learning_rate": 8.55252585883138e-05, |
| "loss": 0.88216114, |
| "memory(GiB)": 67.71, |
| "step": 2080, |
| "train_speed(iter/s)": 0.039965 |
| }, |
| { |
| "acc": 0.76647811, |
| "epoch": 1.4339752407152682, |
| "grad_norm": 1.1315670013427734, |
| "learning_rate": 8.544514272681056e-05, |
| "loss": 0.83219862, |
| "memory(GiB)": 67.71, |
| "step": 2085, |
| "train_speed(iter/s)": 0.040006 |
| }, |
| { |
| "acc": 0.75151563, |
| "epoch": 1.437414030261348, |
| "grad_norm": 1.2553141117095947, |
| "learning_rate": 8.536484351489918e-05, |
| "loss": 0.88327541, |
| "memory(GiB)": 67.71, |
| "step": 2090, |
| "train_speed(iter/s)": 0.040054 |
| }, |
| { |
| "acc": 0.75124393, |
| "epoch": 1.4408528198074277, |
| "grad_norm": 1.0970312356948853, |
| "learning_rate": 8.528436136800955e-05, |
| "loss": 0.86639719, |
| "memory(GiB)": 67.71, |
| "step": 2095, |
| "train_speed(iter/s)": 0.040094 |
| }, |
| { |
| "acc": 0.75917168, |
| "epoch": 1.4442916093535076, |
| "grad_norm": 1.1937634944915771, |
| "learning_rate": 8.520369670251787e-05, |
| "loss": 0.85560112, |
| "memory(GiB)": 67.71, |
| "step": 2100, |
| "train_speed(iter/s)": 0.040143 |
| }, |
| { |
| "epoch": 1.4442916093535076, |
| "eval_acc": 0.7529749853781437, |
| "eval_loss": 0.8845105767250061, |
| "eval_runtime": 1065.8538, |
| "eval_samples_per_second": 4.018, |
| "eval_steps_per_second": 0.072, |
| "step": 2100 |
| }, |
| { |
| "acc": 0.75502768, |
| "epoch": 1.4477303988995873, |
| "grad_norm": 1.2217875719070435, |
| "learning_rate": 8.512284993574473e-05, |
| "loss": 0.86460505, |
| "memory(GiB)": 67.71, |
| "step": 2105, |
| "train_speed(iter/s)": 0.039384 |
| }, |
| { |
| "acc": 0.7641192, |
| "epoch": 1.4511691884456672, |
| "grad_norm": 1.1658051013946533, |
| "learning_rate": 8.504182148595275e-05, |
| "loss": 0.82581739, |
| "memory(GiB)": 67.71, |
| "step": 2110, |
| "train_speed(iter/s)": 0.039424 |
| }, |
| { |
| "acc": 0.75860772, |
| "epoch": 1.454607977991747, |
| "grad_norm": 1.4592278003692627, |
| "learning_rate": 8.496061177234452e-05, |
| "loss": 0.84182692, |
| "memory(GiB)": 67.71, |
| "step": 2115, |
| "train_speed(iter/s)": 0.039466 |
| }, |
| { |
| "acc": 0.76232295, |
| "epoch": 1.4580467675378266, |
| "grad_norm": 1.2424806356430054, |
| "learning_rate": 8.487922121506039e-05, |
| "loss": 0.84641819, |
| "memory(GiB)": 67.71, |
| "step": 2120, |
| "train_speed(iter/s)": 0.039509 |
| }, |
| { |
| "acc": 0.75302744, |
| "epoch": 1.4614855570839065, |
| "grad_norm": 1.1985810995101929, |
| "learning_rate": 8.479765023517631e-05, |
| "loss": 0.87050896, |
| "memory(GiB)": 67.71, |
| "step": 2125, |
| "train_speed(iter/s)": 0.039552 |
| }, |
| { |
| "acc": 0.76178207, |
| "epoch": 1.4649243466299862, |
| "grad_norm": 1.108946442604065, |
| "learning_rate": 8.471589925470166e-05, |
| "loss": 0.82996387, |
| "memory(GiB)": 67.71, |
| "step": 2130, |
| "train_speed(iter/s)": 0.039594 |
| }, |
| { |
| "acc": 0.75262017, |
| "epoch": 1.468363136176066, |
| "grad_norm": 1.266554832458496, |
| "learning_rate": 8.463396869657704e-05, |
| "loss": 0.85832672, |
| "memory(GiB)": 67.71, |
| "step": 2135, |
| "train_speed(iter/s)": 0.039634 |
| }, |
| { |
| "acc": 0.76968784, |
| "epoch": 1.4718019257221457, |
| "grad_norm": 1.2651324272155762, |
| "learning_rate": 8.455185898467213e-05, |
| "loss": 0.80993366, |
| "memory(GiB)": 67.71, |
| "step": 2140, |
| "train_speed(iter/s)": 0.039677 |
| }, |
| { |
| "acc": 0.76287999, |
| "epoch": 1.4752407152682256, |
| "grad_norm": 1.4108299016952515, |
| "learning_rate": 8.446957054378344e-05, |
| "loss": 0.82752171, |
| "memory(GiB)": 67.71, |
| "step": 2145, |
| "train_speed(iter/s)": 0.039717 |
| }, |
| { |
| "acc": 0.77508984, |
| "epoch": 1.4786795048143053, |
| "grad_norm": 1.1667840480804443, |
| "learning_rate": 8.438710379963214e-05, |
| "loss": 0.78502192, |
| "memory(GiB)": 67.71, |
| "step": 2150, |
| "train_speed(iter/s)": 0.039761 |
| }, |
| { |
| "acc": 0.74883337, |
| "epoch": 1.4821182943603852, |
| "grad_norm": 1.1578980684280396, |
| "learning_rate": 8.430445917886186e-05, |
| "loss": 0.88730097, |
| "memory(GiB)": 67.71, |
| "step": 2155, |
| "train_speed(iter/s)": 0.039804 |
| }, |
| { |
| "acc": 0.77094564, |
| "epoch": 1.485557083906465, |
| "grad_norm": 1.039753794670105, |
| "learning_rate": 8.422163710903649e-05, |
| "loss": 0.80611877, |
| "memory(GiB)": 67.71, |
| "step": 2160, |
| "train_speed(iter/s)": 0.039843 |
| }, |
| { |
| "acc": 0.76888881, |
| "epoch": 1.4889958734525446, |
| "grad_norm": 1.5004595518112183, |
| "learning_rate": 8.413863801863794e-05, |
| "loss": 0.80163708, |
| "memory(GiB)": 67.71, |
| "step": 2165, |
| "train_speed(iter/s)": 0.039887 |
| }, |
| { |
| "acc": 0.76752806, |
| "epoch": 1.4924346629986245, |
| "grad_norm": 1.2288601398468018, |
| "learning_rate": 8.405546233706395e-05, |
| "loss": 0.82048512, |
| "memory(GiB)": 67.71, |
| "step": 2170, |
| "train_speed(iter/s)": 0.039936 |
| }, |
| { |
| "acc": 0.76018772, |
| "epoch": 1.4958734525447044, |
| "grad_norm": 1.1614660024642944, |
| "learning_rate": 8.397211049462586e-05, |
| "loss": 0.84854307, |
| "memory(GiB)": 67.71, |
| "step": 2175, |
| "train_speed(iter/s)": 0.039975 |
| }, |
| { |
| "acc": 0.77043438, |
| "epoch": 1.499312242090784, |
| "grad_norm": 1.3372976779937744, |
| "learning_rate": 8.388858292254637e-05, |
| "loss": 0.79604712, |
| "memory(GiB)": 67.71, |
| "step": 2180, |
| "train_speed(iter/s)": 0.040022 |
| }, |
| { |
| "acc": 0.76631165, |
| "epoch": 1.5027510316368637, |
| "grad_norm": 1.1987308263778687, |
| "learning_rate": 8.380488005295732e-05, |
| "loss": 0.83228321, |
| "memory(GiB)": 67.71, |
| "step": 2185, |
| "train_speed(iter/s)": 0.040065 |
| }, |
| { |
| "acc": 0.75986252, |
| "epoch": 1.5061898211829436, |
| "grad_norm": 1.0442498922348022, |
| "learning_rate": 8.37210023188975e-05, |
| "loss": 0.84417458, |
| "memory(GiB)": 67.71, |
| "step": 2190, |
| "train_speed(iter/s)": 0.040102 |
| }, |
| { |
| "acc": 0.75435362, |
| "epoch": 1.5096286107290235, |
| "grad_norm": 1.0899875164031982, |
| "learning_rate": 8.363695015431028e-05, |
| "loss": 0.8657095, |
| "memory(GiB)": 67.71, |
| "step": 2195, |
| "train_speed(iter/s)": 0.04015 |
| }, |
| { |
| "acc": 0.76132326, |
| "epoch": 1.5130674002751032, |
| "grad_norm": 1.076157569885254, |
| "learning_rate": 8.355272399404156e-05, |
| "loss": 0.83814745, |
| "memory(GiB)": 67.71, |
| "step": 2200, |
| "train_speed(iter/s)": 0.040192 |
| }, |
| { |
| "epoch": 1.5130674002751032, |
| "eval_acc": 0.7542065955819499, |
| "eval_loss": 0.877788245677948, |
| "eval_runtime": 1157.3904, |
| "eval_samples_per_second": 3.701, |
| "eval_steps_per_second": 0.067, |
| "step": 2200 |
| }, |
| { |
| "acc": 0.75972261, |
| "epoch": 1.516506189821183, |
| "grad_norm": 1.387868046760559, |
| "learning_rate": 8.346832427383732e-05, |
| "loss": 0.84049091, |
| "memory(GiB)": 67.71, |
| "step": 2205, |
| "train_speed(iter/s)": 0.039401 |
| }, |
| { |
| "acc": 0.74931083, |
| "epoch": 1.5199449793672626, |
| "grad_norm": 1.2687524557113647, |
| "learning_rate": 8.338375143034148e-05, |
| "loss": 0.87477436, |
| "memory(GiB)": 67.71, |
| "step": 2210, |
| "train_speed(iter/s)": 0.039447 |
| }, |
| { |
| "acc": 0.76771116, |
| "epoch": 1.5233837689133425, |
| "grad_norm": 1.1818050146102905, |
| "learning_rate": 8.329900590109365e-05, |
| "loss": 0.81554508, |
| "memory(GiB)": 67.71, |
| "step": 2215, |
| "train_speed(iter/s)": 0.039489 |
| }, |
| { |
| "acc": 0.76883683, |
| "epoch": 1.5268225584594224, |
| "grad_norm": 1.1263651847839355, |
| "learning_rate": 8.321408812452678e-05, |
| "loss": 0.82251701, |
| "memory(GiB)": 67.71, |
| "step": 2220, |
| "train_speed(iter/s)": 0.03953 |
| }, |
| { |
| "acc": 0.76298013, |
| "epoch": 1.530261348005502, |
| "grad_norm": 1.1538478136062622, |
| "learning_rate": 8.312899853996501e-05, |
| "loss": 0.81565828, |
| "memory(GiB)": 67.71, |
| "step": 2225, |
| "train_speed(iter/s)": 0.039564 |
| }, |
| { |
| "acc": 0.75918069, |
| "epoch": 1.5337001375515817, |
| "grad_norm": 1.2974464893341064, |
| "learning_rate": 8.304373758762128e-05, |
| "loss": 0.8574604, |
| "memory(GiB)": 67.71, |
| "step": 2230, |
| "train_speed(iter/s)": 0.039603 |
| }, |
| { |
| "acc": 0.76306868, |
| "epoch": 1.5371389270976616, |
| "grad_norm": 1.2755868434906006, |
| "learning_rate": 8.295830570859512e-05, |
| "loss": 0.83660641, |
| "memory(GiB)": 67.71, |
| "step": 2235, |
| "train_speed(iter/s)": 0.039636 |
| }, |
| { |
| "acc": 0.75555844, |
| "epoch": 1.5405777166437415, |
| "grad_norm": 1.196268081665039, |
| "learning_rate": 8.287270334487034e-05, |
| "loss": 0.84184723, |
| "memory(GiB)": 67.71, |
| "step": 2240, |
| "train_speed(iter/s)": 0.039674 |
| }, |
| { |
| "acc": 0.77357531, |
| "epoch": 1.5440165061898212, |
| "grad_norm": 1.2208247184753418, |
| "learning_rate": 8.278693093931282e-05, |
| "loss": 0.79285612, |
| "memory(GiB)": 67.71, |
| "step": 2245, |
| "train_speed(iter/s)": 0.039716 |
| }, |
| { |
| "acc": 0.75058088, |
| "epoch": 1.547455295735901, |
| "grad_norm": 1.1691052913665771, |
| "learning_rate": 8.270098893566807e-05, |
| "loss": 0.864328, |
| "memory(GiB)": 67.73, |
| "step": 2250, |
| "train_speed(iter/s)": 0.03975 |
| }, |
| { |
| "acc": 0.74813089, |
| "epoch": 1.5508940852819806, |
| "grad_norm": 1.300010323524475, |
| "learning_rate": 8.261487777855909e-05, |
| "loss": 0.89021215, |
| "memory(GiB)": 67.73, |
| "step": 2255, |
| "train_speed(iter/s)": 0.039788 |
| }, |
| { |
| "acc": 0.75823145, |
| "epoch": 1.5543328748280605, |
| "grad_norm": 1.080557107925415, |
| "learning_rate": 8.252859791348392e-05, |
| "loss": 0.86599722, |
| "memory(GiB)": 67.73, |
| "step": 2260, |
| "train_speed(iter/s)": 0.039824 |
| }, |
| { |
| "acc": 0.75691137, |
| "epoch": 1.5577716643741404, |
| "grad_norm": 1.098506212234497, |
| "learning_rate": 8.244214978681348e-05, |
| "loss": 0.87960701, |
| "memory(GiB)": 67.73, |
| "step": 2265, |
| "train_speed(iter/s)": 0.039862 |
| }, |
| { |
| "acc": 0.76327119, |
| "epoch": 1.56121045392022, |
| "grad_norm": 1.3256527185440063, |
| "learning_rate": 8.23555338457892e-05, |
| "loss": 0.8320919, |
| "memory(GiB)": 67.73, |
| "step": 2270, |
| "train_speed(iter/s)": 0.039901 |
| }, |
| { |
| "acc": 0.75916958, |
| "epoch": 1.5646492434662997, |
| "grad_norm": 1.2192107439041138, |
| "learning_rate": 8.226875053852066e-05, |
| "loss": 0.84912138, |
| "memory(GiB)": 67.73, |
| "step": 2275, |
| "train_speed(iter/s)": 0.039941 |
| }, |
| { |
| "acc": 0.75922327, |
| "epoch": 1.5680880330123796, |
| "grad_norm": 1.3527653217315674, |
| "learning_rate": 8.218180031398334e-05, |
| "loss": 0.84136915, |
| "memory(GiB)": 67.73, |
| "step": 2280, |
| "train_speed(iter/s)": 0.039986 |
| }, |
| { |
| "acc": 0.76518865, |
| "epoch": 1.5715268225584595, |
| "grad_norm": 1.1937755346298218, |
| "learning_rate": 8.209468362201627e-05, |
| "loss": 0.82890606, |
| "memory(GiB)": 67.73, |
| "step": 2285, |
| "train_speed(iter/s)": 0.040027 |
| }, |
| { |
| "acc": 0.76164193, |
| "epoch": 1.5749656121045392, |
| "grad_norm": 1.1419281959533691, |
| "learning_rate": 8.200740091331969e-05, |
| "loss": 0.8369875, |
| "memory(GiB)": 67.73, |
| "step": 2290, |
| "train_speed(iter/s)": 0.040071 |
| }, |
| { |
| "acc": 0.7657156, |
| "epoch": 1.578404401650619, |
| "grad_norm": 1.2638212442398071, |
| "learning_rate": 8.19199526394527e-05, |
| "loss": 0.82229643, |
| "memory(GiB)": 67.73, |
| "step": 2295, |
| "train_speed(iter/s)": 0.040112 |
| }, |
| { |
| "acc": 0.76849699, |
| "epoch": 1.5818431911966988, |
| "grad_norm": 1.1830896139144897, |
| "learning_rate": 8.183233925283104e-05, |
| "loss": 0.79942322, |
| "memory(GiB)": 67.73, |
| "step": 2300, |
| "train_speed(iter/s)": 0.040155 |
| }, |
| { |
| "epoch": 1.5818431911966988, |
| "eval_acc": 0.7557418904935439, |
| "eval_loss": 0.8737921714782715, |
| "eval_runtime": 1090.3389, |
| "eval_samples_per_second": 3.928, |
| "eval_steps_per_second": 0.071, |
| "step": 2300 |
| }, |
| { |
| "acc": 0.76840105, |
| "epoch": 1.5852819807427787, |
| "grad_norm": 1.1682363748550415, |
| "learning_rate": 8.17445612067246e-05, |
| "loss": 0.82419491, |
| "memory(GiB)": 67.73, |
| "step": 2305, |
| "train_speed(iter/s)": 0.039444 |
| }, |
| { |
| "acc": 0.76440401, |
| "epoch": 1.5887207702888584, |
| "grad_norm": 1.2088557481765747, |
| "learning_rate": 8.165661895525515e-05, |
| "loss": 0.8236021, |
| "memory(GiB)": 67.73, |
| "step": 2310, |
| "train_speed(iter/s)": 0.039487 |
| }, |
| { |
| "acc": 0.76112623, |
| "epoch": 1.592159559834938, |
| "grad_norm": 1.2835819721221924, |
| "learning_rate": 8.156851295339401e-05, |
| "loss": 0.84509296, |
| "memory(GiB)": 67.73, |
| "step": 2315, |
| "train_speed(iter/s)": 0.039527 |
| }, |
| { |
| "acc": 0.75933437, |
| "epoch": 1.5955983493810177, |
| "grad_norm": 1.1950072050094604, |
| "learning_rate": 8.148024365695961e-05, |
| "loss": 0.83572178, |
| "memory(GiB)": 67.73, |
| "step": 2320, |
| "train_speed(iter/s)": 0.039562 |
| }, |
| { |
| "acc": 0.74721594, |
| "epoch": 1.5990371389270976, |
| "grad_norm": 1.1385269165039062, |
| "learning_rate": 8.139181152261524e-05, |
| "loss": 0.87340145, |
| "memory(GiB)": 67.73, |
| "step": 2325, |
| "train_speed(iter/s)": 0.039597 |
| }, |
| { |
| "acc": 0.76489792, |
| "epoch": 1.6024759284731775, |
| "grad_norm": 1.3601405620574951, |
| "learning_rate": 8.130321700786662e-05, |
| "loss": 0.81867256, |
| "memory(GiB)": 67.73, |
| "step": 2330, |
| "train_speed(iter/s)": 0.039642 |
| }, |
| { |
| "acc": 0.7683671, |
| "epoch": 1.6059147180192572, |
| "grad_norm": 1.2533677816390991, |
| "learning_rate": 8.121446057105955e-05, |
| "loss": 0.81394958, |
| "memory(GiB)": 67.73, |
| "step": 2335, |
| "train_speed(iter/s)": 0.039682 |
| }, |
| { |
| "acc": 0.75637655, |
| "epoch": 1.609353507565337, |
| "grad_norm": 1.196452260017395, |
| "learning_rate": 8.112554267137753e-05, |
| "loss": 0.84699097, |
| "memory(GiB)": 67.73, |
| "step": 2340, |
| "train_speed(iter/s)": 0.03972 |
| }, |
| { |
| "acc": 0.77178955, |
| "epoch": 1.6127922971114168, |
| "grad_norm": 1.0918421745300293, |
| "learning_rate": 8.103646376883937e-05, |
| "loss": 0.79872456, |
| "memory(GiB)": 67.73, |
| "step": 2345, |
| "train_speed(iter/s)": 0.039764 |
| }, |
| { |
| "acc": 0.75204129, |
| "epoch": 1.6162310866574967, |
| "grad_norm": 1.2889692783355713, |
| "learning_rate": 8.094722432429691e-05, |
| "loss": 0.88343906, |
| "memory(GiB)": 67.73, |
| "step": 2350, |
| "train_speed(iter/s)": 0.039803 |
| }, |
| { |
| "acc": 0.75908298, |
| "epoch": 1.6196698762035764, |
| "grad_norm": 1.1028622388839722, |
| "learning_rate": 8.085782479943245e-05, |
| "loss": 0.8362504, |
| "memory(GiB)": 67.73, |
| "step": 2355, |
| "train_speed(iter/s)": 0.039844 |
| }, |
| { |
| "acc": 0.76125684, |
| "epoch": 1.623108665749656, |
| "grad_norm": 1.3756259679794312, |
| "learning_rate": 8.076826565675657e-05, |
| "loss": 0.8257452, |
| "memory(GiB)": 67.73, |
| "step": 2360, |
| "train_speed(iter/s)": 0.039885 |
| }, |
| { |
| "acc": 0.76714849, |
| "epoch": 1.6265474552957357, |
| "grad_norm": 1.5173252820968628, |
| "learning_rate": 8.067854735960555e-05, |
| "loss": 0.81308384, |
| "memory(GiB)": 67.73, |
| "step": 2365, |
| "train_speed(iter/s)": 0.039928 |
| }, |
| { |
| "acc": 0.7745882, |
| "epoch": 1.6299862448418156, |
| "grad_norm": 1.2738362550735474, |
| "learning_rate": 8.058867037213916e-05, |
| "loss": 0.79546738, |
| "memory(GiB)": 67.73, |
| "step": 2370, |
| "train_speed(iter/s)": 0.039964 |
| }, |
| { |
| "acc": 0.77209988, |
| "epoch": 1.6334250343878955, |
| "grad_norm": 1.1855344772338867, |
| "learning_rate": 8.049863515933802e-05, |
| "loss": 0.79778285, |
| "memory(GiB)": 67.73, |
| "step": 2375, |
| "train_speed(iter/s)": 0.039997 |
| }, |
| { |
| "acc": 0.76279697, |
| "epoch": 1.6368638239339752, |
| "grad_norm": 1.1562272310256958, |
| "learning_rate": 8.040844218700147e-05, |
| "loss": 0.82462883, |
| "memory(GiB)": 67.73, |
| "step": 2380, |
| "train_speed(iter/s)": 0.040033 |
| }, |
| { |
| "acc": 0.76684308, |
| "epoch": 1.640302613480055, |
| "grad_norm": 1.3373991250991821, |
| "learning_rate": 8.031809192174495e-05, |
| "loss": 0.81806488, |
| "memory(GiB)": 67.73, |
| "step": 2385, |
| "train_speed(iter/s)": 0.040073 |
| }, |
| { |
| "acc": 0.75144334, |
| "epoch": 1.6437414030261348, |
| "grad_norm": 1.3013478517532349, |
| "learning_rate": 8.022758483099767e-05, |
| "loss": 0.86880703, |
| "memory(GiB)": 67.73, |
| "step": 2390, |
| "train_speed(iter/s)": 0.040113 |
| }, |
| { |
| "acc": 0.7674602, |
| "epoch": 1.6471801925722147, |
| "grad_norm": 1.2739620208740234, |
| "learning_rate": 8.013692138300018e-05, |
| "loss": 0.82607212, |
| "memory(GiB)": 67.73, |
| "step": 2395, |
| "train_speed(iter/s)": 0.040148 |
| }, |
| { |
| "acc": 0.75973258, |
| "epoch": 1.6506189821182944, |
| "grad_norm": 1.313481330871582, |
| "learning_rate": 8.004610204680196e-05, |
| "loss": 0.83364353, |
| "memory(GiB)": 67.73, |
| "step": 2400, |
| "train_speed(iter/s)": 0.040184 |
| }, |
| { |
| "epoch": 1.6506189821182944, |
| "eval_acc": 0.7556687812120394, |
| "eval_loss": 0.8708279728889465, |
| "eval_runtime": 1138.5113, |
| "eval_samples_per_second": 3.762, |
| "eval_steps_per_second": 0.068, |
| "step": 2400 |
| }, |
| { |
| "acc": 0.76298213, |
| "epoch": 1.654057771664374, |
| "grad_norm": 1.3461359739303589, |
| "learning_rate": 7.995512729225894e-05, |
| "loss": 0.82495756, |
| "memory(GiB)": 67.73, |
| "step": 2405, |
| "train_speed(iter/s)": 0.03947 |
| }, |
| { |
| "acc": 0.76738596, |
| "epoch": 1.657496561210454, |
| "grad_norm": 1.272608757019043, |
| "learning_rate": 7.986399759003119e-05, |
| "loss": 0.82517872, |
| "memory(GiB)": 67.73, |
| "step": 2410, |
| "train_speed(iter/s)": 0.039512 |
| }, |
| { |
| "acc": 0.75537925, |
| "epoch": 1.6609353507565336, |
| "grad_norm": 1.2757365703582764, |
| "learning_rate": 7.977271341158035e-05, |
| "loss": 0.8790472, |
| "memory(GiB)": 67.73, |
| "step": 2415, |
| "train_speed(iter/s)": 0.039549 |
| }, |
| { |
| "acc": 0.75297923, |
| "epoch": 1.6643741403026135, |
| "grad_norm": 1.1887763738632202, |
| "learning_rate": 7.968127522916723e-05, |
| "loss": 0.8699337, |
| "memory(GiB)": 67.73, |
| "step": 2420, |
| "train_speed(iter/s)": 0.039586 |
| }, |
| { |
| "acc": 0.76874723, |
| "epoch": 1.6678129298486932, |
| "grad_norm": 1.0565059185028076, |
| "learning_rate": 7.95896835158494e-05, |
| "loss": 0.81132565, |
| "memory(GiB)": 67.73, |
| "step": 2425, |
| "train_speed(iter/s)": 0.03962 |
| }, |
| { |
| "acc": 0.75187912, |
| "epoch": 1.671251719394773, |
| "grad_norm": 1.1199684143066406, |
| "learning_rate": 7.949793874547877e-05, |
| "loss": 0.89120388, |
| "memory(GiB)": 67.73, |
| "step": 2430, |
| "train_speed(iter/s)": 0.039655 |
| }, |
| { |
| "acc": 0.7603467, |
| "epoch": 1.6746905089408528, |
| "grad_norm": 1.225197196006775, |
| "learning_rate": 7.940604139269903e-05, |
| "loss": 0.83448153, |
| "memory(GiB)": 67.73, |
| "step": 2435, |
| "train_speed(iter/s)": 0.039691 |
| }, |
| { |
| "acc": 0.76669693, |
| "epoch": 1.6781292984869327, |
| "grad_norm": 1.2933470010757446, |
| "learning_rate": 7.931399193294331e-05, |
| "loss": 0.81721525, |
| "memory(GiB)": 67.73, |
| "step": 2440, |
| "train_speed(iter/s)": 0.039727 |
| }, |
| { |
| "acc": 0.76869669, |
| "epoch": 1.6815680880330124, |
| "grad_norm": 1.311872124671936, |
| "learning_rate": 7.922179084243161e-05, |
| "loss": 0.82446499, |
| "memory(GiB)": 67.73, |
| "step": 2445, |
| "train_speed(iter/s)": 0.039768 |
| }, |
| { |
| "acc": 0.74998088, |
| "epoch": 1.685006877579092, |
| "grad_norm": 1.109681248664856, |
| "learning_rate": 7.912943859816847e-05, |
| "loss": 0.88515491, |
| "memory(GiB)": 67.73, |
| "step": 2450, |
| "train_speed(iter/s)": 0.039804 |
| }, |
| { |
| "acc": 0.76143503, |
| "epoch": 1.688445667125172, |
| "grad_norm": 1.2891324758529663, |
| "learning_rate": 7.903693567794035e-05, |
| "loss": 0.84492321, |
| "memory(GiB)": 67.73, |
| "step": 2455, |
| "train_speed(iter/s)": 0.039838 |
| }, |
| { |
| "acc": 0.77193007, |
| "epoch": 1.6918844566712519, |
| "grad_norm": 1.0803942680358887, |
| "learning_rate": 7.894428256031332e-05, |
| "loss": 0.78853378, |
| "memory(GiB)": 67.73, |
| "step": 2460, |
| "train_speed(iter/s)": 0.039872 |
| }, |
| { |
| "acc": 0.78182478, |
| "epoch": 1.6953232462173315, |
| "grad_norm": 1.1716827154159546, |
| "learning_rate": 7.88514797246304e-05, |
| "loss": 0.76458054, |
| "memory(GiB)": 67.73, |
| "step": 2465, |
| "train_speed(iter/s)": 0.039909 |
| }, |
| { |
| "acc": 0.76684537, |
| "epoch": 1.6987620357634112, |
| "grad_norm": 1.2370884418487549, |
| "learning_rate": 7.875852765100926e-05, |
| "loss": 0.83122387, |
| "memory(GiB)": 67.73, |
| "step": 2470, |
| "train_speed(iter/s)": 0.03994 |
| }, |
| { |
| "acc": 0.7637423, |
| "epoch": 1.702200825309491, |
| "grad_norm": 1.4126001596450806, |
| "learning_rate": 7.866542682033964e-05, |
| "loss": 0.83239994, |
| "memory(GiB)": 67.73, |
| "step": 2475, |
| "train_speed(iter/s)": 0.03998 |
| }, |
| { |
| "acc": 0.7585422, |
| "epoch": 1.7056396148555708, |
| "grad_norm": 1.277557373046875, |
| "learning_rate": 7.857217771428085e-05, |
| "loss": 0.84281693, |
| "memory(GiB)": 67.73, |
| "step": 2480, |
| "train_speed(iter/s)": 0.040019 |
| }, |
| { |
| "acc": 0.76609259, |
| "epoch": 1.7090784044016507, |
| "grad_norm": 1.2501623630523682, |
| "learning_rate": 7.847878081525932e-05, |
| "loss": 0.8170804, |
| "memory(GiB)": 67.73, |
| "step": 2485, |
| "train_speed(iter/s)": 0.040056 |
| }, |
| { |
| "acc": 0.75793953, |
| "epoch": 1.7125171939477304, |
| "grad_norm": 1.5299009084701538, |
| "learning_rate": 7.838523660646611e-05, |
| "loss": 0.86527452, |
| "memory(GiB)": 67.73, |
| "step": 2490, |
| "train_speed(iter/s)": 0.040087 |
| }, |
| { |
| "acc": 0.75957718, |
| "epoch": 1.71595598349381, |
| "grad_norm": 1.2600144147872925, |
| "learning_rate": 7.829154557185438e-05, |
| "loss": 0.84371586, |
| "memory(GiB)": 67.73, |
| "step": 2495, |
| "train_speed(iter/s)": 0.040119 |
| }, |
| { |
| "acc": 0.77097268, |
| "epoch": 1.71939477303989, |
| "grad_norm": 1.2481366395950317, |
| "learning_rate": 7.819770819613685e-05, |
| "loss": 0.79474764, |
| "memory(GiB)": 67.73, |
| "step": 2500, |
| "train_speed(iter/s)": 0.040159 |
| }, |
| { |
| "epoch": 1.71939477303989, |
| "eval_acc": 0.755859990102128, |
| "eval_loss": 0.8665845990180969, |
| "eval_runtime": 1172.0149, |
| "eval_samples_per_second": 3.654, |
| "eval_steps_per_second": 0.066, |
| "step": 2500 |
| }, |
| { |
| "acc": 0.76427364, |
| "epoch": 1.7228335625859699, |
| "grad_norm": 1.3622970581054688, |
| "learning_rate": 7.810372496478342e-05, |
| "loss": 0.83532944, |
| "memory(GiB)": 67.73, |
| "step": 2505, |
| "train_speed(iter/s)": 0.039453 |
| }, |
| { |
| "acc": 0.75734344, |
| "epoch": 1.7262723521320495, |
| "grad_norm": 1.2002394199371338, |
| "learning_rate": 7.800959636401853e-05, |
| "loss": 0.85398045, |
| "memory(GiB)": 67.73, |
| "step": 2510, |
| "train_speed(iter/s)": 0.039487 |
| }, |
| { |
| "acc": 0.76924725, |
| "epoch": 1.7297111416781292, |
| "grad_norm": 1.1298774480819702, |
| "learning_rate": 7.791532288081868e-05, |
| "loss": 0.81432209, |
| "memory(GiB)": 67.73, |
| "step": 2515, |
| "train_speed(iter/s)": 0.03953 |
| }, |
| { |
| "acc": 0.76489067, |
| "epoch": 1.7331499312242091, |
| "grad_norm": 1.3914809226989746, |
| "learning_rate": 7.782090500290998e-05, |
| "loss": 0.83234596, |
| "memory(GiB)": 67.73, |
| "step": 2520, |
| "train_speed(iter/s)": 0.039566 |
| }, |
| { |
| "acc": 0.75667624, |
| "epoch": 1.7365887207702888, |
| "grad_norm": 1.2779312133789062, |
| "learning_rate": 7.77263432187655e-05, |
| "loss": 0.84862852, |
| "memory(GiB)": 67.73, |
| "step": 2525, |
| "train_speed(iter/s)": 0.039602 |
| }, |
| { |
| "acc": 0.76841941, |
| "epoch": 1.7400275103163687, |
| "grad_norm": 1.1182903051376343, |
| "learning_rate": 7.763163801760286e-05, |
| "loss": 0.80550995, |
| "memory(GiB)": 67.73, |
| "step": 2530, |
| "train_speed(iter/s)": 0.039639 |
| }, |
| { |
| "acc": 0.75564499, |
| "epoch": 1.7434662998624484, |
| "grad_norm": 1.325380802154541, |
| "learning_rate": 7.753678988938162e-05, |
| "loss": 0.85131378, |
| "memory(GiB)": 67.73, |
| "step": 2535, |
| "train_speed(iter/s)": 0.039668 |
| }, |
| { |
| "acc": 0.77792916, |
| "epoch": 1.746905089408528, |
| "grad_norm": 1.2355977296829224, |
| "learning_rate": 7.74417993248008e-05, |
| "loss": 0.76762047, |
| "memory(GiB)": 67.73, |
| "step": 2540, |
| "train_speed(iter/s)": 0.039703 |
| }, |
| { |
| "acc": 0.7703722, |
| "epoch": 1.750343878954608, |
| "grad_norm": 1.1645699739456177, |
| "learning_rate": 7.734666681529633e-05, |
| "loss": 0.80783539, |
| "memory(GiB)": 67.73, |
| "step": 2545, |
| "train_speed(iter/s)": 0.039737 |
| }, |
| { |
| "acc": 0.76581202, |
| "epoch": 1.7537826685006879, |
| "grad_norm": 1.2667499780654907, |
| "learning_rate": 7.725139285303843e-05, |
| "loss": 0.81426716, |
| "memory(GiB)": 67.73, |
| "step": 2550, |
| "train_speed(iter/s)": 0.039769 |
| }, |
| { |
| "acc": 0.75635591, |
| "epoch": 1.7572214580467675, |
| "grad_norm": 1.325819492340088, |
| "learning_rate": 7.71559779309292e-05, |
| "loss": 0.85436335, |
| "memory(GiB)": 67.73, |
| "step": 2555, |
| "train_speed(iter/s)": 0.039804 |
| }, |
| { |
| "acc": 0.76963515, |
| "epoch": 1.7606602475928472, |
| "grad_norm": 1.1206269264221191, |
| "learning_rate": 7.70604225426e-05, |
| "loss": 0.79051266, |
| "memory(GiB)": 67.73, |
| "step": 2560, |
| "train_speed(iter/s)": 0.039844 |
| }, |
| { |
| "acc": 0.77096367, |
| "epoch": 1.7640990371389271, |
| "grad_norm": 1.2406977415084839, |
| "learning_rate": 7.696472718240883e-05, |
| "loss": 0.8147171, |
| "memory(GiB)": 67.73, |
| "step": 2565, |
| "train_speed(iter/s)": 0.039879 |
| }, |
| { |
| "acc": 0.76435289, |
| "epoch": 1.767537826685007, |
| "grad_norm": 1.2148582935333252, |
| "learning_rate": 7.686889234543788e-05, |
| "loss": 0.82190208, |
| "memory(GiB)": 67.73, |
| "step": 2570, |
| "train_speed(iter/s)": 0.039906 |
| }, |
| { |
| "acc": 0.76752815, |
| "epoch": 1.7709766162310867, |
| "grad_norm": 1.0485085248947144, |
| "learning_rate": 7.677291852749093e-05, |
| "loss": 0.81348267, |
| "memory(GiB)": 67.73, |
| "step": 2575, |
| "train_speed(iter/s)": 0.039941 |
| }, |
| { |
| "acc": 0.75209255, |
| "epoch": 1.7744154057771664, |
| "grad_norm": 1.2998508214950562, |
| "learning_rate": 7.667680622509081e-05, |
| "loss": 0.85738831, |
| "memory(GiB)": 67.73, |
| "step": 2580, |
| "train_speed(iter/s)": 0.039973 |
| }, |
| { |
| "acc": 0.77212009, |
| "epoch": 1.777854195323246, |
| "grad_norm": 1.1790145635604858, |
| "learning_rate": 7.65805559354767e-05, |
| "loss": 0.81079607, |
| "memory(GiB)": 67.73, |
| "step": 2585, |
| "train_speed(iter/s)": 0.040007 |
| }, |
| { |
| "acc": 0.76209583, |
| "epoch": 1.781292984869326, |
| "grad_norm": 1.147714614868164, |
| "learning_rate": 7.648416815660177e-05, |
| "loss": 0.82997284, |
| "memory(GiB)": 67.73, |
| "step": 2590, |
| "train_speed(iter/s)": 0.040039 |
| }, |
| { |
| "acc": 0.76417446, |
| "epoch": 1.7847317744154059, |
| "grad_norm": 1.3315579891204834, |
| "learning_rate": 7.638764338713044e-05, |
| "loss": 0.81722393, |
| "memory(GiB)": 67.73, |
| "step": 2595, |
| "train_speed(iter/s)": 0.040074 |
| }, |
| { |
| "acc": 0.76321087, |
| "epoch": 1.7881705639614855, |
| "grad_norm": 1.282499074935913, |
| "learning_rate": 7.629098212643586e-05, |
| "loss": 0.82541628, |
| "memory(GiB)": 67.73, |
| "step": 2600, |
| "train_speed(iter/s)": 0.040108 |
| }, |
| { |
| "epoch": 1.7881705639614855, |
| "eval_acc": 0.7575696225311558, |
| "eval_loss": 0.8597843050956726, |
| "eval_runtime": 1165.0354, |
| "eval_samples_per_second": 3.676, |
| "eval_steps_per_second": 0.066, |
| "step": 2600 |
| }, |
| { |
| "acc": 0.76687446, |
| "epoch": 1.7916093535075652, |
| "grad_norm": 1.1122969388961792, |
| "learning_rate": 7.619418487459733e-05, |
| "loss": 0.80312977, |
| "memory(GiB)": 67.73, |
| "step": 2605, |
| "train_speed(iter/s)": 0.039432 |
| }, |
| { |
| "acc": 0.76852121, |
| "epoch": 1.7950481430536451, |
| "grad_norm": 1.3779712915420532, |
| "learning_rate": 7.609725213239771e-05, |
| "loss": 0.79960012, |
| "memory(GiB)": 67.73, |
| "step": 2610, |
| "train_speed(iter/s)": 0.039468 |
| }, |
| { |
| "acc": 0.76329231, |
| "epoch": 1.798486932599725, |
| "grad_norm": 1.4102786779403687, |
| "learning_rate": 7.60001844013208e-05, |
| "loss": 0.83775997, |
| "memory(GiB)": 67.73, |
| "step": 2615, |
| "train_speed(iter/s)": 0.039502 |
| }, |
| { |
| "acc": 0.76466594, |
| "epoch": 1.8019257221458047, |
| "grad_norm": 1.2906368970870972, |
| "learning_rate": 7.590298218354877e-05, |
| "loss": 0.82873688, |
| "memory(GiB)": 67.73, |
| "step": 2620, |
| "train_speed(iter/s)": 0.039534 |
| }, |
| { |
| "acc": 0.76969028, |
| "epoch": 1.8053645116918844, |
| "grad_norm": 1.2397427558898926, |
| "learning_rate": 7.580564598195957e-05, |
| "loss": 0.81822834, |
| "memory(GiB)": 67.73, |
| "step": 2625, |
| "train_speed(iter/s)": 0.03957 |
| }, |
| { |
| "acc": 0.7630662, |
| "epoch": 1.8088033012379643, |
| "grad_norm": 1.3433514833450317, |
| "learning_rate": 7.570817630012435e-05, |
| "loss": 0.82502728, |
| "memory(GiB)": 67.73, |
| "step": 2630, |
| "train_speed(iter/s)": 0.039606 |
| }, |
| { |
| "acc": 0.76269207, |
| "epoch": 1.812242090784044, |
| "grad_norm": 1.0851596593856812, |
| "learning_rate": 7.561057364230475e-05, |
| "loss": 0.81567841, |
| "memory(GiB)": 67.73, |
| "step": 2635, |
| "train_speed(iter/s)": 0.03964 |
| }, |
| { |
| "acc": 0.75853286, |
| "epoch": 1.8156808803301239, |
| "grad_norm": 1.2418478727340698, |
| "learning_rate": 7.551283851345042e-05, |
| "loss": 0.84409733, |
| "memory(GiB)": 67.73, |
| "step": 2640, |
| "train_speed(iter/s)": 0.039671 |
| }, |
| { |
| "acc": 0.76311216, |
| "epoch": 1.8191196698762035, |
| "grad_norm": 1.1131020784378052, |
| "learning_rate": 7.541497141919636e-05, |
| "loss": 0.82704649, |
| "memory(GiB)": 67.73, |
| "step": 2645, |
| "train_speed(iter/s)": 0.039706 |
| }, |
| { |
| "acc": 0.76137314, |
| "epoch": 1.8225584594222832, |
| "grad_norm": 1.378839373588562, |
| "learning_rate": 7.531697286586024e-05, |
| "loss": 0.833605, |
| "memory(GiB)": 67.73, |
| "step": 2650, |
| "train_speed(iter/s)": 0.039741 |
| }, |
| { |
| "acc": 0.76775074, |
| "epoch": 1.8259972489683631, |
| "grad_norm": 1.269956350326538, |
| "learning_rate": 7.521884336043988e-05, |
| "loss": 0.82008057, |
| "memory(GiB)": 67.73, |
| "step": 2655, |
| "train_speed(iter/s)": 0.039776 |
| }, |
| { |
| "acc": 0.77038703, |
| "epoch": 1.829436038514443, |
| "grad_norm": 1.1615530252456665, |
| "learning_rate": 7.51205834106106e-05, |
| "loss": 0.79894481, |
| "memory(GiB)": 67.73, |
| "step": 2660, |
| "train_speed(iter/s)": 0.039811 |
| }, |
| { |
| "acc": 0.76342058, |
| "epoch": 1.8328748280605227, |
| "grad_norm": 1.482840895652771, |
| "learning_rate": 7.502219352472252e-05, |
| "loss": 0.85134239, |
| "memory(GiB)": 67.73, |
| "step": 2665, |
| "train_speed(iter/s)": 0.039842 |
| }, |
| { |
| "acc": 0.77493591, |
| "epoch": 1.8363136176066024, |
| "grad_norm": 1.321035385131836, |
| "learning_rate": 7.492367421179802e-05, |
| "loss": 0.77966547, |
| "memory(GiB)": 67.73, |
| "step": 2670, |
| "train_speed(iter/s)": 0.039874 |
| }, |
| { |
| "acc": 0.7690084, |
| "epoch": 1.8397524071526823, |
| "grad_norm": 1.1279528141021729, |
| "learning_rate": 7.482502598152908e-05, |
| "loss": 0.80104809, |
| "memory(GiB)": 67.73, |
| "step": 2675, |
| "train_speed(iter/s)": 0.039909 |
| }, |
| { |
| "acc": 0.75845652, |
| "epoch": 1.8431911966987622, |
| "grad_norm": 1.340296983718872, |
| "learning_rate": 7.472624934427461e-05, |
| "loss": 0.8414402, |
| "memory(GiB)": 67.73, |
| "step": 2680, |
| "train_speed(iter/s)": 0.039938 |
| }, |
| { |
| "acc": 0.77168741, |
| "epoch": 1.8466299862448419, |
| "grad_norm": 1.1220262050628662, |
| "learning_rate": 7.462734481105788e-05, |
| "loss": 0.80376453, |
| "memory(GiB)": 67.73, |
| "step": 2685, |
| "train_speed(iter/s)": 0.03997 |
| }, |
| { |
| "acc": 0.77061701, |
| "epoch": 1.8500687757909215, |
| "grad_norm": 1.15684974193573, |
| "learning_rate": 7.452831289356382e-05, |
| "loss": 0.80241566, |
| "memory(GiB)": 67.73, |
| "step": 2690, |
| "train_speed(iter/s)": 0.040001 |
| }, |
| { |
| "acc": 0.77713642, |
| "epoch": 1.8535075653370012, |
| "grad_norm": 1.0986595153808594, |
| "learning_rate": 7.442915410413635e-05, |
| "loss": 0.76731901, |
| "memory(GiB)": 67.73, |
| "step": 2695, |
| "train_speed(iter/s)": 0.040033 |
| }, |
| { |
| "acc": 0.77222977, |
| "epoch": 1.8569463548830811, |
| "grad_norm": 1.258157730102539, |
| "learning_rate": 7.43298689557758e-05, |
| "loss": 0.78445282, |
| "memory(GiB)": 67.73, |
| "step": 2700, |
| "train_speed(iter/s)": 0.040066 |
| }, |
| { |
| "epoch": 1.8569463548830811, |
| "eval_acc": 0.7587618661987673, |
| "eval_loss": 0.8582912087440491, |
| "eval_runtime": 1138.3587, |
| "eval_samples_per_second": 3.762, |
| "eval_steps_per_second": 0.068, |
| "step": 2700 |
| }, |
| { |
| "acc": 0.76180067, |
| "epoch": 1.860385144429161, |
| "grad_norm": 1.2461254596710205, |
| "learning_rate": 7.423045796213618e-05, |
| "loss": 0.84116306, |
| "memory(GiB)": 67.73, |
| "step": 2705, |
| "train_speed(iter/s)": 0.039433 |
| }, |
| { |
| "acc": 0.76939058, |
| "epoch": 1.8638239339752407, |
| "grad_norm": 1.1999588012695312, |
| "learning_rate": 7.413092163752263e-05, |
| "loss": 0.79865079, |
| "memory(GiB)": 67.73, |
| "step": 2710, |
| "train_speed(iter/s)": 0.039464 |
| }, |
| { |
| "acc": 0.76056762, |
| "epoch": 1.8672627235213204, |
| "grad_norm": 1.1177629232406616, |
| "learning_rate": 7.403126049688864e-05, |
| "loss": 0.83327274, |
| "memory(GiB)": 67.73, |
| "step": 2715, |
| "train_speed(iter/s)": 0.0395 |
| }, |
| { |
| "acc": 0.76080637, |
| "epoch": 1.8707015130674003, |
| "grad_norm": 1.1269170045852661, |
| "learning_rate": 7.393147505583345e-05, |
| "loss": 0.84126358, |
| "memory(GiB)": 67.73, |
| "step": 2720, |
| "train_speed(iter/s)": 0.039532 |
| }, |
| { |
| "acc": 0.76141424, |
| "epoch": 1.8741403026134802, |
| "grad_norm": 1.4532649517059326, |
| "learning_rate": 7.383156583059941e-05, |
| "loss": 0.83713207, |
| "memory(GiB)": 67.73, |
| "step": 2725, |
| "train_speed(iter/s)": 0.039565 |
| }, |
| { |
| "acc": 0.76509705, |
| "epoch": 1.8775790921595599, |
| "grad_norm": 1.7495522499084473, |
| "learning_rate": 7.373153333806917e-05, |
| "loss": 0.82034264, |
| "memory(GiB)": 67.73, |
| "step": 2730, |
| "train_speed(iter/s)": 0.039601 |
| }, |
| { |
| "acc": 0.7695118, |
| "epoch": 1.8810178817056395, |
| "grad_norm": 1.3915905952453613, |
| "learning_rate": 7.363137809576322e-05, |
| "loss": 0.81267509, |
| "memory(GiB)": 67.73, |
| "step": 2735, |
| "train_speed(iter/s)": 0.039634 |
| }, |
| { |
| "acc": 0.76744928, |
| "epoch": 1.8844566712517192, |
| "grad_norm": 1.1005185842514038, |
| "learning_rate": 7.353110062183706e-05, |
| "loss": 0.82437191, |
| "memory(GiB)": 67.73, |
| "step": 2740, |
| "train_speed(iter/s)": 0.039664 |
| }, |
| { |
| "acc": 0.75223131, |
| "epoch": 1.8878954607977991, |
| "grad_norm": 1.4645339250564575, |
| "learning_rate": 7.343070143507853e-05, |
| "loss": 0.84084778, |
| "memory(GiB)": 67.73, |
| "step": 2745, |
| "train_speed(iter/s)": 0.039692 |
| }, |
| { |
| "acc": 0.76176004, |
| "epoch": 1.891334250343879, |
| "grad_norm": 1.2676668167114258, |
| "learning_rate": 7.333018105490515e-05, |
| "loss": 0.83568439, |
| "memory(GiB)": 67.73, |
| "step": 2750, |
| "train_speed(iter/s)": 0.039723 |
| }, |
| { |
| "acc": 0.75615792, |
| "epoch": 1.8947730398899587, |
| "grad_norm": 1.2724095582962036, |
| "learning_rate": 7.322954000136148e-05, |
| "loss": 0.8729351, |
| "memory(GiB)": 67.73, |
| "step": 2755, |
| "train_speed(iter/s)": 0.03975 |
| }, |
| { |
| "acc": 0.78407092, |
| "epoch": 1.8982118294360384, |
| "grad_norm": 1.0895689725875854, |
| "learning_rate": 7.312877879511639e-05, |
| "loss": 0.76207438, |
| "memory(GiB)": 67.73, |
| "step": 2760, |
| "train_speed(iter/s)": 0.039781 |
| }, |
| { |
| "acc": 0.76615877, |
| "epoch": 1.9016506189821183, |
| "grad_norm": 1.2832506895065308, |
| "learning_rate": 7.30278979574603e-05, |
| "loss": 0.82421865, |
| "memory(GiB)": 67.73, |
| "step": 2765, |
| "train_speed(iter/s)": 0.039817 |
| }, |
| { |
| "acc": 0.75757556, |
| "epoch": 1.9050894085281982, |
| "grad_norm": 1.069419503211975, |
| "learning_rate": 7.292689801030262e-05, |
| "loss": 0.84605732, |
| "memory(GiB)": 67.73, |
| "step": 2770, |
| "train_speed(iter/s)": 0.039848 |
| }, |
| { |
| "acc": 0.7682868, |
| "epoch": 1.9085281980742779, |
| "grad_norm": 1.2053790092468262, |
| "learning_rate": 7.282577947616894e-05, |
| "loss": 0.81153774, |
| "memory(GiB)": 67.73, |
| "step": 2775, |
| "train_speed(iter/s)": 0.039879 |
| }, |
| { |
| "acc": 0.76934462, |
| "epoch": 1.9119669876203575, |
| "grad_norm": 1.1767531633377075, |
| "learning_rate": 7.272454287819833e-05, |
| "loss": 0.80814152, |
| "memory(GiB)": 67.73, |
| "step": 2780, |
| "train_speed(iter/s)": 0.039913 |
| }, |
| { |
| "acc": 0.7687346, |
| "epoch": 1.9154057771664375, |
| "grad_norm": 1.2420388460159302, |
| "learning_rate": 7.262318874014073e-05, |
| "loss": 0.79890871, |
| "memory(GiB)": 67.73, |
| "step": 2785, |
| "train_speed(iter/s)": 0.039945 |
| }, |
| { |
| "acc": 0.76412306, |
| "epoch": 1.9188445667125174, |
| "grad_norm": 1.1197503805160522, |
| "learning_rate": 7.252171758635413e-05, |
| "loss": 0.8147377, |
| "memory(GiB)": 67.73, |
| "step": 2790, |
| "train_speed(iter/s)": 0.039979 |
| }, |
| { |
| "acc": 0.76467457, |
| "epoch": 1.922283356258597, |
| "grad_norm": 1.3166663646697998, |
| "learning_rate": 7.242012994180194e-05, |
| "loss": 0.80287476, |
| "memory(GiB)": 67.73, |
| "step": 2795, |
| "train_speed(iter/s)": 0.040006 |
| }, |
| { |
| "acc": 0.75744901, |
| "epoch": 1.9257221458046767, |
| "grad_norm": 1.226488709449768, |
| "learning_rate": 7.231842633205018e-05, |
| "loss": 0.84611988, |
| "memory(GiB)": 67.73, |
| "step": 2800, |
| "train_speed(iter/s)": 0.040037 |
| }, |
| { |
| "epoch": 1.9257221458046767, |
| "eval_acc": 0.7585087956089441, |
| "eval_loss": 0.8532779812812805, |
| "eval_runtime": 1120.0858, |
| "eval_samples_per_second": 3.824, |
| "eval_steps_per_second": 0.069, |
| "step": 2800 |
| }, |
| { |
| "acc": 0.75332041, |
| "epoch": 1.9291609353507564, |
| "grad_norm": 1.3092836141586304, |
| "learning_rate": 7.221660728326488e-05, |
| "loss": 0.86540194, |
| "memory(GiB)": 67.73, |
| "step": 2805, |
| "train_speed(iter/s)": 0.039438 |
| }, |
| { |
| "acc": 0.75658636, |
| "epoch": 1.9325997248968363, |
| "grad_norm": 1.507814645767212, |
| "learning_rate": 7.211467332220929e-05, |
| "loss": 0.84582443, |
| "memory(GiB)": 67.73, |
| "step": 2810, |
| "train_speed(iter/s)": 0.039471 |
| }, |
| { |
| "acc": 0.77044678, |
| "epoch": 1.9360385144429162, |
| "grad_norm": 1.1073263883590698, |
| "learning_rate": 7.201262497624113e-05, |
| "loss": 0.80068121, |
| "memory(GiB)": 67.73, |
| "step": 2815, |
| "train_speed(iter/s)": 0.0395 |
| }, |
| { |
| "acc": 0.76184196, |
| "epoch": 1.9394773039889959, |
| "grad_norm": 1.2935534715652466, |
| "learning_rate": 7.191046277330989e-05, |
| "loss": 0.80897961, |
| "memory(GiB)": 67.73, |
| "step": 2820, |
| "train_speed(iter/s)": 0.039535 |
| }, |
| { |
| "acc": 0.77721043, |
| "epoch": 1.9429160935350756, |
| "grad_norm": 1.291559100151062, |
| "learning_rate": 7.180818724195413e-05, |
| "loss": 0.78424644, |
| "memory(GiB)": 67.73, |
| "step": 2825, |
| "train_speed(iter/s)": 0.039567 |
| }, |
| { |
| "acc": 0.76044025, |
| "epoch": 1.9463548830811555, |
| "grad_norm": 1.3120973110198975, |
| "learning_rate": 7.170579891129872e-05, |
| "loss": 0.83824387, |
| "memory(GiB)": 67.73, |
| "step": 2830, |
| "train_speed(iter/s)": 0.039597 |
| }, |
| { |
| "acc": 0.768398, |
| "epoch": 1.9497936726272354, |
| "grad_norm": 1.4003841876983643, |
| "learning_rate": 7.160329831105207e-05, |
| "loss": 0.81456871, |
| "memory(GiB)": 67.73, |
| "step": 2835, |
| "train_speed(iter/s)": 0.039628 |
| }, |
| { |
| "acc": 0.76318188, |
| "epoch": 1.953232462173315, |
| "grad_norm": 1.2559746503829956, |
| "learning_rate": 7.150068597150343e-05, |
| "loss": 0.81296453, |
| "memory(GiB)": 67.73, |
| "step": 2840, |
| "train_speed(iter/s)": 0.039657 |
| }, |
| { |
| "acc": 0.76650887, |
| "epoch": 1.9566712517193947, |
| "grad_norm": 1.4540189504623413, |
| "learning_rate": 7.139796242352016e-05, |
| "loss": 0.80663786, |
| "memory(GiB)": 67.73, |
| "step": 2845, |
| "train_speed(iter/s)": 0.039685 |
| }, |
| { |
| "acc": 0.75642557, |
| "epoch": 1.9601100412654744, |
| "grad_norm": 1.2288488149642944, |
| "learning_rate": 7.129512819854492e-05, |
| "loss": 0.85932999, |
| "memory(GiB)": 67.73, |
| "step": 2850, |
| "train_speed(iter/s)": 0.039713 |
| }, |
| { |
| "acc": 0.76574574, |
| "epoch": 1.9635488308115543, |
| "grad_norm": 1.0715101957321167, |
| "learning_rate": 7.1192183828593e-05, |
| "loss": 0.81189203, |
| "memory(GiB)": 67.73, |
| "step": 2855, |
| "train_speed(iter/s)": 0.039744 |
| }, |
| { |
| "acc": 0.76550779, |
| "epoch": 1.9669876203576342, |
| "grad_norm": 1.1467492580413818, |
| "learning_rate": 7.108912984624951e-05, |
| "loss": 0.816045, |
| "memory(GiB)": 67.73, |
| "step": 2860, |
| "train_speed(iter/s)": 0.039771 |
| }, |
| { |
| "acc": 0.77190948, |
| "epoch": 1.9704264099037139, |
| "grad_norm": 1.3903864622116089, |
| "learning_rate": 7.098596678466663e-05, |
| "loss": 0.79169202, |
| "memory(GiB)": 67.73, |
| "step": 2865, |
| "train_speed(iter/s)": 0.039802 |
| }, |
| { |
| "acc": 0.77034965, |
| "epoch": 1.9738651994497936, |
| "grad_norm": 1.4153941869735718, |
| "learning_rate": 7.088269517756085e-05, |
| "loss": 0.8023941, |
| "memory(GiB)": 67.73, |
| "step": 2870, |
| "train_speed(iter/s)": 0.039833 |
| }, |
| { |
| "acc": 0.7611412, |
| "epoch": 1.9773039889958735, |
| "grad_norm": 1.420799732208252, |
| "learning_rate": 7.07793155592103e-05, |
| "loss": 0.82977829, |
| "memory(GiB)": 67.73, |
| "step": 2875, |
| "train_speed(iter/s)": 0.039863 |
| }, |
| { |
| "acc": 0.7672267, |
| "epoch": 1.9807427785419534, |
| "grad_norm": 1.3404732942581177, |
| "learning_rate": 7.06758284644518e-05, |
| "loss": 0.82698822, |
| "memory(GiB)": 67.73, |
| "step": 2880, |
| "train_speed(iter/s)": 0.039895 |
| }, |
| { |
| "acc": 0.7625721, |
| "epoch": 1.984181568088033, |
| "grad_norm": 1.215389370918274, |
| "learning_rate": 7.057223442867828e-05, |
| "loss": 0.81949692, |
| "memory(GiB)": 67.73, |
| "step": 2885, |
| "train_speed(iter/s)": 0.039923 |
| }, |
| { |
| "acc": 0.76380196, |
| "epoch": 1.9876203576341127, |
| "grad_norm": 1.1068233251571655, |
| "learning_rate": 7.046853398783595e-05, |
| "loss": 0.82260599, |
| "memory(GiB)": 67.73, |
| "step": 2890, |
| "train_speed(iter/s)": 0.039951 |
| }, |
| { |
| "acc": 0.77088547, |
| "epoch": 1.9910591471801926, |
| "grad_norm": 1.2111361026763916, |
| "learning_rate": 7.03647276784214e-05, |
| "loss": 0.79111667, |
| "memory(GiB)": 67.73, |
| "step": 2895, |
| "train_speed(iter/s)": 0.039979 |
| }, |
| { |
| "acc": 0.77545271, |
| "epoch": 1.9944979367262725, |
| "grad_norm": 1.5610926151275635, |
| "learning_rate": 7.026081603747905e-05, |
| "loss": 0.78897448, |
| "memory(GiB)": 67.73, |
| "step": 2900, |
| "train_speed(iter/s)": 0.040011 |
| }, |
| { |
| "epoch": 1.9944979367262725, |
| "eval_acc": 0.7610057587618662, |
| "eval_loss": 0.8448835015296936, |
| "eval_runtime": 1173.7451, |
| "eval_samples_per_second": 3.649, |
| "eval_steps_per_second": 0.066, |
| "step": 2900 |
| }, |
| { |
| "acc": 0.76741076, |
| "epoch": 1.9979367262723522, |
| "grad_norm": 1.279598593711853, |
| "learning_rate": 7.01567996025982e-05, |
| "loss": 0.81564512, |
| "memory(GiB)": 67.73, |
| "step": 2905, |
| "train_speed(iter/s)": 0.039405 |
| }, |
| { |
| "acc": 0.76975098, |
| "epoch": 2.001375515818432, |
| "grad_norm": 1.2036709785461426, |
| "learning_rate": 7.00526789119103e-05, |
| "loss": 0.80328093, |
| "memory(GiB)": 67.73, |
| "step": 2910, |
| "train_speed(iter/s)": 0.039417 |
| }, |
| { |
| "acc": 0.77956657, |
| "epoch": 2.0048143053645116, |
| "grad_norm": 1.2451400756835938, |
| "learning_rate": 6.994845450408618e-05, |
| "loss": 0.77778225, |
| "memory(GiB)": 67.73, |
| "step": 2915, |
| "train_speed(iter/s)": 0.039446 |
| }, |
| { |
| "acc": 0.77100277, |
| "epoch": 2.0082530949105917, |
| "grad_norm": 1.501703143119812, |
| "learning_rate": 6.984412691833326e-05, |
| "loss": 0.78023448, |
| "memory(GiB)": 67.73, |
| "step": 2920, |
| "train_speed(iter/s)": 0.039476 |
| }, |
| { |
| "acc": 0.78834424, |
| "epoch": 2.0116918844566714, |
| "grad_norm": 1.3272697925567627, |
| "learning_rate": 6.973969669439275e-05, |
| "loss": 0.72417383, |
| "memory(GiB)": 67.73, |
| "step": 2925, |
| "train_speed(iter/s)": 0.039503 |
| }, |
| { |
| "acc": 0.78664284, |
| "epoch": 2.015130674002751, |
| "grad_norm": 1.2188291549682617, |
| "learning_rate": 6.963516437253684e-05, |
| "loss": 0.72837029, |
| "memory(GiB)": 67.73, |
| "step": 2930, |
| "train_speed(iter/s)": 0.039531 |
| }, |
| { |
| "acc": 0.78195653, |
| "epoch": 2.0185694635488307, |
| "grad_norm": 1.386407494544983, |
| "learning_rate": 6.953053049356597e-05, |
| "loss": 0.74600391, |
| "memory(GiB)": 67.73, |
| "step": 2935, |
| "train_speed(iter/s)": 0.039557 |
| }, |
| { |
| "acc": 0.77869625, |
| "epoch": 2.0220082530949104, |
| "grad_norm": 1.3403911590576172, |
| "learning_rate": 6.942579559880596e-05, |
| "loss": 0.75640421, |
| "memory(GiB)": 67.73, |
| "step": 2940, |
| "train_speed(iter/s)": 0.039588 |
| }, |
| { |
| "acc": 0.78800364, |
| "epoch": 2.0254470426409905, |
| "grad_norm": 1.2047083377838135, |
| "learning_rate": 6.932096023010522e-05, |
| "loss": 0.73669438, |
| "memory(GiB)": 67.73, |
| "step": 2945, |
| "train_speed(iter/s)": 0.039621 |
| }, |
| { |
| "acc": 0.79104028, |
| "epoch": 2.02888583218707, |
| "grad_norm": 1.208552360534668, |
| "learning_rate": 6.921602492983198e-05, |
| "loss": 0.71291051, |
| "memory(GiB)": 67.73, |
| "step": 2950, |
| "train_speed(iter/s)": 0.039651 |
| }, |
| { |
| "acc": 0.77535133, |
| "epoch": 2.03232462173315, |
| "grad_norm": 1.5250719785690308, |
| "learning_rate": 6.911099024087153e-05, |
| "loss": 0.76551533, |
| "memory(GiB)": 67.73, |
| "step": 2955, |
| "train_speed(iter/s)": 0.039681 |
| }, |
| { |
| "acc": 0.76844397, |
| "epoch": 2.0357634112792296, |
| "grad_norm": 1.2615329027175903, |
| "learning_rate": 6.900585670662321e-05, |
| "loss": 0.79221487, |
| "memory(GiB)": 67.73, |
| "step": 2960, |
| "train_speed(iter/s)": 0.039706 |
| }, |
| { |
| "acc": 0.7686276, |
| "epoch": 2.0392022008253097, |
| "grad_norm": 1.3869153261184692, |
| "learning_rate": 6.890062487099788e-05, |
| "loss": 0.79985504, |
| "memory(GiB)": 67.73, |
| "step": 2965, |
| "train_speed(iter/s)": 0.039733 |
| }, |
| { |
| "acc": 0.79315829, |
| "epoch": 2.0426409903713894, |
| "grad_norm": 1.3412541151046753, |
| "learning_rate": 6.87952952784149e-05, |
| "loss": 0.70044346, |
| "memory(GiB)": 67.73, |
| "step": 2970, |
| "train_speed(iter/s)": 0.039764 |
| }, |
| { |
| "acc": 0.76822557, |
| "epoch": 2.046079779917469, |
| "grad_norm": 1.4066526889801025, |
| "learning_rate": 6.868986847379934e-05, |
| "loss": 0.79726977, |
| "memory(GiB)": 67.73, |
| "step": 2975, |
| "train_speed(iter/s)": 0.03979 |
| }, |
| { |
| "acc": 0.77974381, |
| "epoch": 2.0495185694635487, |
| "grad_norm": 1.2747722864151, |
| "learning_rate": 6.858434500257929e-05, |
| "loss": 0.75174856, |
| "memory(GiB)": 67.73, |
| "step": 2980, |
| "train_speed(iter/s)": 0.039819 |
| }, |
| { |
| "acc": 0.7757237, |
| "epoch": 2.0529573590096284, |
| "grad_norm": 1.3321025371551514, |
| "learning_rate": 6.847872541068289e-05, |
| "loss": 0.76223741, |
| "memory(GiB)": 67.73, |
| "step": 2985, |
| "train_speed(iter/s)": 0.039845 |
| }, |
| { |
| "acc": 0.78108168, |
| "epoch": 2.0563961485557085, |
| "grad_norm": 1.3092007637023926, |
| "learning_rate": 6.837301024453556e-05, |
| "loss": 0.75712924, |
| "memory(GiB)": 67.73, |
| "step": 2990, |
| "train_speed(iter/s)": 0.03987 |
| }, |
| { |
| "acc": 0.78598289, |
| "epoch": 2.059834938101788, |
| "grad_norm": 1.305657982826233, |
| "learning_rate": 6.826720005105718e-05, |
| "loss": 0.73581972, |
| "memory(GiB)": 67.73, |
| "step": 2995, |
| "train_speed(iter/s)": 0.039898 |
| }, |
| { |
| "acc": 0.78907838, |
| "epoch": 2.063273727647868, |
| "grad_norm": 1.4367668628692627, |
| "learning_rate": 6.816129537765928e-05, |
| "loss": 0.72936554, |
| "memory(GiB)": 67.73, |
| "step": 3000, |
| "train_speed(iter/s)": 0.039927 |
| }, |
| { |
| "epoch": 2.063273727647868, |
| "eval_acc": 0.7620799028208936, |
| "eval_loss": 0.8483734726905823, |
| "eval_runtime": 1164.2299, |
| "eval_samples_per_second": 3.679, |
| "eval_steps_per_second": 0.066, |
| "step": 3000 |
| }, |
| { |
| "acc": 0.78301954, |
| "epoch": 2.0667125171939476, |
| "grad_norm": 1.5559133291244507, |
| "learning_rate": 6.805529677224216e-05, |
| "loss": 0.7379096, |
| "memory(GiB)": 67.73, |
| "step": 3005, |
| "train_speed(iter/s)": 0.039348 |
| }, |
| { |
| "acc": 0.77635798, |
| "epoch": 2.0701513067400277, |
| "grad_norm": 1.266300916671753, |
| "learning_rate": 6.79492047831921e-05, |
| "loss": 0.74843826, |
| "memory(GiB)": 67.73, |
| "step": 3010, |
| "train_speed(iter/s)": 0.039374 |
| }, |
| { |
| "acc": 0.78403974, |
| "epoch": 2.0735900962861074, |
| "grad_norm": 1.3442883491516113, |
| "learning_rate": 6.784301995937846e-05, |
| "loss": 0.73924718, |
| "memory(GiB)": 67.73, |
| "step": 3015, |
| "train_speed(iter/s)": 0.039405 |
| }, |
| { |
| "acc": 0.7821476, |
| "epoch": 2.077028885832187, |
| "grad_norm": 1.3284059762954712, |
| "learning_rate": 6.773674285015092e-05, |
| "loss": 0.731111, |
| "memory(GiB)": 67.73, |
| "step": 3020, |
| "train_speed(iter/s)": 0.039435 |
| }, |
| { |
| "acc": 0.78100576, |
| "epoch": 2.0804676753782667, |
| "grad_norm": 1.2452079057693481, |
| "learning_rate": 6.76303740053366e-05, |
| "loss": 0.75636292, |
| "memory(GiB)": 67.73, |
| "step": 3025, |
| "train_speed(iter/s)": 0.039465 |
| }, |
| { |
| "acc": 0.77952466, |
| "epoch": 2.083906464924347, |
| "grad_norm": 1.5737926959991455, |
| "learning_rate": 6.752391397523725e-05, |
| "loss": 0.75301075, |
| "memory(GiB)": 67.73, |
| "step": 3030, |
| "train_speed(iter/s)": 0.039494 |
| }, |
| { |
| "acc": 0.78698683, |
| "epoch": 2.0873452544704265, |
| "grad_norm": 1.337703824043274, |
| "learning_rate": 6.741736331062626e-05, |
| "loss": 0.73099127, |
| "memory(GiB)": 67.73, |
| "step": 3035, |
| "train_speed(iter/s)": 0.039519 |
| }, |
| { |
| "acc": 0.79346113, |
| "epoch": 2.090784044016506, |
| "grad_norm": 1.203200340270996, |
| "learning_rate": 6.731072256274604e-05, |
| "loss": 0.70464416, |
| "memory(GiB)": 67.73, |
| "step": 3040, |
| "train_speed(iter/s)": 0.039547 |
| }, |
| { |
| "acc": 0.78062749, |
| "epoch": 2.094222833562586, |
| "grad_norm": 1.5236440896987915, |
| "learning_rate": 6.720399228330494e-05, |
| "loss": 0.75513244, |
| "memory(GiB)": 67.73, |
| "step": 3045, |
| "train_speed(iter/s)": 0.039576 |
| }, |
| { |
| "acc": 0.78337817, |
| "epoch": 2.0976616231086656, |
| "grad_norm": 1.533868670463562, |
| "learning_rate": 6.709717302447462e-05, |
| "loss": 0.73356051, |
| "memory(GiB)": 67.73, |
| "step": 3050, |
| "train_speed(iter/s)": 0.039604 |
| }, |
| { |
| "acc": 0.7775434, |
| "epoch": 2.1011004126547457, |
| "grad_norm": 1.5052993297576904, |
| "learning_rate": 6.699026533888696e-05, |
| "loss": 0.75711803, |
| "memory(GiB)": 67.73, |
| "step": 3055, |
| "train_speed(iter/s)": 0.039632 |
| }, |
| { |
| "acc": 0.76806664, |
| "epoch": 2.1045392022008254, |
| "grad_norm": 1.5001362562179565, |
| "learning_rate": 6.688326977963142e-05, |
| "loss": 0.78131714, |
| "memory(GiB)": 67.73, |
| "step": 3060, |
| "train_speed(iter/s)": 0.039658 |
| }, |
| { |
| "acc": 0.76824741, |
| "epoch": 2.107977991746905, |
| "grad_norm": 1.422959327697754, |
| "learning_rate": 6.677618690025201e-05, |
| "loss": 0.79324121, |
| "memory(GiB)": 67.73, |
| "step": 3065, |
| "train_speed(iter/s)": 0.039682 |
| }, |
| { |
| "acc": 0.79779997, |
| "epoch": 2.1114167812929847, |
| "grad_norm": 1.4451581239700317, |
| "learning_rate": 6.666901725474453e-05, |
| "loss": 0.69419732, |
| "memory(GiB)": 67.73, |
| "step": 3070, |
| "train_speed(iter/s)": 0.039716 |
| }, |
| { |
| "acc": 0.77174778, |
| "epoch": 2.114855570839065, |
| "grad_norm": 1.47465980052948, |
| "learning_rate": 6.656176139755361e-05, |
| "loss": 0.79069195, |
| "memory(GiB)": 67.73, |
| "step": 3075, |
| "train_speed(iter/s)": 0.039741 |
| }, |
| { |
| "acc": 0.77605443, |
| "epoch": 2.1182943603851445, |
| "grad_norm": 1.3267581462860107, |
| "learning_rate": 6.645441988356998e-05, |
| "loss": 0.74461274, |
| "memory(GiB)": 67.73, |
| "step": 3080, |
| "train_speed(iter/s)": 0.03977 |
| }, |
| { |
| "acc": 0.77349963, |
| "epoch": 2.121733149931224, |
| "grad_norm": 1.391455054283142, |
| "learning_rate": 6.634699326812746e-05, |
| "loss": 0.77960453, |
| "memory(GiB)": 67.73, |
| "step": 3085, |
| "train_speed(iter/s)": 0.039803 |
| }, |
| { |
| "acc": 0.79598751, |
| "epoch": 2.125171939477304, |
| "grad_norm": 1.2205495834350586, |
| "learning_rate": 6.623948210700017e-05, |
| "loss": 0.70987749, |
| "memory(GiB)": 67.73, |
| "step": 3090, |
| "train_speed(iter/s)": 0.039834 |
| }, |
| { |
| "acc": 0.77750764, |
| "epoch": 2.128610729023384, |
| "grad_norm": 1.4609593152999878, |
| "learning_rate": 6.613188695639961e-05, |
| "loss": 0.75586929, |
| "memory(GiB)": 67.73, |
| "step": 3095, |
| "train_speed(iter/s)": 0.039862 |
| }, |
| { |
| "acc": 0.78546953, |
| "epoch": 2.1320495185694637, |
| "grad_norm": 1.264543890953064, |
| "learning_rate": 6.602420837297181e-05, |
| "loss": 0.71513643, |
| "memory(GiB)": 67.73, |
| "step": 3100, |
| "train_speed(iter/s)": 0.039888 |
| }, |
| { |
| "epoch": 2.1320495185694637, |
| "eval_acc": 0.7627378863544338, |
| "eval_loss": 0.8477216958999634, |
| "eval_runtime": 1140.3429, |
| "eval_samples_per_second": 3.756, |
| "eval_steps_per_second": 0.068, |
| "step": 3100 |
| }, |
| { |
| "acc": 0.79360695, |
| "epoch": 2.1354883081155434, |
| "grad_norm": 1.446578025817871, |
| "learning_rate": 6.591644691379445e-05, |
| "loss": 0.69685826, |
| "memory(GiB)": 67.73, |
| "step": 3105, |
| "train_speed(iter/s)": 0.039338 |
| }, |
| { |
| "acc": 0.78709059, |
| "epoch": 2.138927097661623, |
| "grad_norm": 1.3313159942626953, |
| "learning_rate": 6.580860313637395e-05, |
| "loss": 0.71339779, |
| "memory(GiB)": 67.73, |
| "step": 3110, |
| "train_speed(iter/s)": 0.039367 |
| }, |
| { |
| "acc": 0.77740135, |
| "epoch": 2.1423658872077027, |
| "grad_norm": 1.2659940719604492, |
| "learning_rate": 6.57006775986426e-05, |
| "loss": 0.77046852, |
| "memory(GiB)": 67.73, |
| "step": 3115, |
| "train_speed(iter/s)": 0.039389 |
| }, |
| { |
| "acc": 0.77650619, |
| "epoch": 2.145804676753783, |
| "grad_norm": 1.4188055992126465, |
| "learning_rate": 6.559267085895575e-05, |
| "loss": 0.77363644, |
| "memory(GiB)": 67.73, |
| "step": 3120, |
| "train_speed(iter/s)": 0.039413 |
| }, |
| { |
| "acc": 0.7744916, |
| "epoch": 2.1492434662998625, |
| "grad_norm": 1.5709620714187622, |
| "learning_rate": 6.548458347608877e-05, |
| "loss": 0.78516483, |
| "memory(GiB)": 67.73, |
| "step": 3125, |
| "train_speed(iter/s)": 0.039441 |
| }, |
| { |
| "acc": 0.79064846, |
| "epoch": 2.152682255845942, |
| "grad_norm": 1.4528831243515015, |
| "learning_rate": 6.537641600923424e-05, |
| "loss": 0.72166934, |
| "memory(GiB)": 67.73, |
| "step": 3130, |
| "train_speed(iter/s)": 0.039473 |
| }, |
| { |
| "acc": 0.78410482, |
| "epoch": 2.156121045392022, |
| "grad_norm": 1.2021089792251587, |
| "learning_rate": 6.52681690179991e-05, |
| "loss": 0.72740555, |
| "memory(GiB)": 67.73, |
| "step": 3135, |
| "train_speed(iter/s)": 0.039502 |
| }, |
| { |
| "acc": 0.78779097, |
| "epoch": 2.1595598349381016, |
| "grad_norm": 1.3785122632980347, |
| "learning_rate": 6.515984306240171e-05, |
| "loss": 0.7183506, |
| "memory(GiB)": 67.73, |
| "step": 3140, |
| "train_speed(iter/s)": 0.039532 |
| }, |
| { |
| "acc": 0.78875618, |
| "epoch": 2.1629986244841817, |
| "grad_norm": 1.4517391920089722, |
| "learning_rate": 6.505143870286892e-05, |
| "loss": 0.72756548, |
| "memory(GiB)": 67.73, |
| "step": 3145, |
| "train_speed(iter/s)": 0.039559 |
| }, |
| { |
| "acc": 0.77699008, |
| "epoch": 2.1664374140302614, |
| "grad_norm": 1.3971807956695557, |
| "learning_rate": 6.494295650023326e-05, |
| "loss": 0.76216259, |
| "memory(GiB)": 67.73, |
| "step": 3150, |
| "train_speed(iter/s)": 0.039584 |
| }, |
| { |
| "acc": 0.78072176, |
| "epoch": 2.169876203576341, |
| "grad_norm": 1.3789912462234497, |
| "learning_rate": 6.483439701572987e-05, |
| "loss": 0.74354634, |
| "memory(GiB)": 67.73, |
| "step": 3155, |
| "train_speed(iter/s)": 0.039607 |
| }, |
| { |
| "acc": 0.78208303, |
| "epoch": 2.1733149931224207, |
| "grad_norm": 1.4556952714920044, |
| "learning_rate": 6.472576081099384e-05, |
| "loss": 0.74753394, |
| "memory(GiB)": 67.73, |
| "step": 3160, |
| "train_speed(iter/s)": 0.03963 |
| }, |
| { |
| "acc": 0.77552352, |
| "epoch": 2.176753782668501, |
| "grad_norm": 1.3656238317489624, |
| "learning_rate": 6.461704844805711e-05, |
| "loss": 0.76849761, |
| "memory(GiB)": 67.73, |
| "step": 3165, |
| "train_speed(iter/s)": 0.039651 |
| }, |
| { |
| "acc": 0.79463406, |
| "epoch": 2.1801925722145805, |
| "grad_norm": 1.6309324502944946, |
| "learning_rate": 6.450826048934564e-05, |
| "loss": 0.71653328, |
| "memory(GiB)": 67.73, |
| "step": 3170, |
| "train_speed(iter/s)": 0.03968 |
| }, |
| { |
| "acc": 0.78766985, |
| "epoch": 2.18363136176066, |
| "grad_norm": 1.5563981533050537, |
| "learning_rate": 6.439939749767646e-05, |
| "loss": 0.72953587, |
| "memory(GiB)": 67.73, |
| "step": 3175, |
| "train_speed(iter/s)": 0.039706 |
| }, |
| { |
| "acc": 0.78887863, |
| "epoch": 2.18707015130674, |
| "grad_norm": 1.4095101356506348, |
| "learning_rate": 6.429046003625481e-05, |
| "loss": 0.71779909, |
| "memory(GiB)": 67.73, |
| "step": 3180, |
| "train_speed(iter/s)": 0.039734 |
| }, |
| { |
| "acc": 0.78688583, |
| "epoch": 2.19050894085282, |
| "grad_norm": 1.3465501070022583, |
| "learning_rate": 6.418144866867121e-05, |
| "loss": 0.73801022, |
| "memory(GiB)": 67.73, |
| "step": 3185, |
| "train_speed(iter/s)": 0.03976 |
| }, |
| { |
| "acc": 0.78006182, |
| "epoch": 2.1939477303988997, |
| "grad_norm": 1.4523009061813354, |
| "learning_rate": 6.407236395889853e-05, |
| "loss": 0.75504117, |
| "memory(GiB)": 67.73, |
| "step": 3190, |
| "train_speed(iter/s)": 0.039787 |
| }, |
| { |
| "acc": 0.77997456, |
| "epoch": 2.1973865199449794, |
| "grad_norm": 1.4462857246398926, |
| "learning_rate": 6.396320647128904e-05, |
| "loss": 0.7402379, |
| "memory(GiB)": 67.73, |
| "step": 3195, |
| "train_speed(iter/s)": 0.039814 |
| }, |
| { |
| "acc": 0.78546247, |
| "epoch": 2.200825309491059, |
| "grad_norm": 1.3453855514526367, |
| "learning_rate": 6.385397677057158e-05, |
| "loss": 0.74274435, |
| "memory(GiB)": 67.73, |
| "step": 3200, |
| "train_speed(iter/s)": 0.039843 |
| }, |
| { |
| "epoch": 2.200825309491059, |
| "eval_acc": 0.7633396319791245, |
| "eval_loss": 0.8417202234268188, |
| "eval_runtime": 1162.9585, |
| "eval_samples_per_second": 3.683, |
| "eval_steps_per_second": 0.066, |
| "step": 3200 |
| }, |
| { |
| "acc": 0.7864996, |
| "epoch": 2.2042640990371387, |
| "grad_norm": 1.5509644746780396, |
| "learning_rate": 6.374467542184858e-05, |
| "loss": 0.7381216, |
| "memory(GiB)": 67.73, |
| "step": 3205, |
| "train_speed(iter/s)": 0.0393 |
| }, |
| { |
| "acc": 0.7754878, |
| "epoch": 2.207702888583219, |
| "grad_norm": 1.4426201581954956, |
| "learning_rate": 6.363530299059309e-05, |
| "loss": 0.76541862, |
| "memory(GiB)": 67.73, |
| "step": 3210, |
| "train_speed(iter/s)": 0.039326 |
| }, |
| { |
| "acc": 0.78851786, |
| "epoch": 2.2111416781292985, |
| "grad_norm": 1.4319977760314941, |
| "learning_rate": 6.352586004264595e-05, |
| "loss": 0.71185198, |
| "memory(GiB)": 67.73, |
| "step": 3215, |
| "train_speed(iter/s)": 0.039353 |
| }, |
| { |
| "acc": 0.78093901, |
| "epoch": 2.214580467675378, |
| "grad_norm": 1.4418880939483643, |
| "learning_rate": 6.341634714421283e-05, |
| "loss": 0.73851643, |
| "memory(GiB)": 67.73, |
| "step": 3220, |
| "train_speed(iter/s)": 0.039379 |
| }, |
| { |
| "acc": 0.78669729, |
| "epoch": 2.218019257221458, |
| "grad_norm": 1.270719289779663, |
| "learning_rate": 6.330676486186129e-05, |
| "loss": 0.71485491, |
| "memory(GiB)": 67.73, |
| "step": 3225, |
| "train_speed(iter/s)": 0.039408 |
| }, |
| { |
| "acc": 0.79144497, |
| "epoch": 2.221458046767538, |
| "grad_norm": 1.4314090013504028, |
| "learning_rate": 6.31971137625178e-05, |
| "loss": 0.7153636, |
| "memory(GiB)": 67.73, |
| "step": 3230, |
| "train_speed(iter/s)": 0.039435 |
| }, |
| { |
| "acc": 0.77727938, |
| "epoch": 2.2248968363136177, |
| "grad_norm": 1.4725736379623413, |
| "learning_rate": 6.308739441346485e-05, |
| "loss": 0.76273413, |
| "memory(GiB)": 67.73, |
| "step": 3235, |
| "train_speed(iter/s)": 0.039462 |
| }, |
| { |
| "acc": 0.78571582, |
| "epoch": 2.2283356258596974, |
| "grad_norm": 1.512195348739624, |
| "learning_rate": 6.297760738233815e-05, |
| "loss": 0.72405062, |
| "memory(GiB)": 67.73, |
| "step": 3240, |
| "train_speed(iter/s)": 0.039488 |
| }, |
| { |
| "acc": 0.79117641, |
| "epoch": 2.231774415405777, |
| "grad_norm": 1.3315008878707886, |
| "learning_rate": 6.286775323712345e-05, |
| "loss": 0.69954386, |
| "memory(GiB)": 67.73, |
| "step": 3245, |
| "train_speed(iter/s)": 0.039513 |
| }, |
| { |
| "acc": 0.7794364, |
| "epoch": 2.235213204951857, |
| "grad_norm": 1.3924676179885864, |
| "learning_rate": 6.275783254615373e-05, |
| "loss": 0.73521681, |
| "memory(GiB)": 67.73, |
| "step": 3250, |
| "train_speed(iter/s)": 0.039537 |
| }, |
| { |
| "acc": 0.77737999, |
| "epoch": 2.238651994497937, |
| "grad_norm": 1.2619364261627197, |
| "learning_rate": 6.264784587810623e-05, |
| "loss": 0.74972701, |
| "memory(GiB)": 67.73, |
| "step": 3255, |
| "train_speed(iter/s)": 0.039565 |
| }, |
| { |
| "acc": 0.7738802, |
| "epoch": 2.2420907840440165, |
| "grad_norm": 1.5401955842971802, |
| "learning_rate": 6.25377938019996e-05, |
| "loss": 0.79901037, |
| "memory(GiB)": 67.73, |
| "step": 3260, |
| "train_speed(iter/s)": 0.039591 |
| }, |
| { |
| "acc": 0.78558297, |
| "epoch": 2.245529573590096, |
| "grad_norm": 1.2368144989013672, |
| "learning_rate": 6.24276768871908e-05, |
| "loss": 0.72351027, |
| "memory(GiB)": 67.73, |
| "step": 3265, |
| "train_speed(iter/s)": 0.039617 |
| }, |
| { |
| "acc": 0.78128538, |
| "epoch": 2.248968363136176, |
| "grad_norm": 1.4421414136886597, |
| "learning_rate": 6.23174957033723e-05, |
| "loss": 0.72427325, |
| "memory(GiB)": 67.73, |
| "step": 3270, |
| "train_speed(iter/s)": 0.039638 |
| }, |
| { |
| "acc": 0.79050694, |
| "epoch": 2.252407152682256, |
| "grad_norm": 1.473962426185608, |
| "learning_rate": 6.220725082056901e-05, |
| "loss": 0.70825963, |
| "memory(GiB)": 67.73, |
| "step": 3275, |
| "train_speed(iter/s)": 0.039665 |
| }, |
| { |
| "acc": 0.78690662, |
| "epoch": 2.2558459422283357, |
| "grad_norm": 1.4135388135910034, |
| "learning_rate": 6.209694280913539e-05, |
| "loss": 0.71959724, |
| "memory(GiB)": 67.73, |
| "step": 3280, |
| "train_speed(iter/s)": 0.039691 |
| }, |
| { |
| "acc": 0.78945398, |
| "epoch": 2.2592847317744154, |
| "grad_norm": 1.604658842086792, |
| "learning_rate": 6.198657223975257e-05, |
| "loss": 0.71993084, |
| "memory(GiB)": 67.73, |
| "step": 3285, |
| "train_speed(iter/s)": 0.039719 |
| }, |
| { |
| "acc": 0.78186049, |
| "epoch": 2.262723521320495, |
| "grad_norm": 1.5232980251312256, |
| "learning_rate": 6.187613968342524e-05, |
| "loss": 0.75053821, |
| "memory(GiB)": 67.73, |
| "step": 3290, |
| "train_speed(iter/s)": 0.039741 |
| }, |
| { |
| "acc": 0.78012853, |
| "epoch": 2.266162310866575, |
| "grad_norm": 1.2351890802383423, |
| "learning_rate": 6.176564571147882e-05, |
| "loss": 0.76610746, |
| "memory(GiB)": 67.73, |
| "step": 3295, |
| "train_speed(iter/s)": 0.039772 |
| }, |
| { |
| "acc": 0.79100294, |
| "epoch": 2.269601100412655, |
| "grad_norm": 1.2078155279159546, |
| "learning_rate": 6.165509089555647e-05, |
| "loss": 0.69998646, |
| "memory(GiB)": 67.73, |
| "step": 3300, |
| "train_speed(iter/s)": 0.039798 |
| }, |
| { |
| "epoch": 2.269601100412655, |
| "eval_acc": 0.7635139694965583, |
| "eval_loss": 0.8397796750068665, |
| "eval_runtime": 1111.351, |
| "eval_samples_per_second": 3.854, |
| "eval_steps_per_second": 0.069, |
| "step": 3300 |
| }, |
| { |
| "acc": 0.78393035, |
| "epoch": 2.2730398899587345, |
| "grad_norm": 1.453913688659668, |
| "learning_rate": 6.154447580761612e-05, |
| "loss": 0.72859416, |
| "memory(GiB)": 67.73, |
| "step": 3305, |
| "train_speed(iter/s)": 0.039299 |
| }, |
| { |
| "acc": 0.78495998, |
| "epoch": 2.276478679504814, |
| "grad_norm": 1.4776540994644165, |
| "learning_rate": 6.143380101992752e-05, |
| "loss": 0.7363111, |
| "memory(GiB)": 67.73, |
| "step": 3310, |
| "train_speed(iter/s)": 0.039328 |
| }, |
| { |
| "acc": 0.78339643, |
| "epoch": 2.2799174690508943, |
| "grad_norm": 1.5362030267715454, |
| "learning_rate": 6.132306710506926e-05, |
| "loss": 0.7379076, |
| "memory(GiB)": 67.73, |
| "step": 3315, |
| "train_speed(iter/s)": 0.039357 |
| }, |
| { |
| "acc": 0.7797461, |
| "epoch": 2.283356258596974, |
| "grad_norm": 1.5111163854599, |
| "learning_rate": 6.121227463592585e-05, |
| "loss": 0.76058264, |
| "memory(GiB)": 67.73, |
| "step": 3320, |
| "train_speed(iter/s)": 0.039381 |
| }, |
| { |
| "acc": 0.77675905, |
| "epoch": 2.2867950481430537, |
| "grad_norm": 1.4410961866378784, |
| "learning_rate": 6.11014241856847e-05, |
| "loss": 0.7604672, |
| "memory(GiB)": 67.73, |
| "step": 3325, |
| "train_speed(iter/s)": 0.039401 |
| }, |
| { |
| "acc": 0.78640685, |
| "epoch": 2.2902338376891334, |
| "grad_norm": 1.7038127183914185, |
| "learning_rate": 6.099051632783327e-05, |
| "loss": 0.72746906, |
| "memory(GiB)": 67.73, |
| "step": 3330, |
| "train_speed(iter/s)": 0.039426 |
| }, |
| { |
| "acc": 0.78558645, |
| "epoch": 2.293672627235213, |
| "grad_norm": 1.2854883670806885, |
| "learning_rate": 6.0879551636155894e-05, |
| "loss": 0.74064126, |
| "memory(GiB)": 67.73, |
| "step": 3335, |
| "train_speed(iter/s)": 0.039449 |
| }, |
| { |
| "acc": 0.781954, |
| "epoch": 2.297111416781293, |
| "grad_norm": 1.7075494527816772, |
| "learning_rate": 6.076853068473104e-05, |
| "loss": 0.73064299, |
| "memory(GiB)": 67.73, |
| "step": 3340, |
| "train_speed(iter/s)": 0.039476 |
| }, |
| { |
| "acc": 0.7864274, |
| "epoch": 2.300550206327373, |
| "grad_norm": 1.3785117864608765, |
| "learning_rate": 6.065745404792816e-05, |
| "loss": 0.72167091, |
| "memory(GiB)": 67.73, |
| "step": 3345, |
| "train_speed(iter/s)": 0.039497 |
| }, |
| { |
| "acc": 0.77905812, |
| "epoch": 2.3039889958734525, |
| "grad_norm": 1.2673250436782837, |
| "learning_rate": 6.054632230040489e-05, |
| "loss": 0.75723281, |
| "memory(GiB)": 67.73, |
| "step": 3350, |
| "train_speed(iter/s)": 0.039522 |
| }, |
| { |
| "acc": 0.78323727, |
| "epoch": 2.307427785419532, |
| "grad_norm": 1.5396391153335571, |
| "learning_rate": 6.043513601710391e-05, |
| "loss": 0.75284595, |
| "memory(GiB)": 67.73, |
| "step": 3355, |
| "train_speed(iter/s)": 0.039546 |
| }, |
| { |
| "acc": 0.77735343, |
| "epoch": 2.310866574965612, |
| "grad_norm": 1.4035242795944214, |
| "learning_rate": 6.032389577325004e-05, |
| "loss": 0.76564093, |
| "memory(GiB)": 67.73, |
| "step": 3360, |
| "train_speed(iter/s)": 0.039572 |
| }, |
| { |
| "acc": 0.78433137, |
| "epoch": 2.314305364511692, |
| "grad_norm": 1.3705512285232544, |
| "learning_rate": 6.0212602144347295e-05, |
| "loss": 0.74389467, |
| "memory(GiB)": 67.73, |
| "step": 3365, |
| "train_speed(iter/s)": 0.039597 |
| }, |
| { |
| "acc": 0.7952363, |
| "epoch": 2.3177441540577717, |
| "grad_norm": 1.601040005683899, |
| "learning_rate": 6.010125570617587e-05, |
| "loss": 0.69709463, |
| "memory(GiB)": 67.73, |
| "step": 3370, |
| "train_speed(iter/s)": 0.039624 |
| }, |
| { |
| "acc": 0.78479719, |
| "epoch": 2.3211829436038514, |
| "grad_norm": 1.6512783765792847, |
| "learning_rate": 5.998985703478916e-05, |
| "loss": 0.7351841, |
| "memory(GiB)": 67.73, |
| "step": 3375, |
| "train_speed(iter/s)": 0.039648 |
| }, |
| { |
| "acc": 0.7886528, |
| "epoch": 2.324621733149931, |
| "grad_norm": 1.4211089611053467, |
| "learning_rate": 5.9878406706510834e-05, |
| "loss": 0.72703929, |
| "memory(GiB)": 67.73, |
| "step": 3380, |
| "train_speed(iter/s)": 0.039672 |
| }, |
| { |
| "acc": 0.79997978, |
| "epoch": 2.328060522696011, |
| "grad_norm": 1.5382604598999023, |
| "learning_rate": 5.976690529793175e-05, |
| "loss": 0.68571553, |
| "memory(GiB)": 67.73, |
| "step": 3385, |
| "train_speed(iter/s)": 0.039698 |
| }, |
| { |
| "acc": 0.78704443, |
| "epoch": 2.331499312242091, |
| "grad_norm": 1.4559470415115356, |
| "learning_rate": 5.9655353385907055e-05, |
| "loss": 0.7418499, |
| "memory(GiB)": 67.73, |
| "step": 3390, |
| "train_speed(iter/s)": 0.039721 |
| }, |
| { |
| "acc": 0.77985802, |
| "epoch": 2.3349381017881705, |
| "grad_norm": 1.4432960748672485, |
| "learning_rate": 5.954375154755316e-05, |
| "loss": 0.75312023, |
| "memory(GiB)": 67.73, |
| "step": 3395, |
| "train_speed(iter/s)": 0.039747 |
| }, |
| { |
| "acc": 0.79101877, |
| "epoch": 2.33837689133425, |
| "grad_norm": 1.3332661390304565, |
| "learning_rate": 5.9432100360244816e-05, |
| "loss": 0.71166148, |
| "memory(GiB)": 67.73, |
| "step": 3400, |
| "train_speed(iter/s)": 0.039772 |
| }, |
| { |
| "epoch": 2.33837689133425, |
| "eval_acc": 0.7662640032393035, |
| "eval_loss": 0.8363276124000549, |
| "eval_runtime": 1137.5857, |
| "eval_samples_per_second": 3.765, |
| "eval_steps_per_second": 0.068, |
| "step": 3400 |
| }, |
| { |
| "acc": 0.77607446, |
| "epoch": 2.3418156808803303, |
| "grad_norm": 1.4800801277160645, |
| "learning_rate": 5.9320400401612055e-05, |
| "loss": 0.75290956, |
| "memory(GiB)": 67.73, |
| "step": 3405, |
| "train_speed(iter/s)": 0.039276 |
| }, |
| { |
| "acc": 0.79126697, |
| "epoch": 2.34525447042641, |
| "grad_norm": 1.4802907705307007, |
| "learning_rate": 5.9208652249537224e-05, |
| "loss": 0.71252222, |
| "memory(GiB)": 67.73, |
| "step": 3410, |
| "train_speed(iter/s)": 0.039305 |
| }, |
| { |
| "acc": 0.79908376, |
| "epoch": 2.3486932599724897, |
| "grad_norm": 1.5732394456863403, |
| "learning_rate": 5.909685648215198e-05, |
| "loss": 0.68212581, |
| "memory(GiB)": 67.73, |
| "step": 3415, |
| "train_speed(iter/s)": 0.039334 |
| }, |
| { |
| "acc": 0.79249792, |
| "epoch": 2.3521320495185694, |
| "grad_norm": 1.5170937776565552, |
| "learning_rate": 5.8985013677834376e-05, |
| "loss": 0.69914331, |
| "memory(GiB)": 67.73, |
| "step": 3420, |
| "train_speed(iter/s)": 0.039361 |
| }, |
| { |
| "acc": 0.76918459, |
| "epoch": 2.355570839064649, |
| "grad_norm": 1.4346693754196167, |
| "learning_rate": 5.887312441520577e-05, |
| "loss": 0.7888124, |
| "memory(GiB)": 67.73, |
| "step": 3425, |
| "train_speed(iter/s)": 0.039379 |
| }, |
| { |
| "acc": 0.77460504, |
| "epoch": 2.359009628610729, |
| "grad_norm": 1.5379953384399414, |
| "learning_rate": 5.876118927312788e-05, |
| "loss": 0.77686357, |
| "memory(GiB)": 67.73, |
| "step": 3430, |
| "train_speed(iter/s)": 0.039402 |
| }, |
| { |
| "acc": 0.78930125, |
| "epoch": 2.362448418156809, |
| "grad_norm": 1.5620882511138916, |
| "learning_rate": 5.8649208830699776e-05, |
| "loss": 0.72846537, |
| "memory(GiB)": 67.73, |
| "step": 3435, |
| "train_speed(iter/s)": 0.039427 |
| }, |
| { |
| "acc": 0.7783771, |
| "epoch": 2.3658872077028885, |
| "grad_norm": 1.3905718326568604, |
| "learning_rate": 5.85371836672549e-05, |
| "loss": 0.74923038, |
| "memory(GiB)": 67.73, |
| "step": 3440, |
| "train_speed(iter/s)": 0.039449 |
| }, |
| { |
| "acc": 0.77820864, |
| "epoch": 2.369325997248968, |
| "grad_norm": 1.2664304971694946, |
| "learning_rate": 5.842511436235807e-05, |
| "loss": 0.77669792, |
| "memory(GiB)": 67.73, |
| "step": 3445, |
| "train_speed(iter/s)": 0.039476 |
| }, |
| { |
| "acc": 0.78637152, |
| "epoch": 2.3727647867950483, |
| "grad_norm": 1.3347089290618896, |
| "learning_rate": 5.831300149580245e-05, |
| "loss": 0.72091188, |
| "memory(GiB)": 67.73, |
| "step": 3450, |
| "train_speed(iter/s)": 0.039501 |
| }, |
| { |
| "acc": 0.77844844, |
| "epoch": 2.376203576341128, |
| "grad_norm": 1.4759833812713623, |
| "learning_rate": 5.820084564760657e-05, |
| "loss": 0.76853113, |
| "memory(GiB)": 67.73, |
| "step": 3455, |
| "train_speed(iter/s)": 0.039528 |
| }, |
| { |
| "acc": 0.7886765, |
| "epoch": 2.3796423658872077, |
| "grad_norm": 1.7147775888442993, |
| "learning_rate": 5.808864739801136e-05, |
| "loss": 0.73901868, |
| "memory(GiB)": 67.73, |
| "step": 3460, |
| "train_speed(iter/s)": 0.039549 |
| }, |
| { |
| "acc": 0.78849745, |
| "epoch": 2.3830811554332874, |
| "grad_norm": 1.4572715759277344, |
| "learning_rate": 5.797640732747707e-05, |
| "loss": 0.71194096, |
| "memory(GiB)": 67.73, |
| "step": 3465, |
| "train_speed(iter/s)": 0.039573 |
| }, |
| { |
| "acc": 0.79340534, |
| "epoch": 2.3865199449793675, |
| "grad_norm": 1.8968569040298462, |
| "learning_rate": 5.7864126016680354e-05, |
| "loss": 0.69921117, |
| "memory(GiB)": 67.73, |
| "step": 3470, |
| "train_speed(iter/s)": 0.039599 |
| }, |
| { |
| "acc": 0.7905076, |
| "epoch": 2.389958734525447, |
| "grad_norm": 1.6759856939315796, |
| "learning_rate": 5.7751804046511185e-05, |
| "loss": 0.71475925, |
| "memory(GiB)": 67.73, |
| "step": 3475, |
| "train_speed(iter/s)": 0.039625 |
| }, |
| { |
| "acc": 0.78112564, |
| "epoch": 2.393397524071527, |
| "grad_norm": 1.301619052886963, |
| "learning_rate": 5.763944199806991e-05, |
| "loss": 0.74565101, |
| "memory(GiB)": 67.73, |
| "step": 3480, |
| "train_speed(iter/s)": 0.039651 |
| }, |
| { |
| "acc": 0.77826796, |
| "epoch": 2.3968363136176065, |
| "grad_norm": 1.726420283317566, |
| "learning_rate": 5.7527040452664215e-05, |
| "loss": 0.76559715, |
| "memory(GiB)": 67.73, |
| "step": 3485, |
| "train_speed(iter/s)": 0.039679 |
| }, |
| { |
| "acc": 0.78291936, |
| "epoch": 2.400275103163686, |
| "grad_norm": 1.7139410972595215, |
| "learning_rate": 5.741459999180612e-05, |
| "loss": 0.74492068, |
| "memory(GiB)": 67.73, |
| "step": 3490, |
| "train_speed(iter/s)": 0.039704 |
| }, |
| { |
| "acc": 0.7825911, |
| "epoch": 2.4037138927097663, |
| "grad_norm": 1.6379047632217407, |
| "learning_rate": 5.7302121197209e-05, |
| "loss": 0.7245317, |
| "memory(GiB)": 67.73, |
| "step": 3495, |
| "train_speed(iter/s)": 0.039726 |
| }, |
| { |
| "acc": 0.78516874, |
| "epoch": 2.407152682255846, |
| "grad_norm": 1.6368329524993896, |
| "learning_rate": 5.7189604650784524e-05, |
| "loss": 0.71679149, |
| "memory(GiB)": 67.73, |
| "step": 3500, |
| "train_speed(iter/s)": 0.039751 |
| }, |
| { |
| "epoch": 2.407152682255846, |
| "eval_acc": 0.7661683987942592, |
| "eval_loss": 0.8319239020347595, |
| "eval_runtime": 1159.9932, |
| "eval_samples_per_second": 3.692, |
| "eval_steps_per_second": 0.066, |
| "step": 3500 |
| }, |
| { |
| "acc": 0.77984338, |
| "epoch": 2.4105914718019257, |
| "grad_norm": 1.3497166633605957, |
| "learning_rate": 5.707705093463969e-05, |
| "loss": 0.76508818, |
| "memory(GiB)": 67.73, |
| "step": 3505, |
| "train_speed(iter/s)": 0.039259 |
| }, |
| { |
| "acc": 0.77530251, |
| "epoch": 2.4140302613480054, |
| "grad_norm": 1.4699667692184448, |
| "learning_rate": 5.6964460631073736e-05, |
| "loss": 0.7737175, |
| "memory(GiB)": 67.73, |
| "step": 3510, |
| "train_speed(iter/s)": 0.039282 |
| }, |
| { |
| "acc": 0.77435117, |
| "epoch": 2.417469050894085, |
| "grad_norm": 1.3567484617233276, |
| "learning_rate": 5.685183432257529e-05, |
| "loss": 0.76602321, |
| "memory(GiB)": 67.73, |
| "step": 3515, |
| "train_speed(iter/s)": 0.039309 |
| }, |
| { |
| "acc": 0.77687979, |
| "epoch": 2.420907840440165, |
| "grad_norm": 1.6473392248153687, |
| "learning_rate": 5.6739172591819187e-05, |
| "loss": 0.76530232, |
| "memory(GiB)": 67.73, |
| "step": 3520, |
| "train_speed(iter/s)": 0.039334 |
| }, |
| { |
| "acc": 0.78237972, |
| "epoch": 2.424346629986245, |
| "grad_norm": 1.5507371425628662, |
| "learning_rate": 5.662647602166351e-05, |
| "loss": 0.75080051, |
| "memory(GiB)": 67.73, |
| "step": 3525, |
| "train_speed(iter/s)": 0.039356 |
| }, |
| { |
| "acc": 0.78154564, |
| "epoch": 2.4277854195323245, |
| "grad_norm": 1.4991931915283203, |
| "learning_rate": 5.65137451951466e-05, |
| "loss": 0.75352793, |
| "memory(GiB)": 67.73, |
| "step": 3530, |
| "train_speed(iter/s)": 0.039378 |
| }, |
| { |
| "acc": 0.77997007, |
| "epoch": 2.4312242090784046, |
| "grad_norm": 1.6739842891693115, |
| "learning_rate": 5.640098069548404e-05, |
| "loss": 0.76081572, |
| "memory(GiB)": 67.73, |
| "step": 3535, |
| "train_speed(iter/s)": 0.039401 |
| }, |
| { |
| "acc": 0.78768792, |
| "epoch": 2.4346629986244843, |
| "grad_norm": 1.7248750925064087, |
| "learning_rate": 5.628818310606561e-05, |
| "loss": 0.7255187, |
| "memory(GiB)": 67.73, |
| "step": 3540, |
| "train_speed(iter/s)": 0.039426 |
| }, |
| { |
| "acc": 0.79025412, |
| "epoch": 2.438101788170564, |
| "grad_norm": 1.62557053565979, |
| "learning_rate": 5.617535301045228e-05, |
| "loss": 0.70803857, |
| "memory(GiB)": 67.73, |
| "step": 3545, |
| "train_speed(iter/s)": 0.039454 |
| }, |
| { |
| "acc": 0.78769302, |
| "epoch": 2.4415405777166437, |
| "grad_norm": 1.4635558128356934, |
| "learning_rate": 5.606249099237318e-05, |
| "loss": 0.73414497, |
| "memory(GiB)": 67.73, |
| "step": 3550, |
| "train_speed(iter/s)": 0.039479 |
| }, |
| { |
| "acc": 0.79148045, |
| "epoch": 2.4449793672627234, |
| "grad_norm": 1.3263139724731445, |
| "learning_rate": 5.594959763572263e-05, |
| "loss": 0.71763167, |
| "memory(GiB)": 67.73, |
| "step": 3555, |
| "train_speed(iter/s)": 0.039504 |
| }, |
| { |
| "acc": 0.79330978, |
| "epoch": 2.4484181568088035, |
| "grad_norm": 1.5712461471557617, |
| "learning_rate": 5.5836673524557e-05, |
| "loss": 0.71670427, |
| "memory(GiB)": 67.73, |
| "step": 3560, |
| "train_speed(iter/s)": 0.039529 |
| }, |
| { |
| "acc": 0.78728065, |
| "epoch": 2.451856946354883, |
| "grad_norm": 1.610227108001709, |
| "learning_rate": 5.572371924309188e-05, |
| "loss": 0.71835189, |
| "memory(GiB)": 67.73, |
| "step": 3565, |
| "train_speed(iter/s)": 0.039555 |
| }, |
| { |
| "acc": 0.7980279, |
| "epoch": 2.455295735900963, |
| "grad_norm": 1.3638176918029785, |
| "learning_rate": 5.5610735375698863e-05, |
| "loss": 0.68947468, |
| "memory(GiB)": 67.73, |
| "step": 3570, |
| "train_speed(iter/s)": 0.039583 |
| }, |
| { |
| "acc": 0.79382896, |
| "epoch": 2.4587345254470425, |
| "grad_norm": 1.671080470085144, |
| "learning_rate": 5.549772250690264e-05, |
| "loss": 0.6997715, |
| "memory(GiB)": 67.73, |
| "step": 3575, |
| "train_speed(iter/s)": 0.039609 |
| }, |
| { |
| "acc": 0.78277836, |
| "epoch": 2.462173314993122, |
| "grad_norm": 1.5335805416107178, |
| "learning_rate": 5.538468122137791e-05, |
| "loss": 0.75039816, |
| "memory(GiB)": 67.73, |
| "step": 3580, |
| "train_speed(iter/s)": 0.039637 |
| }, |
| { |
| "acc": 0.78852177, |
| "epoch": 2.4656121045392023, |
| "grad_norm": 1.5238432884216309, |
| "learning_rate": 5.527161210394645e-05, |
| "loss": 0.72055502, |
| "memory(GiB)": 67.73, |
| "step": 3585, |
| "train_speed(iter/s)": 0.039663 |
| }, |
| { |
| "acc": 0.77950158, |
| "epoch": 2.469050894085282, |
| "grad_norm": 1.3826063871383667, |
| "learning_rate": 5.515851573957397e-05, |
| "loss": 0.74620533, |
| "memory(GiB)": 67.73, |
| "step": 3590, |
| "train_speed(iter/s)": 0.039684 |
| }, |
| { |
| "acc": 0.78283319, |
| "epoch": 2.4724896836313617, |
| "grad_norm": 1.637787938117981, |
| "learning_rate": 5.504539271336714e-05, |
| "loss": 0.75348463, |
| "memory(GiB)": 67.73, |
| "step": 3595, |
| "train_speed(iter/s)": 0.039709 |
| }, |
| { |
| "acc": 0.79472337, |
| "epoch": 2.4759284731774414, |
| "grad_norm": 1.4630149602890015, |
| "learning_rate": 5.493224361057062e-05, |
| "loss": 0.70524812, |
| "memory(GiB)": 67.73, |
| "step": 3600, |
| "train_speed(iter/s)": 0.039738 |
| }, |
| { |
| "epoch": 2.4759284731774414, |
| "eval_acc": 0.7665226976200117, |
| "eval_loss": 0.8275927901268005, |
| "eval_runtime": 1123.227, |
| "eval_samples_per_second": 3.813, |
| "eval_steps_per_second": 0.069, |
| "step": 3600 |
| }, |
| { |
| "acc": 0.79677072, |
| "epoch": 2.4793672627235215, |
| "grad_norm": 1.354331374168396, |
| "learning_rate": 5.481906901656389e-05, |
| "loss": 0.70224314, |
| "memory(GiB)": 67.73, |
| "step": 3605, |
| "train_speed(iter/s)": 0.039277 |
| }, |
| { |
| "acc": 0.78522711, |
| "epoch": 2.482806052269601, |
| "grad_norm": 1.4437576532363892, |
| "learning_rate": 5.470586951685842e-05, |
| "loss": 0.74459286, |
| "memory(GiB)": 67.73, |
| "step": 3610, |
| "train_speed(iter/s)": 0.039302 |
| }, |
| { |
| "acc": 0.79154515, |
| "epoch": 2.486244841815681, |
| "grad_norm": 1.358216643333435, |
| "learning_rate": 5.4592645697094434e-05, |
| "loss": 0.70617638, |
| "memory(GiB)": 67.73, |
| "step": 3615, |
| "train_speed(iter/s)": 0.039323 |
| }, |
| { |
| "acc": 0.78887815, |
| "epoch": 2.4896836313617605, |
| "grad_norm": 1.6288851499557495, |
| "learning_rate": 5.447939814303803e-05, |
| "loss": 0.72137556, |
| "memory(GiB)": 67.73, |
| "step": 3620, |
| "train_speed(iter/s)": 0.039345 |
| }, |
| { |
| "acc": 0.7799171, |
| "epoch": 2.4931224209078406, |
| "grad_norm": 1.4252561330795288, |
| "learning_rate": 5.4366127440578063e-05, |
| "loss": 0.75225086, |
| "memory(GiB)": 67.73, |
| "step": 3625, |
| "train_speed(iter/s)": 0.039369 |
| }, |
| { |
| "acc": 0.78901777, |
| "epoch": 2.4965612104539203, |
| "grad_norm": 1.3052763938903809, |
| "learning_rate": 5.42528341757232e-05, |
| "loss": 0.73022747, |
| "memory(GiB)": 67.73, |
| "step": 3630, |
| "train_speed(iter/s)": 0.039392 |
| }, |
| { |
| "acc": 0.77776222, |
| "epoch": 2.5, |
| "grad_norm": 1.3259241580963135, |
| "learning_rate": 5.413951893459877e-05, |
| "loss": 0.74716744, |
| "memory(GiB)": 67.73, |
| "step": 3635, |
| "train_speed(iter/s)": 0.039413 |
| }, |
| { |
| "acc": 0.79203482, |
| "epoch": 2.5034387895460797, |
| "grad_norm": 1.491448998451233, |
| "learning_rate": 5.4026182303443826e-05, |
| "loss": 0.71442933, |
| "memory(GiB)": 67.73, |
| "step": 3640, |
| "train_speed(iter/s)": 0.039433 |
| }, |
| { |
| "acc": 0.78454857, |
| "epoch": 2.5068775790921594, |
| "grad_norm": 1.6916753053665161, |
| "learning_rate": 5.391282486860809e-05, |
| "loss": 0.74134259, |
| "memory(GiB)": 67.73, |
| "step": 3645, |
| "train_speed(iter/s)": 0.039458 |
| }, |
| { |
| "acc": 0.78789535, |
| "epoch": 2.5103163686382395, |
| "grad_norm": 1.5004796981811523, |
| "learning_rate": 5.3799447216548907e-05, |
| "loss": 0.7244381, |
| "memory(GiB)": 67.73, |
| "step": 3650, |
| "train_speed(iter/s)": 0.039483 |
| }, |
| { |
| "acc": 0.80307121, |
| "epoch": 2.513755158184319, |
| "grad_norm": 1.3776211738586426, |
| "learning_rate": 5.368604993382822e-05, |
| "loss": 0.67283263, |
| "memory(GiB)": 67.73, |
| "step": 3655, |
| "train_speed(iter/s)": 0.039509 |
| }, |
| { |
| "acc": 0.78840837, |
| "epoch": 2.517193947730399, |
| "grad_norm": 1.489513635635376, |
| "learning_rate": 5.357263360710951e-05, |
| "loss": 0.73468142, |
| "memory(GiB)": 67.73, |
| "step": 3660, |
| "train_speed(iter/s)": 0.039533 |
| }, |
| { |
| "acc": 0.78454609, |
| "epoch": 2.5206327372764785, |
| "grad_norm": 1.5130376815795898, |
| "learning_rate": 5.345919882315481e-05, |
| "loss": 0.74815798, |
| "memory(GiB)": 67.73, |
| "step": 3665, |
| "train_speed(iter/s)": 0.039558 |
| }, |
| { |
| "acc": 0.77992659, |
| "epoch": 2.524071526822558, |
| "grad_norm": 1.5401512384414673, |
| "learning_rate": 5.3345746168821634e-05, |
| "loss": 0.74576526, |
| "memory(GiB)": 67.73, |
| "step": 3670, |
| "train_speed(iter/s)": 0.039581 |
| }, |
| { |
| "acc": 0.78538713, |
| "epoch": 2.5275103163686383, |
| "grad_norm": 1.6626590490341187, |
| "learning_rate": 5.3232276231059905e-05, |
| "loss": 0.72729344, |
| "memory(GiB)": 67.73, |
| "step": 3675, |
| "train_speed(iter/s)": 0.039608 |
| }, |
| { |
| "acc": 0.77599993, |
| "epoch": 2.530949105914718, |
| "grad_norm": 1.4081122875213623, |
| "learning_rate": 5.311878959690906e-05, |
| "loss": 0.76209216, |
| "memory(GiB)": 67.73, |
| "step": 3680, |
| "train_speed(iter/s)": 0.039628 |
| }, |
| { |
| "acc": 0.78826327, |
| "epoch": 2.5343878954607977, |
| "grad_norm": 1.474022626876831, |
| "learning_rate": 5.3005286853494854e-05, |
| "loss": 0.71333871, |
| "memory(GiB)": 67.73, |
| "step": 3685, |
| "train_speed(iter/s)": 0.039652 |
| }, |
| { |
| "acc": 0.78787079, |
| "epoch": 2.537826685006878, |
| "grad_norm": 1.648646354675293, |
| "learning_rate": 5.289176858802634e-05, |
| "loss": 0.72448759, |
| "memory(GiB)": 67.73, |
| "step": 3690, |
| "train_speed(iter/s)": 0.039677 |
| }, |
| { |
| "acc": 0.78479404, |
| "epoch": 2.5412654745529575, |
| "grad_norm": 1.4439847469329834, |
| "learning_rate": 5.277823538779295e-05, |
| "loss": 0.72407675, |
| "memory(GiB)": 67.73, |
| "step": 3695, |
| "train_speed(iter/s)": 0.039702 |
| }, |
| { |
| "acc": 0.77956858, |
| "epoch": 2.544704264099037, |
| "grad_norm": 1.2535481452941895, |
| "learning_rate": 5.2664687840161364e-05, |
| "loss": 0.74480648, |
| "memory(GiB)": 67.73, |
| "step": 3700, |
| "train_speed(iter/s)": 0.039725 |
| }, |
| { |
| "epoch": 2.544704264099037, |
| "eval_acc": 0.7681029828586854, |
| "eval_loss": 0.8239570260047913, |
| "eval_runtime": 1156.7503, |
| "eval_samples_per_second": 3.703, |
| "eval_steps_per_second": 0.067, |
| "step": 3700 |
| }, |
| { |
| "acc": 0.78182096, |
| "epoch": 2.548143053645117, |
| "grad_norm": 1.328555703163147, |
| "learning_rate": 5.255112653257247e-05, |
| "loss": 0.75617981, |
| "memory(GiB)": 67.73, |
| "step": 3705, |
| "train_speed(iter/s)": 0.039261 |
| }, |
| { |
| "acc": 0.78516607, |
| "epoch": 2.5515818431911965, |
| "grad_norm": 1.5017790794372559, |
| "learning_rate": 5.243755205253834e-05, |
| "loss": 0.73223658, |
| "memory(GiB)": 67.73, |
| "step": 3710, |
| "train_speed(iter/s)": 0.039286 |
| }, |
| { |
| "acc": 0.78861194, |
| "epoch": 2.5550206327372766, |
| "grad_norm": 1.309441089630127, |
| "learning_rate": 5.232396498763923e-05, |
| "loss": 0.7213201, |
| "memory(GiB)": 67.73, |
| "step": 3715, |
| "train_speed(iter/s)": 0.039308 |
| }, |
| { |
| "acc": 0.78652673, |
| "epoch": 2.5584594222833563, |
| "grad_norm": 2.0742311477661133, |
| "learning_rate": 5.2210365925520445e-05, |
| "loss": 0.73911443, |
| "memory(GiB)": 67.73, |
| "step": 3720, |
| "train_speed(iter/s)": 0.039335 |
| }, |
| { |
| "acc": 0.78357706, |
| "epoch": 2.561898211829436, |
| "grad_norm": 1.4650071859359741, |
| "learning_rate": 5.2096755453889404e-05, |
| "loss": 0.74594064, |
| "memory(GiB)": 67.73, |
| "step": 3725, |
| "train_speed(iter/s)": 0.039357 |
| }, |
| { |
| "acc": 0.78125381, |
| "epoch": 2.5653370013755157, |
| "grad_norm": 1.7474429607391357, |
| "learning_rate": 5.198313416051257e-05, |
| "loss": 0.75290685, |
| "memory(GiB)": 67.73, |
| "step": 3730, |
| "train_speed(iter/s)": 0.039381 |
| }, |
| { |
| "acc": 0.77420011, |
| "epoch": 2.5687757909215954, |
| "grad_norm": 1.6091666221618652, |
| "learning_rate": 5.186950263321233e-05, |
| "loss": 0.79236693, |
| "memory(GiB)": 67.73, |
| "step": 3735, |
| "train_speed(iter/s)": 0.039403 |
| }, |
| { |
| "acc": 0.77931113, |
| "epoch": 2.5722145804676755, |
| "grad_norm": 1.9077335596084595, |
| "learning_rate": 5.1755861459864064e-05, |
| "loss": 0.74636703, |
| "memory(GiB)": 67.73, |
| "step": 3740, |
| "train_speed(iter/s)": 0.039425 |
| }, |
| { |
| "acc": 0.7796699, |
| "epoch": 2.575653370013755, |
| "grad_norm": 1.6318970918655396, |
| "learning_rate": 5.164221122839306e-05, |
| "loss": 0.76515536, |
| "memory(GiB)": 67.73, |
| "step": 3745, |
| "train_speed(iter/s)": 0.039444 |
| }, |
| { |
| "acc": 0.77925997, |
| "epoch": 2.579092159559835, |
| "grad_norm": 1.563817024230957, |
| "learning_rate": 5.1528552526771425e-05, |
| "loss": 0.74128981, |
| "memory(GiB)": 67.73, |
| "step": 3750, |
| "train_speed(iter/s)": 0.039469 |
| }, |
| { |
| "acc": 0.78557882, |
| "epoch": 2.582530949105915, |
| "grad_norm": 1.3365668058395386, |
| "learning_rate": 5.141488594301512e-05, |
| "loss": 0.72270107, |
| "memory(GiB)": 67.73, |
| "step": 3755, |
| "train_speed(iter/s)": 0.039493 |
| }, |
| { |
| "acc": 0.78748364, |
| "epoch": 2.5859697386519946, |
| "grad_norm": 1.33451247215271, |
| "learning_rate": 5.1301212065180895e-05, |
| "loss": 0.74060202, |
| "memory(GiB)": 67.73, |
| "step": 3760, |
| "train_speed(iter/s)": 0.039516 |
| }, |
| { |
| "acc": 0.79184856, |
| "epoch": 2.5894085281980743, |
| "grad_norm": 1.4293380975723267, |
| "learning_rate": 5.118753148136318e-05, |
| "loss": 0.7231204, |
| "memory(GiB)": 67.73, |
| "step": 3765, |
| "train_speed(iter/s)": 0.039538 |
| }, |
| { |
| "acc": 0.7917345, |
| "epoch": 2.592847317744154, |
| "grad_norm": 1.4640839099884033, |
| "learning_rate": 5.107384477969117e-05, |
| "loss": 0.72228947, |
| "memory(GiB)": 67.73, |
| "step": 3770, |
| "train_speed(iter/s)": 0.039558 |
| }, |
| { |
| "acc": 0.78101654, |
| "epoch": 2.5962861072902337, |
| "grad_norm": 1.5235430002212524, |
| "learning_rate": 5.0960152548325676e-05, |
| "loss": 0.75548983, |
| "memory(GiB)": 67.73, |
| "step": 3775, |
| "train_speed(iter/s)": 0.039578 |
| }, |
| { |
| "acc": 0.79459238, |
| "epoch": 2.599724896836314, |
| "grad_norm": 1.3068392276763916, |
| "learning_rate": 5.08464553754561e-05, |
| "loss": 0.70593162, |
| "memory(GiB)": 67.73, |
| "step": 3780, |
| "train_speed(iter/s)": 0.039602 |
| }, |
| { |
| "acc": 0.79126248, |
| "epoch": 2.6031636863823935, |
| "grad_norm": 1.3516395092010498, |
| "learning_rate": 5.0732753849297434e-05, |
| "loss": 0.72088032, |
| "memory(GiB)": 67.73, |
| "step": 3785, |
| "train_speed(iter/s)": 0.039626 |
| }, |
| { |
| "acc": 0.79167919, |
| "epoch": 2.606602475928473, |
| "grad_norm": 1.7003644704818726, |
| "learning_rate": 5.06190485580872e-05, |
| "loss": 0.68689594, |
| "memory(GiB)": 67.73, |
| "step": 3790, |
| "train_speed(iter/s)": 0.039647 |
| }, |
| { |
| "acc": 0.78057427, |
| "epoch": 2.610041265474553, |
| "grad_norm": 1.7799345254898071, |
| "learning_rate": 5.0505340090082376e-05, |
| "loss": 0.75313406, |
| "memory(GiB)": 67.73, |
| "step": 3795, |
| "train_speed(iter/s)": 0.03967 |
| }, |
| { |
| "acc": 0.78115511, |
| "epoch": 2.6134800550206325, |
| "grad_norm": 1.3012539148330688, |
| "learning_rate": 5.039162903355639e-05, |
| "loss": 0.75619287, |
| "memory(GiB)": 67.73, |
| "step": 3800, |
| "train_speed(iter/s)": 0.039687 |
| }, |
| { |
| "epoch": 2.6134800550206325, |
| "eval_acc": 0.7681029828586854, |
| "eval_loss": 0.8214000463485718, |
| "eval_runtime": 1118.8883, |
| "eval_samples_per_second": 3.828, |
| "eval_steps_per_second": 0.069, |
| "step": 3800 |
| }, |
| { |
| "acc": 0.78926849, |
| "epoch": 2.6169188445667126, |
| "grad_norm": 1.3302139043807983, |
| "learning_rate": 5.027791597679603e-05, |
| "loss": 0.72202902, |
| "memory(GiB)": 67.73, |
| "step": 3805, |
| "train_speed(iter/s)": 0.039252 |
| }, |
| { |
| "acc": 0.788554, |
| "epoch": 2.6203576341127923, |
| "grad_norm": 1.3796292543411255, |
| "learning_rate": 5.0164201508098486e-05, |
| "loss": 0.73341327, |
| "memory(GiB)": 67.73, |
| "step": 3810, |
| "train_speed(iter/s)": 0.039275 |
| }, |
| { |
| "acc": 0.78986712, |
| "epoch": 2.623796423658872, |
| "grad_norm": 1.5008918046951294, |
| "learning_rate": 5.00504862157682e-05, |
| "loss": 0.70993729, |
| "memory(GiB)": 67.73, |
| "step": 3815, |
| "train_speed(iter/s)": 0.0393 |
| }, |
| { |
| "acc": 0.79516368, |
| "epoch": 2.627235213204952, |
| "grad_norm": 1.3220473527908325, |
| "learning_rate": 4.9936770688113924e-05, |
| "loss": 0.70671806, |
| "memory(GiB)": 67.73, |
| "step": 3820, |
| "train_speed(iter/s)": 0.039326 |
| }, |
| { |
| "acc": 0.77930651, |
| "epoch": 2.6306740027510314, |
| "grad_norm": 1.3324934244155884, |
| "learning_rate": 4.982305551344558e-05, |
| "loss": 0.76113019, |
| "memory(GiB)": 67.73, |
| "step": 3825, |
| "train_speed(iter/s)": 0.039345 |
| }, |
| { |
| "acc": 0.78931274, |
| "epoch": 2.6341127922971115, |
| "grad_norm": 1.561617374420166, |
| "learning_rate": 4.970934128007131e-05, |
| "loss": 0.73203354, |
| "memory(GiB)": 67.73, |
| "step": 3830, |
| "train_speed(iter/s)": 0.039369 |
| }, |
| { |
| "acc": 0.78196325, |
| "epoch": 2.637551581843191, |
| "grad_norm": 1.5396491289138794, |
| "learning_rate": 4.959562857629432e-05, |
| "loss": 0.74629278, |
| "memory(GiB)": 67.73, |
| "step": 3835, |
| "train_speed(iter/s)": 0.039389 |
| }, |
| { |
| "acc": 0.79152188, |
| "epoch": 2.640990371389271, |
| "grad_norm": 1.5757373571395874, |
| "learning_rate": 4.948191799041e-05, |
| "loss": 0.71405354, |
| "memory(GiB)": 67.73, |
| "step": 3840, |
| "train_speed(iter/s)": 0.039408 |
| }, |
| { |
| "acc": 0.78608985, |
| "epoch": 2.644429160935351, |
| "grad_norm": 1.47767174243927, |
| "learning_rate": 4.936821011070271e-05, |
| "loss": 0.72424574, |
| "memory(GiB)": 67.73, |
| "step": 3845, |
| "train_speed(iter/s)": 0.039432 |
| }, |
| { |
| "acc": 0.78729639, |
| "epoch": 2.6478679504814306, |
| "grad_norm": 1.2262262105941772, |
| "learning_rate": 4.925450552544281e-05, |
| "loss": 0.72731237, |
| "memory(GiB)": 67.73, |
| "step": 3850, |
| "train_speed(iter/s)": 0.039453 |
| }, |
| { |
| "acc": 0.78679304, |
| "epoch": 2.6513067400275103, |
| "grad_norm": 1.4017452001571655, |
| "learning_rate": 4.914080482288365e-05, |
| "loss": 0.71175966, |
| "memory(GiB)": 67.73, |
| "step": 3855, |
| "train_speed(iter/s)": 0.039476 |
| }, |
| { |
| "acc": 0.79027119, |
| "epoch": 2.65474552957359, |
| "grad_norm": 1.5579813718795776, |
| "learning_rate": 4.902710859125846e-05, |
| "loss": 0.71102552, |
| "memory(GiB)": 67.98, |
| "step": 3860, |
| "train_speed(iter/s)": 0.039498 |
| }, |
| { |
| "acc": 0.79366422, |
| "epoch": 2.6581843191196697, |
| "grad_norm": 1.3325603008270264, |
| "learning_rate": 4.8913417418777377e-05, |
| "loss": 0.69916854, |
| "memory(GiB)": 67.98, |
| "step": 3865, |
| "train_speed(iter/s)": 0.039517 |
| }, |
| { |
| "acc": 0.78973618, |
| "epoch": 2.66162310866575, |
| "grad_norm": 1.4464627504348755, |
| "learning_rate": 4.879973189362433e-05, |
| "loss": 0.72573528, |
| "memory(GiB)": 67.98, |
| "step": 3870, |
| "train_speed(iter/s)": 0.039542 |
| }, |
| { |
| "acc": 0.78937593, |
| "epoch": 2.6650618982118295, |
| "grad_norm": 1.4809215068817139, |
| "learning_rate": 4.8686052603954065e-05, |
| "loss": 0.72520885, |
| "memory(GiB)": 67.98, |
| "step": 3875, |
| "train_speed(iter/s)": 0.039562 |
| }, |
| { |
| "acc": 0.7916564, |
| "epoch": 2.668500687757909, |
| "grad_norm": 1.4060372114181519, |
| "learning_rate": 4.857238013788902e-05, |
| "loss": 0.71384468, |
| "memory(GiB)": 67.98, |
| "step": 3880, |
| "train_speed(iter/s)": 0.039586 |
| }, |
| { |
| "acc": 0.78981237, |
| "epoch": 2.671939477303989, |
| "grad_norm": 1.481585144996643, |
| "learning_rate": 4.845871508351637e-05, |
| "loss": 0.72426672, |
| "memory(GiB)": 67.98, |
| "step": 3885, |
| "train_speed(iter/s)": 0.039608 |
| }, |
| { |
| "acc": 0.79329553, |
| "epoch": 2.6753782668500685, |
| "grad_norm": 1.6132746934890747, |
| "learning_rate": 4.834505802888493e-05, |
| "loss": 0.70904198, |
| "memory(GiB)": 67.98, |
| "step": 3890, |
| "train_speed(iter/s)": 0.039631 |
| }, |
| { |
| "acc": 0.78727617, |
| "epoch": 2.6788170563961486, |
| "grad_norm": 1.845495343208313, |
| "learning_rate": 4.8231409562002164e-05, |
| "loss": 0.72750425, |
| "memory(GiB)": 67.98, |
| "step": 3895, |
| "train_speed(iter/s)": 0.039654 |
| }, |
| { |
| "acc": 0.78334684, |
| "epoch": 2.6822558459422283, |
| "grad_norm": 1.6697547435760498, |
| "learning_rate": 4.811777027083104e-05, |
| "loss": 0.74594717, |
| "memory(GiB)": 67.98, |
| "step": 3900, |
| "train_speed(iter/s)": 0.039676 |
| }, |
| { |
| "epoch": 2.6822558459422283, |
| "eval_acc": 0.7695764160705448, |
| "eval_loss": 0.8164530396461487, |
| "eval_runtime": 1094.9986, |
| "eval_samples_per_second": 3.911, |
| "eval_steps_per_second": 0.07, |
| "step": 3900 |
| }, |
| { |
| "acc": 0.78133011, |
| "epoch": 2.685694635488308, |
| "grad_norm": 1.5049043893814087, |
| "learning_rate": 4.80041407432871e-05, |
| "loss": 0.74013877, |
| "memory(GiB)": 67.98, |
| "step": 3905, |
| "train_speed(iter/s)": 0.039262 |
| }, |
| { |
| "acc": 0.78779163, |
| "epoch": 2.689133425034388, |
| "grad_norm": 1.292845606803894, |
| "learning_rate": 4.7890521567235375e-05, |
| "loss": 0.73777471, |
| "memory(GiB)": 67.98, |
| "step": 3910, |
| "train_speed(iter/s)": 0.039285 |
| }, |
| { |
| "acc": 0.78793478, |
| "epoch": 2.692572214580468, |
| "grad_norm": 1.6969997882843018, |
| "learning_rate": 4.7776913330487335e-05, |
| "loss": 0.72460685, |
| "memory(GiB)": 67.98, |
| "step": 3915, |
| "train_speed(iter/s)": 0.039309 |
| }, |
| { |
| "acc": 0.78481874, |
| "epoch": 2.6960110041265475, |
| "grad_norm": 1.6642791032791138, |
| "learning_rate": 4.766331662079784e-05, |
| "loss": 0.73782244, |
| "memory(GiB)": 67.98, |
| "step": 3920, |
| "train_speed(iter/s)": 0.039331 |
| }, |
| { |
| "acc": 0.77672281, |
| "epoch": 2.699449793672627, |
| "grad_norm": 1.464065670967102, |
| "learning_rate": 4.754973202586213e-05, |
| "loss": 0.77285328, |
| "memory(GiB)": 67.98, |
| "step": 3925, |
| "train_speed(iter/s)": 0.039351 |
| }, |
| { |
| "acc": 0.78013086, |
| "epoch": 2.702888583218707, |
| "grad_norm": 1.6267447471618652, |
| "learning_rate": 4.7436160133312756e-05, |
| "loss": 0.77444224, |
| "memory(GiB)": 67.98, |
| "step": 3930, |
| "train_speed(iter/s)": 0.039372 |
| }, |
| { |
| "acc": 0.79396415, |
| "epoch": 2.706327372764787, |
| "grad_norm": 1.377986192703247, |
| "learning_rate": 4.7322601530716593e-05, |
| "loss": 0.69987969, |
| "memory(GiB)": 67.98, |
| "step": 3935, |
| "train_speed(iter/s)": 0.039392 |
| }, |
| { |
| "acc": 0.78015747, |
| "epoch": 2.7097661623108666, |
| "grad_norm": 1.5132167339324951, |
| "learning_rate": 4.72090568055717e-05, |
| "loss": 0.73972359, |
| "memory(GiB)": 67.98, |
| "step": 3940, |
| "train_speed(iter/s)": 0.039413 |
| }, |
| { |
| "acc": 0.78305364, |
| "epoch": 2.7132049518569463, |
| "grad_norm": 1.3939101696014404, |
| "learning_rate": 4.709552654530438e-05, |
| "loss": 0.74475136, |
| "memory(GiB)": 67.98, |
| "step": 3945, |
| "train_speed(iter/s)": 0.039437 |
| }, |
| { |
| "acc": 0.79228973, |
| "epoch": 2.716643741403026, |
| "grad_norm": 1.5657391548156738, |
| "learning_rate": 4.69820113372661e-05, |
| "loss": 0.70100541, |
| "memory(GiB)": 67.98, |
| "step": 3950, |
| "train_speed(iter/s)": 0.039459 |
| }, |
| { |
| "acc": 0.79344339, |
| "epoch": 2.7200825309491057, |
| "grad_norm": 1.480087399482727, |
| "learning_rate": 4.686851176873045e-05, |
| "loss": 0.70072994, |
| "memory(GiB)": 67.98, |
| "step": 3955, |
| "train_speed(iter/s)": 0.039483 |
| }, |
| { |
| "acc": 0.79308243, |
| "epoch": 2.723521320495186, |
| "grad_norm": 1.5921666622161865, |
| "learning_rate": 4.6755028426890096e-05, |
| "loss": 0.70272703, |
| "memory(GiB)": 67.98, |
| "step": 3960, |
| "train_speed(iter/s)": 0.039506 |
| }, |
| { |
| "acc": 0.79001474, |
| "epoch": 2.7269601100412655, |
| "grad_norm": 1.3979772329330444, |
| "learning_rate": 4.664156189885376e-05, |
| "loss": 0.69688091, |
| "memory(GiB)": 67.98, |
| "step": 3965, |
| "train_speed(iter/s)": 0.039529 |
| }, |
| { |
| "acc": 0.77611008, |
| "epoch": 2.730398899587345, |
| "grad_norm": 1.5082849264144897, |
| "learning_rate": 4.65281127716432e-05, |
| "loss": 0.774436, |
| "memory(GiB)": 67.98, |
| "step": 3970, |
| "train_speed(iter/s)": 0.039554 |
| }, |
| { |
| "acc": 0.78162088, |
| "epoch": 2.7338376891334253, |
| "grad_norm": 1.5324316024780273, |
| "learning_rate": 4.64146816321901e-05, |
| "loss": 0.73829603, |
| "memory(GiB)": 67.98, |
| "step": 3975, |
| "train_speed(iter/s)": 0.039575 |
| }, |
| { |
| "acc": 0.78739605, |
| "epoch": 2.737276478679505, |
| "grad_norm": 1.5039098262786865, |
| "learning_rate": 4.630126906733315e-05, |
| "loss": 0.73118725, |
| "memory(GiB)": 67.98, |
| "step": 3980, |
| "train_speed(iter/s)": 0.039598 |
| }, |
| { |
| "acc": 0.7873105, |
| "epoch": 2.7407152682255846, |
| "grad_norm": 1.6895498037338257, |
| "learning_rate": 4.6187875663814886e-05, |
| "loss": 0.72477093, |
| "memory(GiB)": 67.98, |
| "step": 3985, |
| "train_speed(iter/s)": 0.039618 |
| }, |
| { |
| "acc": 0.78072052, |
| "epoch": 2.7441540577716643, |
| "grad_norm": 1.350480318069458, |
| "learning_rate": 4.607450200827874e-05, |
| "loss": 0.73954563, |
| "memory(GiB)": 67.98, |
| "step": 3990, |
| "train_speed(iter/s)": 0.039639 |
| }, |
| { |
| "acc": 0.78461032, |
| "epoch": 2.747592847317744, |
| "grad_norm": 1.5248438119888306, |
| "learning_rate": 4.596114868726598e-05, |
| "loss": 0.7439085, |
| "memory(GiB)": 67.98, |
| "step": 3995, |
| "train_speed(iter/s)": 0.039656 |
| }, |
| { |
| "acc": 0.7952045, |
| "epoch": 2.751031636863824, |
| "grad_norm": 1.2919889688491821, |
| "learning_rate": 4.5847816287212645e-05, |
| "loss": 0.70409346, |
| "memory(GiB)": 67.98, |
| "step": 4000, |
| "train_speed(iter/s)": 0.03968 |
| }, |
| { |
| "epoch": 2.751031636863824, |
| "eval_acc": 0.7708024024834661, |
| "eval_loss": 0.8120156526565552, |
| "eval_runtime": 1144.2771, |
| "eval_samples_per_second": 3.743, |
| "eval_steps_per_second": 0.067, |
| "step": 4000 |
| }, |
| { |
| "acc": 0.78138909, |
| "epoch": 2.754470426409904, |
| "grad_norm": 1.685054063796997, |
| "learning_rate": 4.57345053944466e-05, |
| "loss": 0.76331453, |
| "memory(GiB)": 67.98, |
| "step": 4005, |
| "train_speed(iter/s)": 0.039257 |
| }, |
| { |
| "acc": 0.79411173, |
| "epoch": 2.7579092159559835, |
| "grad_norm": 2.0349268913269043, |
| "learning_rate": 4.562121659518438e-05, |
| "loss": 0.71027813, |
| "memory(GiB)": 67.98, |
| "step": 4010, |
| "train_speed(iter/s)": 0.039282 |
| }, |
| { |
| "acc": 0.78988757, |
| "epoch": 2.761348005502063, |
| "grad_norm": 1.3015258312225342, |
| "learning_rate": 4.5507950475528236e-05, |
| "loss": 0.71334782, |
| "memory(GiB)": 67.98, |
| "step": 4015, |
| "train_speed(iter/s)": 0.039304 |
| }, |
| { |
| "acc": 0.79387317, |
| "epoch": 2.764786795048143, |
| "grad_norm": 1.4291696548461914, |
| "learning_rate": 4.539470762146308e-05, |
| "loss": 0.70652847, |
| "memory(GiB)": 67.98, |
| "step": 4020, |
| "train_speed(iter/s)": 0.03932 |
| }, |
| { |
| "acc": 0.78285937, |
| "epoch": 2.768225584594223, |
| "grad_norm": 1.477131962776184, |
| "learning_rate": 4.5281488618853503e-05, |
| "loss": 0.75896859, |
| "memory(GiB)": 67.98, |
| "step": 4025, |
| "train_speed(iter/s)": 0.039342 |
| }, |
| { |
| "acc": 0.78991375, |
| "epoch": 2.7716643741403026, |
| "grad_norm": 1.352389931678772, |
| "learning_rate": 4.516829405344063e-05, |
| "loss": 0.71030273, |
| "memory(GiB)": 67.98, |
| "step": 4030, |
| "train_speed(iter/s)": 0.039364 |
| }, |
| { |
| "acc": 0.79130993, |
| "epoch": 2.7751031636863823, |
| "grad_norm": 1.5674926042556763, |
| "learning_rate": 4.505512451083922e-05, |
| "loss": 0.71874084, |
| "memory(GiB)": 67.98, |
| "step": 4035, |
| "train_speed(iter/s)": 0.039386 |
| }, |
| { |
| "acc": 0.79276628, |
| "epoch": 2.7785419532324624, |
| "grad_norm": 1.943419098854065, |
| "learning_rate": 4.494198057653455e-05, |
| "loss": 0.71133614, |
| "memory(GiB)": 67.98, |
| "step": 4040, |
| "train_speed(iter/s)": 0.039408 |
| }, |
| { |
| "acc": 0.80222769, |
| "epoch": 2.7819807427785417, |
| "grad_norm": 1.6925394535064697, |
| "learning_rate": 4.482886283587938e-05, |
| "loss": 0.67353868, |
| "memory(GiB)": 67.98, |
| "step": 4045, |
| "train_speed(iter/s)": 0.039433 |
| }, |
| { |
| "acc": 0.80383835, |
| "epoch": 2.785419532324622, |
| "grad_norm": 1.4405827522277832, |
| "learning_rate": 4.471577187409103e-05, |
| "loss": 0.66345797, |
| "memory(GiB)": 67.98, |
| "step": 4050, |
| "train_speed(iter/s)": 0.039457 |
| }, |
| { |
| "acc": 0.80842638, |
| "epoch": 2.7888583218707015, |
| "grad_norm": 1.674682378768921, |
| "learning_rate": 4.460270827624821e-05, |
| "loss": 0.66658139, |
| "memory(GiB)": 67.98, |
| "step": 4055, |
| "train_speed(iter/s)": 0.039479 |
| }, |
| { |
| "acc": 0.79156666, |
| "epoch": 2.792297111416781, |
| "grad_norm": 1.3792381286621094, |
| "learning_rate": 4.4489672627288124e-05, |
| "loss": 0.73030577, |
| "memory(GiB)": 67.98, |
| "step": 4060, |
| "train_speed(iter/s)": 0.039504 |
| }, |
| { |
| "acc": 0.79346962, |
| "epoch": 2.7957359009628613, |
| "grad_norm": 1.404285192489624, |
| "learning_rate": 4.4376665512003304e-05, |
| "loss": 0.70117588, |
| "memory(GiB)": 67.98, |
| "step": 4065, |
| "train_speed(iter/s)": 0.039526 |
| }, |
| { |
| "acc": 0.790658, |
| "epoch": 2.799174690508941, |
| "grad_norm": 1.417019248008728, |
| "learning_rate": 4.4263687515038755e-05, |
| "loss": 0.70299535, |
| "memory(GiB)": 67.98, |
| "step": 4070, |
| "train_speed(iter/s)": 0.039548 |
| }, |
| { |
| "acc": 0.78736067, |
| "epoch": 2.8026134800550206, |
| "grad_norm": 1.5088238716125488, |
| "learning_rate": 4.415073922088876e-05, |
| "loss": 0.73802028, |
| "memory(GiB)": 67.98, |
| "step": 4075, |
| "train_speed(iter/s)": 0.039565 |
| }, |
| { |
| "acc": 0.79492655, |
| "epoch": 2.8060522696011003, |
| "grad_norm": 1.443625569343567, |
| "learning_rate": 4.4037821213893964e-05, |
| "loss": 0.71042171, |
| "memory(GiB)": 67.98, |
| "step": 4080, |
| "train_speed(iter/s)": 0.039586 |
| }, |
| { |
| "acc": 0.79075756, |
| "epoch": 2.80949105914718, |
| "grad_norm": 1.464545726776123, |
| "learning_rate": 4.392493407823832e-05, |
| "loss": 0.70024977, |
| "memory(GiB)": 67.98, |
| "step": 4085, |
| "train_speed(iter/s)": 0.039605 |
| }, |
| { |
| "acc": 0.79780464, |
| "epoch": 2.81292984869326, |
| "grad_norm": 1.6561044454574585, |
| "learning_rate": 4.3812078397946074e-05, |
| "loss": 0.69342613, |
| "memory(GiB)": 67.98, |
| "step": 4090, |
| "train_speed(iter/s)": 0.039624 |
| }, |
| { |
| "acc": 0.78435755, |
| "epoch": 2.81636863823934, |
| "grad_norm": 1.3976974487304688, |
| "learning_rate": 4.369925475687873e-05, |
| "loss": 0.71552553, |
| "memory(GiB)": 67.98, |
| "step": 4095, |
| "train_speed(iter/s)": 0.039648 |
| }, |
| { |
| "acc": 0.7799448, |
| "epoch": 2.8198074277854195, |
| "grad_norm": 1.3698362112045288, |
| "learning_rate": 4.358646373873203e-05, |
| "loss": 0.75982933, |
| "memory(GiB)": 67.98, |
| "step": 4100, |
| "train_speed(iter/s)": 0.039669 |
| }, |
| { |
| "epoch": 2.8198074277854195, |
| "eval_acc": 0.7719721509875377, |
| "eval_loss": 0.8082969784736633, |
| "eval_runtime": 1151.3186, |
| "eval_samples_per_second": 3.72, |
| "eval_steps_per_second": 0.067, |
| "step": 4100 |
| }, |
| { |
| "acc": 0.78468771, |
| "epoch": 2.823246217331499, |
| "grad_norm": 1.7453495264053345, |
| "learning_rate": 4.3473705927032957e-05, |
| "loss": 0.73120604, |
| "memory(GiB)": 67.98, |
| "step": 4105, |
| "train_speed(iter/s)": 0.039254 |
| }, |
| { |
| "acc": 0.77324467, |
| "epoch": 2.826685006877579, |
| "grad_norm": 1.309380292892456, |
| "learning_rate": 4.336098190513667e-05, |
| "loss": 0.7686954, |
| "memory(GiB)": 67.98, |
| "step": 4110, |
| "train_speed(iter/s)": 0.039274 |
| }, |
| { |
| "acc": 0.78504181, |
| "epoch": 2.830123796423659, |
| "grad_norm": 1.3735424280166626, |
| "learning_rate": 4.324829225622355e-05, |
| "loss": 0.72278986, |
| "memory(GiB)": 67.98, |
| "step": 4115, |
| "train_speed(iter/s)": 0.039295 |
| }, |
| { |
| "acc": 0.78531666, |
| "epoch": 2.8335625859697386, |
| "grad_norm": 1.3972020149230957, |
| "learning_rate": 4.3135637563296157e-05, |
| "loss": 0.74182968, |
| "memory(GiB)": 67.98, |
| "step": 4120, |
| "train_speed(iter/s)": 0.039316 |
| }, |
| { |
| "acc": 0.78637772, |
| "epoch": 2.8370013755158183, |
| "grad_norm": 1.5424326658248901, |
| "learning_rate": 4.3023018409176145e-05, |
| "loss": 0.74376593, |
| "memory(GiB)": 67.98, |
| "step": 4125, |
| "train_speed(iter/s)": 0.039338 |
| }, |
| { |
| "acc": 0.79664993, |
| "epoch": 2.8404401650618984, |
| "grad_norm": 1.3284099102020264, |
| "learning_rate": 4.2910435376501365e-05, |
| "loss": 0.67242994, |
| "memory(GiB)": 67.98, |
| "step": 4130, |
| "train_speed(iter/s)": 0.039363 |
| }, |
| { |
| "acc": 0.78375196, |
| "epoch": 2.843878954607978, |
| "grad_norm": 1.4063657522201538, |
| "learning_rate": 4.279788904772275e-05, |
| "loss": 0.73797774, |
| "memory(GiB)": 67.98, |
| "step": 4135, |
| "train_speed(iter/s)": 0.039386 |
| }, |
| { |
| "acc": 0.80310926, |
| "epoch": 2.847317744154058, |
| "grad_norm": 1.6251460313796997, |
| "learning_rate": 4.268538000510139e-05, |
| "loss": 0.67094946, |
| "memory(GiB)": 67.98, |
| "step": 4140, |
| "train_speed(iter/s)": 0.039411 |
| }, |
| { |
| "acc": 0.78242793, |
| "epoch": 2.8507565337001375, |
| "grad_norm": 1.4719781875610352, |
| "learning_rate": 4.257290883070545e-05, |
| "loss": 0.7414422, |
| "memory(GiB)": 67.98, |
| "step": 4145, |
| "train_speed(iter/s)": 0.039435 |
| }, |
| { |
| "acc": 0.79309282, |
| "epoch": 2.854195323246217, |
| "grad_norm": 1.491889238357544, |
| "learning_rate": 4.246047610640717e-05, |
| "loss": 0.69513445, |
| "memory(GiB)": 67.98, |
| "step": 4150, |
| "train_speed(iter/s)": 0.039458 |
| }, |
| { |
| "acc": 0.79532785, |
| "epoch": 2.8576341127922973, |
| "grad_norm": 1.4044826030731201, |
| "learning_rate": 4.2348082413879894e-05, |
| "loss": 0.69395657, |
| "memory(GiB)": 67.98, |
| "step": 4155, |
| "train_speed(iter/s)": 0.039483 |
| }, |
| { |
| "acc": 0.79063025, |
| "epoch": 2.861072902338377, |
| "grad_norm": 1.4058098793029785, |
| "learning_rate": 4.223572833459501e-05, |
| "loss": 0.71690941, |
| "memory(GiB)": 67.98, |
| "step": 4160, |
| "train_speed(iter/s)": 0.039508 |
| }, |
| { |
| "acc": 0.79869499, |
| "epoch": 2.8645116918844566, |
| "grad_norm": 1.6210905313491821, |
| "learning_rate": 4.212341444981898e-05, |
| "loss": 0.6896822, |
| "memory(GiB)": 67.98, |
| "step": 4165, |
| "train_speed(iter/s)": 0.039532 |
| }, |
| { |
| "acc": 0.79149799, |
| "epoch": 2.8679504814305363, |
| "grad_norm": 1.3731998205184937, |
| "learning_rate": 4.2011141340610326e-05, |
| "loss": 0.7168128, |
| "memory(GiB)": 67.98, |
| "step": 4170, |
| "train_speed(iter/s)": 0.039554 |
| }, |
| { |
| "acc": 0.78879414, |
| "epoch": 2.871389270976616, |
| "grad_norm": 1.632126808166504, |
| "learning_rate": 4.189890958781662e-05, |
| "loss": 0.72364569, |
| "memory(GiB)": 67.98, |
| "step": 4175, |
| "train_speed(iter/s)": 0.039576 |
| }, |
| { |
| "acc": 0.78361959, |
| "epoch": 2.874828060522696, |
| "grad_norm": 1.4791241884231567, |
| "learning_rate": 4.178671977207143e-05, |
| "loss": 0.73310771, |
| "memory(GiB)": 67.98, |
| "step": 4180, |
| "train_speed(iter/s)": 0.039599 |
| }, |
| { |
| "acc": 0.79908352, |
| "epoch": 2.878266850068776, |
| "grad_norm": 1.7965590953826904, |
| "learning_rate": 4.1674572473791395e-05, |
| "loss": 0.69370174, |
| "memory(GiB)": 67.98, |
| "step": 4185, |
| "train_speed(iter/s)": 0.039621 |
| }, |
| { |
| "acc": 0.78735409, |
| "epoch": 2.8817056396148555, |
| "grad_norm": 1.6834094524383545, |
| "learning_rate": 4.156246827317322e-05, |
| "loss": 0.72156515, |
| "memory(GiB)": 67.98, |
| "step": 4190, |
| "train_speed(iter/s)": 0.039644 |
| }, |
| { |
| "acc": 0.78155212, |
| "epoch": 2.8851444291609356, |
| "grad_norm": 1.872073769569397, |
| "learning_rate": 4.14504077501906e-05, |
| "loss": 0.74036779, |
| "memory(GiB)": 67.98, |
| "step": 4195, |
| "train_speed(iter/s)": 0.039667 |
| }, |
| { |
| "acc": 0.79145999, |
| "epoch": 2.8885832187070153, |
| "grad_norm": 1.3122477531433105, |
| "learning_rate": 4.133839148459126e-05, |
| "loss": 0.71245356, |
| "memory(GiB)": 67.98, |
| "step": 4200, |
| "train_speed(iter/s)": 0.039688 |
| }, |
| { |
| "epoch": 2.8885832187070153, |
| "eval_acc": 0.7714435146443515, |
| "eval_loss": 0.805468738079071, |
| "eval_runtime": 1087.9192, |
| "eval_samples_per_second": 3.937, |
| "eval_steps_per_second": 0.071, |
| "step": 4200 |
| }, |
| { |
| "acc": 0.79164152, |
| "epoch": 2.892022008253095, |
| "grad_norm": 1.5151678323745728, |
| "learning_rate": 4.122642005589398e-05, |
| "loss": 0.71430082, |
| "memory(GiB)": 67.98, |
| "step": 4205, |
| "train_speed(iter/s)": 0.039306 |
| }, |
| { |
| "acc": 0.79683599, |
| "epoch": 2.8954607977991746, |
| "grad_norm": 1.5568134784698486, |
| "learning_rate": 4.111449404338556e-05, |
| "loss": 0.69535141, |
| "memory(GiB)": 67.98, |
| "step": 4210, |
| "train_speed(iter/s)": 0.039331 |
| }, |
| { |
| "acc": 0.78143187, |
| "epoch": 2.8988995873452543, |
| "grad_norm": 1.6322216987609863, |
| "learning_rate": 4.100261402611785e-05, |
| "loss": 0.74795027, |
| "memory(GiB)": 67.98, |
| "step": 4215, |
| "train_speed(iter/s)": 0.039355 |
| }, |
| { |
| "acc": 0.779213, |
| "epoch": 2.9023383768913344, |
| "grad_norm": 1.479254126548767, |
| "learning_rate": 4.089078058290476e-05, |
| "loss": 0.76658916, |
| "memory(GiB)": 67.98, |
| "step": 4220, |
| "train_speed(iter/s)": 0.039376 |
| }, |
| { |
| "acc": 0.7864768, |
| "epoch": 2.905777166437414, |
| "grad_norm": 1.4543869495391846, |
| "learning_rate": 4.077899429231921e-05, |
| "loss": 0.71652775, |
| "memory(GiB)": 67.98, |
| "step": 4225, |
| "train_speed(iter/s)": 0.039397 |
| }, |
| { |
| "acc": 0.78852596, |
| "epoch": 2.909215955983494, |
| "grad_norm": 1.5353100299835205, |
| "learning_rate": 4.066725573269019e-05, |
| "loss": 0.7080534, |
| "memory(GiB)": 67.98, |
| "step": 4230, |
| "train_speed(iter/s)": 0.039421 |
| }, |
| { |
| "acc": 0.78499179, |
| "epoch": 2.9126547455295735, |
| "grad_norm": 1.7298237085342407, |
| "learning_rate": 4.055556548209975e-05, |
| "loss": 0.73987002, |
| "memory(GiB)": 67.98, |
| "step": 4235, |
| "train_speed(iter/s)": 0.039446 |
| }, |
| { |
| "acc": 0.79733381, |
| "epoch": 2.916093535075653, |
| "grad_norm": 1.3336453437805176, |
| "learning_rate": 4.044392411838003e-05, |
| "loss": 0.6844718, |
| "memory(GiB)": 67.98, |
| "step": 4240, |
| "train_speed(iter/s)": 0.039468 |
| }, |
| { |
| "acc": 0.78939738, |
| "epoch": 2.9195323246217333, |
| "grad_norm": 1.5154653787612915, |
| "learning_rate": 4.033233221911023e-05, |
| "loss": 0.72056727, |
| "memory(GiB)": 67.98, |
| "step": 4245, |
| "train_speed(iter/s)": 0.039491 |
| }, |
| { |
| "acc": 0.78145633, |
| "epoch": 2.922971114167813, |
| "grad_norm": 1.6946913003921509, |
| "learning_rate": 4.022079036161366e-05, |
| "loss": 0.74741826, |
| "memory(GiB)": 67.98, |
| "step": 4250, |
| "train_speed(iter/s)": 0.039515 |
| }, |
| { |
| "acc": 0.78894501, |
| "epoch": 2.9264099037138926, |
| "grad_norm": 1.7859429121017456, |
| "learning_rate": 4.0109299122954716e-05, |
| "loss": 0.71477051, |
| "memory(GiB)": 67.98, |
| "step": 4255, |
| "train_speed(iter/s)": 0.039538 |
| }, |
| { |
| "acc": 0.80096769, |
| "epoch": 2.9298486932599723, |
| "grad_norm": 1.550113558769226, |
| "learning_rate": 3.999785907993594e-05, |
| "loss": 0.66986256, |
| "memory(GiB)": 67.98, |
| "step": 4260, |
| "train_speed(iter/s)": 0.039561 |
| }, |
| { |
| "acc": 0.79326687, |
| "epoch": 2.933287482806052, |
| "grad_norm": 1.3913989067077637, |
| "learning_rate": 3.9886470809095015e-05, |
| "loss": 0.70431404, |
| "memory(GiB)": 67.98, |
| "step": 4265, |
| "train_speed(iter/s)": 0.039585 |
| }, |
| { |
| "acc": 0.78397541, |
| "epoch": 2.936726272352132, |
| "grad_norm": 1.7210358381271362, |
| "learning_rate": 3.9775134886701754e-05, |
| "loss": 0.74710093, |
| "memory(GiB)": 67.98, |
| "step": 4270, |
| "train_speed(iter/s)": 0.039608 |
| }, |
| { |
| "acc": 0.79305878, |
| "epoch": 2.940165061898212, |
| "grad_norm": 1.7996710538864136, |
| "learning_rate": 3.966385188875515e-05, |
| "loss": 0.70518632, |
| "memory(GiB)": 67.98, |
| "step": 4275, |
| "train_speed(iter/s)": 0.039629 |
| }, |
| { |
| "acc": 0.79449868, |
| "epoch": 2.9436038514442915, |
| "grad_norm": 1.8419127464294434, |
| "learning_rate": 3.9552622390980425e-05, |
| "loss": 0.69353704, |
| "memory(GiB)": 67.98, |
| "step": 4280, |
| "train_speed(iter/s)": 0.039653 |
| }, |
| { |
| "acc": 0.79312563, |
| "epoch": 2.9470426409903716, |
| "grad_norm": 1.6806973218917847, |
| "learning_rate": 3.944144696882598e-05, |
| "loss": 0.70997305, |
| "memory(GiB)": 67.98, |
| "step": 4285, |
| "train_speed(iter/s)": 0.039675 |
| }, |
| { |
| "acc": 0.77975159, |
| "epoch": 2.9504814305364513, |
| "grad_norm": 1.5093615055084229, |
| "learning_rate": 3.9330326197460466e-05, |
| "loss": 0.7535347, |
| "memory(GiB)": 67.98, |
| "step": 4290, |
| "train_speed(iter/s)": 0.039698 |
| }, |
| { |
| "acc": 0.77885957, |
| "epoch": 2.953920220082531, |
| "grad_norm": 1.7408277988433838, |
| "learning_rate": 3.921926065176977e-05, |
| "loss": 0.75995541, |
| "memory(GiB)": 67.98, |
| "step": 4295, |
| "train_speed(iter/s)": 0.03972 |
| }, |
| { |
| "acc": 0.78874741, |
| "epoch": 2.9573590096286106, |
| "grad_norm": 1.6146240234375, |
| "learning_rate": 3.9108250906354117e-05, |
| "loss": 0.71309519, |
| "memory(GiB)": 67.98, |
| "step": 4300, |
| "train_speed(iter/s)": 0.039742 |
| }, |
| { |
| "epoch": 2.9573590096286106, |
| "eval_acc": 0.7727257389661223, |
| "eval_loss": 0.8019844889640808, |
| "eval_runtime": 1140.1199, |
| "eval_samples_per_second": 3.757, |
| "eval_steps_per_second": 0.068, |
| "step": 4300 |
| }, |
| { |
| "acc": 0.78785725, |
| "epoch": 2.9607977991746903, |
| "grad_norm": 1.555442452430725, |
| "learning_rate": 3.8997297535525026e-05, |
| "loss": 0.72890291, |
| "memory(GiB)": 67.98, |
| "step": 4305, |
| "train_speed(iter/s)": 0.039349 |
| }, |
| { |
| "acc": 0.78933182, |
| "epoch": 2.9642365887207704, |
| "grad_norm": 1.710303783416748, |
| "learning_rate": 3.888640111330235e-05, |
| "loss": 0.73036714, |
| "memory(GiB)": 67.98, |
| "step": 4310, |
| "train_speed(iter/s)": 0.039373 |
| }, |
| { |
| "acc": 0.79446011, |
| "epoch": 2.96767537826685, |
| "grad_norm": 1.7401241064071655, |
| "learning_rate": 3.877556221341133e-05, |
| "loss": 0.70017486, |
| "memory(GiB)": 67.98, |
| "step": 4315, |
| "train_speed(iter/s)": 0.039398 |
| }, |
| { |
| "acc": 0.78983717, |
| "epoch": 2.97111416781293, |
| "grad_norm": 1.5789563655853271, |
| "learning_rate": 3.866478140927961e-05, |
| "loss": 0.70362015, |
| "memory(GiB)": 67.98, |
| "step": 4320, |
| "train_speed(iter/s)": 0.039419 |
| }, |
| { |
| "acc": 0.79765377, |
| "epoch": 2.9745529573590095, |
| "grad_norm": 2.0560176372528076, |
| "learning_rate": 3.8554059274034246e-05, |
| "loss": 0.68930745, |
| "memory(GiB)": 67.98, |
| "step": 4325, |
| "train_speed(iter/s)": 0.039442 |
| }, |
| { |
| "acc": 0.79753799, |
| "epoch": 2.977991746905089, |
| "grad_norm": 1.5742462873458862, |
| "learning_rate": 3.844339638049885e-05, |
| "loss": 0.68201818, |
| "memory(GiB)": 67.98, |
| "step": 4330, |
| "train_speed(iter/s)": 0.039466 |
| }, |
| { |
| "acc": 0.7916151, |
| "epoch": 2.9814305364511693, |
| "grad_norm": 1.7083474397659302, |
| "learning_rate": 3.8332793301190456e-05, |
| "loss": 0.6970108, |
| "memory(GiB)": 67.98, |
| "step": 4335, |
| "train_speed(iter/s)": 0.03949 |
| }, |
| { |
| "acc": 0.7908206, |
| "epoch": 2.984869325997249, |
| "grad_norm": 1.6145273447036743, |
| "learning_rate": 3.822225060831669e-05, |
| "loss": 0.72308092, |
| "memory(GiB)": 67.98, |
| "step": 4340, |
| "train_speed(iter/s)": 0.039512 |
| }, |
| { |
| "acc": 0.79732313, |
| "epoch": 2.9883081155433286, |
| "grad_norm": 1.3791991472244263, |
| "learning_rate": 3.8111768873772757e-05, |
| "loss": 0.68552351, |
| "memory(GiB)": 67.98, |
| "step": 4345, |
| "train_speed(iter/s)": 0.039537 |
| }, |
| { |
| "acc": 0.78215866, |
| "epoch": 2.9917469050894088, |
| "grad_norm": 1.587035059928894, |
| "learning_rate": 3.800134866913852e-05, |
| "loss": 0.74166784, |
| "memory(GiB)": 67.98, |
| "step": 4350, |
| "train_speed(iter/s)": 0.03956 |
| }, |
| { |
| "acc": 0.7990098, |
| "epoch": 2.9951856946354884, |
| "grad_norm": 1.8290317058563232, |
| "learning_rate": 3.7890990565675476e-05, |
| "loss": 0.68875532, |
| "memory(GiB)": 67.98, |
| "step": 4355, |
| "train_speed(iter/s)": 0.039584 |
| }, |
| { |
| "acc": 0.78591781, |
| "epoch": 2.998624484181568, |
| "grad_norm": 1.8819842338562012, |
| "learning_rate": 3.778069513432386e-05, |
| "loss": 0.72816386, |
| "memory(GiB)": 67.98, |
| "step": 4360, |
| "train_speed(iter/s)": 0.039604 |
| }, |
| { |
| "acc": 0.80687866, |
| "epoch": 3.002063273727648, |
| "grad_norm": 1.3995342254638672, |
| "learning_rate": 3.767046294569967e-05, |
| "loss": 0.64414482, |
| "memory(GiB)": 67.98, |
| "step": 4365, |
| "train_speed(iter/s)": 0.039611 |
| }, |
| { |
| "acc": 0.80390854, |
| "epoch": 3.0055020632737275, |
| "grad_norm": 1.5679051876068115, |
| "learning_rate": 3.75602945700917e-05, |
| "loss": 0.66774035, |
| "memory(GiB)": 67.98, |
| "step": 4370, |
| "train_speed(iter/s)": 0.039629 |
| }, |
| { |
| "acc": 0.79944701, |
| "epoch": 3.0089408528198076, |
| "grad_norm": 1.531205177307129, |
| "learning_rate": 3.7450190577458635e-05, |
| "loss": 0.67704058, |
| "memory(GiB)": 67.98, |
| "step": 4375, |
| "train_speed(iter/s)": 0.039649 |
| }, |
| { |
| "acc": 0.80703545, |
| "epoch": 3.0123796423658873, |
| "grad_norm": 6.210807800292969, |
| "learning_rate": 3.734015153742605e-05, |
| "loss": 0.64957862, |
| "memory(GiB)": 67.98, |
| "step": 4380, |
| "train_speed(iter/s)": 0.039672 |
| }, |
| { |
| "acc": 0.80491982, |
| "epoch": 3.015818431911967, |
| "grad_norm": 1.6315518617630005, |
| "learning_rate": 3.7230178019283506e-05, |
| "loss": 0.65046768, |
| "memory(GiB)": 67.98, |
| "step": 4385, |
| "train_speed(iter/s)": 0.039693 |
| }, |
| { |
| "acc": 0.8061985, |
| "epoch": 3.0192572214580466, |
| "grad_norm": 1.478652000427246, |
| "learning_rate": 3.712027059198157e-05, |
| "loss": 0.64048343, |
| "memory(GiB)": 67.98, |
| "step": 4390, |
| "train_speed(iter/s)": 0.039708 |
| }, |
| { |
| "acc": 0.81162281, |
| "epoch": 3.0226960110041263, |
| "grad_norm": 1.623420238494873, |
| "learning_rate": 3.701042982412889e-05, |
| "loss": 0.62963314, |
| "memory(GiB)": 67.98, |
| "step": 4395, |
| "train_speed(iter/s)": 0.03973 |
| }, |
| { |
| "acc": 0.80488195, |
| "epoch": 3.0261348005502064, |
| "grad_norm": 1.6778922080993652, |
| "learning_rate": 3.690065628398926e-05, |
| "loss": 0.65336089, |
| "memory(GiB)": 67.98, |
| "step": 4400, |
| "train_speed(iter/s)": 0.039751 |
| }, |
| { |
| "epoch": 3.0261348005502064, |
| "eval_acc": 0.7711454537274486, |
| "eval_loss": 0.8140049576759338, |
| "eval_runtime": 1141.0798, |
| "eval_samples_per_second": 3.753, |
| "eval_steps_per_second": 0.067, |
| "step": 4400 |
| }, |
| { |
| "acc": 0.80764694, |
| "epoch": 3.029573590096286, |
| "grad_norm": 1.6117892265319824, |
| "learning_rate": 3.679095053947864e-05, |
| "loss": 0.6384645, |
| "memory(GiB)": 67.98, |
| "step": 4405, |
| "train_speed(iter/s)": 0.039366 |
| }, |
| { |
| "acc": 0.80960245, |
| "epoch": 3.033012379642366, |
| "grad_norm": 1.5972310304641724, |
| "learning_rate": 3.668131315816228e-05, |
| "loss": 0.63809519, |
| "memory(GiB)": 67.98, |
| "step": 4410, |
| "train_speed(iter/s)": 0.039387 |
| }, |
| { |
| "acc": 0.80579681, |
| "epoch": 3.0364511691884455, |
| "grad_norm": 1.6774109601974487, |
| "learning_rate": 3.657174470725173e-05, |
| "loss": 0.64105072, |
| "memory(GiB)": 67.98, |
| "step": 4415, |
| "train_speed(iter/s)": 0.039406 |
| }, |
| { |
| "acc": 0.81135626, |
| "epoch": 3.0398899587345256, |
| "grad_norm": 1.710260033607483, |
| "learning_rate": 3.646224575360194e-05, |
| "loss": 0.6407239, |
| "memory(GiB)": 67.98, |
| "step": 4420, |
| "train_speed(iter/s)": 0.039428 |
| }, |
| { |
| "acc": 0.81669779, |
| "epoch": 3.0433287482806053, |
| "grad_norm": 1.5772171020507812, |
| "learning_rate": 3.635281686370832e-05, |
| "loss": 0.61197987, |
| "memory(GiB)": 67.98, |
| "step": 4425, |
| "train_speed(iter/s)": 0.039449 |
| }, |
| { |
| "acc": 0.81082649, |
| "epoch": 3.046767537826685, |
| "grad_norm": 2.1017799377441406, |
| "learning_rate": 3.624345860370379e-05, |
| "loss": 0.63282819, |
| "memory(GiB)": 67.98, |
| "step": 4430, |
| "train_speed(iter/s)": 0.039468 |
| }, |
| { |
| "acc": 0.80507336, |
| "epoch": 3.0502063273727646, |
| "grad_norm": 1.904692530632019, |
| "learning_rate": 3.613417153935585e-05, |
| "loss": 0.63742828, |
| "memory(GiB)": 67.98, |
| "step": 4435, |
| "train_speed(iter/s)": 0.039486 |
| }, |
| { |
| "acc": 0.79859557, |
| "epoch": 3.0536451169188448, |
| "grad_norm": 1.673336148262024, |
| "learning_rate": 3.60249562360637e-05, |
| "loss": 0.67739854, |
| "memory(GiB)": 67.98, |
| "step": 4440, |
| "train_speed(iter/s)": 0.039507 |
| }, |
| { |
| "acc": 0.8053956, |
| "epoch": 3.0570839064649244, |
| "grad_norm": 1.6409105062484741, |
| "learning_rate": 3.591581325885528e-05, |
| "loss": 0.64070592, |
| "memory(GiB)": 67.98, |
| "step": 4445, |
| "train_speed(iter/s)": 0.039524 |
| }, |
| { |
| "acc": 0.81321754, |
| "epoch": 3.060522696011004, |
| "grad_norm": 1.599678874015808, |
| "learning_rate": 3.5806743172384325e-05, |
| "loss": 0.62494526, |
| "memory(GiB)": 67.98, |
| "step": 4450, |
| "train_speed(iter/s)": 0.039544 |
| }, |
| { |
| "acc": 0.81660137, |
| "epoch": 3.063961485557084, |
| "grad_norm": 1.527250051498413, |
| "learning_rate": 3.569774654092749e-05, |
| "loss": 0.61917772, |
| "memory(GiB)": 67.98, |
| "step": 4455, |
| "train_speed(iter/s)": 0.039565 |
| }, |
| { |
| "acc": 0.80815334, |
| "epoch": 3.0674002751031635, |
| "grad_norm": 1.9215754270553589, |
| "learning_rate": 3.5588823928381385e-05, |
| "loss": 0.64416943, |
| "memory(GiB)": 67.98, |
| "step": 4460, |
| "train_speed(iter/s)": 0.039584 |
| }, |
| { |
| "acc": 0.81522007, |
| "epoch": 3.0708390646492436, |
| "grad_norm": 1.771016240119934, |
| "learning_rate": 3.54799758982597e-05, |
| "loss": 0.62254939, |
| "memory(GiB)": 67.98, |
| "step": 4465, |
| "train_speed(iter/s)": 0.039604 |
| }, |
| { |
| "acc": 0.81300201, |
| "epoch": 3.0742778541953233, |
| "grad_norm": 1.5185010433197021, |
| "learning_rate": 3.537120301369029e-05, |
| "loss": 0.63570495, |
| "memory(GiB)": 67.98, |
| "step": 4470, |
| "train_speed(iter/s)": 0.039623 |
| }, |
| { |
| "acc": 0.79795976, |
| "epoch": 3.077716643741403, |
| "grad_norm": 1.7474913597106934, |
| "learning_rate": 3.526250583741219e-05, |
| "loss": 0.67301879, |
| "memory(GiB)": 67.98, |
| "step": 4475, |
| "train_speed(iter/s)": 0.039644 |
| }, |
| { |
| "acc": 0.80364552, |
| "epoch": 3.0811554332874826, |
| "grad_norm": 1.611039638519287, |
| "learning_rate": 3.51538849317728e-05, |
| "loss": 0.6553544, |
| "memory(GiB)": 67.98, |
| "step": 4480, |
| "train_speed(iter/s)": 0.039664 |
| }, |
| { |
| "acc": 0.80711832, |
| "epoch": 3.0845942228335628, |
| "grad_norm": 1.956214189529419, |
| "learning_rate": 3.504534085872491e-05, |
| "loss": 0.65441723, |
| "memory(GiB)": 67.98, |
| "step": 4485, |
| "train_speed(iter/s)": 0.039686 |
| }, |
| { |
| "acc": 0.80393448, |
| "epoch": 3.0880330123796425, |
| "grad_norm": 1.7758394479751587, |
| "learning_rate": 3.493687417982382e-05, |
| "loss": 0.63968649, |
| "memory(GiB)": 67.98, |
| "step": 4490, |
| "train_speed(iter/s)": 0.039704 |
| }, |
| { |
| "acc": 0.80570278, |
| "epoch": 3.091471801925722, |
| "grad_norm": 1.878055453300476, |
| "learning_rate": 3.4828485456224454e-05, |
| "loss": 0.64807596, |
| "memory(GiB)": 67.98, |
| "step": 4495, |
| "train_speed(iter/s)": 0.039724 |
| }, |
| { |
| "acc": 0.80985212, |
| "epoch": 3.094910591471802, |
| "grad_norm": 1.647511601448059, |
| "learning_rate": 3.47201752486784e-05, |
| "loss": 0.63398943, |
| "memory(GiB)": 67.98, |
| "step": 4500, |
| "train_speed(iter/s)": 0.039743 |
| }, |
| { |
| "epoch": 3.094910591471802, |
| "eval_acc": 0.7721239933414316, |
| "eval_loss": 0.81331866979599, |
| "eval_runtime": 1133.398, |
| "eval_samples_per_second": 3.779, |
| "eval_steps_per_second": 0.068, |
| "step": 4500 |
| }, |
| { |
| "acc": 0.80513477, |
| "epoch": 3.098349381017882, |
| "grad_norm": 1.8428512811660767, |
| "learning_rate": 3.461194411753105e-05, |
| "loss": 0.64937515, |
| "memory(GiB)": 67.98, |
| "step": 4505, |
| "train_speed(iter/s)": 0.03937 |
| }, |
| { |
| "acc": 0.81019039, |
| "epoch": 3.1017881705639616, |
| "grad_norm": 1.6519265174865723, |
| "learning_rate": 3.450379262271869e-05, |
| "loss": 0.63972459, |
| "memory(GiB)": 67.98, |
| "step": 4510, |
| "train_speed(iter/s)": 0.039392 |
| }, |
| { |
| "acc": 0.8086174, |
| "epoch": 3.1052269601100413, |
| "grad_norm": 1.7133119106292725, |
| "learning_rate": 3.439572132376563e-05, |
| "loss": 0.64712973, |
| "memory(GiB)": 67.98, |
| "step": 4515, |
| "train_speed(iter/s)": 0.039407 |
| }, |
| { |
| "acc": 0.79508266, |
| "epoch": 3.108665749656121, |
| "grad_norm": 1.6571804285049438, |
| "learning_rate": 3.428773077978125e-05, |
| "loss": 0.68026247, |
| "memory(GiB)": 67.98, |
| "step": 4520, |
| "train_speed(iter/s)": 0.039425 |
| }, |
| { |
| "acc": 0.8028862, |
| "epoch": 3.1121045392022006, |
| "grad_norm": 2.0089550018310547, |
| "learning_rate": 3.4179821549457166e-05, |
| "loss": 0.66466484, |
| "memory(GiB)": 67.98, |
| "step": 4525, |
| "train_speed(iter/s)": 0.039441 |
| }, |
| { |
| "acc": 0.7982996, |
| "epoch": 3.1155433287482808, |
| "grad_norm": 1.620611548423767, |
| "learning_rate": 3.407199419106429e-05, |
| "loss": 0.67201767, |
| "memory(GiB)": 67.98, |
| "step": 4530, |
| "train_speed(iter/s)": 0.03946 |
| }, |
| { |
| "acc": 0.81185446, |
| "epoch": 3.1189821182943605, |
| "grad_norm": 1.5307915210723877, |
| "learning_rate": 3.396424926244999e-05, |
| "loss": 0.62855453, |
| "memory(GiB)": 67.98, |
| "step": 4535, |
| "train_speed(iter/s)": 0.039478 |
| }, |
| { |
| "acc": 0.80879059, |
| "epoch": 3.12242090784044, |
| "grad_norm": 1.9358049631118774, |
| "learning_rate": 3.3856587321035206e-05, |
| "loss": 0.63443809, |
| "memory(GiB)": 67.98, |
| "step": 4540, |
| "train_speed(iter/s)": 0.0395 |
| }, |
| { |
| "acc": 0.81181793, |
| "epoch": 3.12585969738652, |
| "grad_norm": 1.9281483888626099, |
| "learning_rate": 3.374900892381146e-05, |
| "loss": 0.62519212, |
| "memory(GiB)": 67.98, |
| "step": 4545, |
| "train_speed(iter/s)": 0.039518 |
| }, |
| { |
| "acc": 0.79986091, |
| "epoch": 3.1292984869326, |
| "grad_norm": 1.8126670122146606, |
| "learning_rate": 3.3641514627338166e-05, |
| "loss": 0.67471228, |
| "memory(GiB)": 67.98, |
| "step": 4550, |
| "train_speed(iter/s)": 0.039538 |
| }, |
| { |
| "acc": 0.81441412, |
| "epoch": 3.1327372764786796, |
| "grad_norm": 1.9482190608978271, |
| "learning_rate": 3.353410498773954e-05, |
| "loss": 0.62350183, |
| "memory(GiB)": 67.98, |
| "step": 4555, |
| "train_speed(iter/s)": 0.039558 |
| }, |
| { |
| "acc": 0.80743856, |
| "epoch": 3.1361760660247593, |
| "grad_norm": 1.8278954029083252, |
| "learning_rate": 3.342678056070189e-05, |
| "loss": 0.65586147, |
| "memory(GiB)": 67.98, |
| "step": 4560, |
| "train_speed(iter/s)": 0.03958 |
| }, |
| { |
| "acc": 0.80009956, |
| "epoch": 3.139614855570839, |
| "grad_norm": 1.9203051328659058, |
| "learning_rate": 3.331954190147065e-05, |
| "loss": 0.67459331, |
| "memory(GiB)": 67.98, |
| "step": 4565, |
| "train_speed(iter/s)": 0.039599 |
| }, |
| { |
| "acc": 0.79797955, |
| "epoch": 3.1430536451169186, |
| "grad_norm": 1.78507399559021, |
| "learning_rate": 3.321238956484752e-05, |
| "loss": 0.68094501, |
| "memory(GiB)": 67.98, |
| "step": 4570, |
| "train_speed(iter/s)": 0.039619 |
| }, |
| { |
| "acc": 0.80958462, |
| "epoch": 3.1464924346629988, |
| "grad_norm": 1.8514398336410522, |
| "learning_rate": 3.310532410518765e-05, |
| "loss": 0.63833261, |
| "memory(GiB)": 67.98, |
| "step": 4575, |
| "train_speed(iter/s)": 0.039639 |
| }, |
| { |
| "acc": 0.8103529, |
| "epoch": 3.1499312242090785, |
| "grad_norm": 2.1083662509918213, |
| "learning_rate": 3.2998346076396664e-05, |
| "loss": 0.63392391, |
| "memory(GiB)": 67.98, |
| "step": 4580, |
| "train_speed(iter/s)": 0.039659 |
| }, |
| { |
| "acc": 0.80684109, |
| "epoch": 3.153370013755158, |
| "grad_norm": 2.081134080886841, |
| "learning_rate": 3.289145603192793e-05, |
| "loss": 0.65391574, |
| "memory(GiB)": 67.98, |
| "step": 4585, |
| "train_speed(iter/s)": 0.03968 |
| }, |
| { |
| "acc": 0.80696983, |
| "epoch": 3.156808803301238, |
| "grad_norm": 1.7588388919830322, |
| "learning_rate": 3.2784654524779587e-05, |
| "loss": 0.65089002, |
| "memory(GiB)": 67.98, |
| "step": 4590, |
| "train_speed(iter/s)": 0.039697 |
| }, |
| { |
| "acc": 0.80319796, |
| "epoch": 3.160247592847318, |
| "grad_norm": 1.8731495141983032, |
| "learning_rate": 3.267794210749173e-05, |
| "loss": 0.66944408, |
| "memory(GiB)": 67.98, |
| "step": 4595, |
| "train_speed(iter/s)": 0.039715 |
| }, |
| { |
| "acc": 0.80229826, |
| "epoch": 3.1636863823933976, |
| "grad_norm": 1.863386869430542, |
| "learning_rate": 3.2571319332143516e-05, |
| "loss": 0.67615876, |
| "memory(GiB)": 67.98, |
| "step": 4600, |
| "train_speed(iter/s)": 0.039734 |
| }, |
| { |
| "epoch": 3.1636863823933976, |
| "eval_acc": 0.7733612273361228, |
| "eval_loss": 0.8061870336532593, |
| "eval_runtime": 1127.8615, |
| "eval_samples_per_second": 3.797, |
| "eval_steps_per_second": 0.068, |
| "step": 4600 |
| }, |
| { |
| "acc": 0.81224995, |
| "epoch": 3.1671251719394773, |
| "grad_norm": 1.7116352319717407, |
| "learning_rate": 3.2464786750350434e-05, |
| "loss": 0.62269239, |
| "memory(GiB)": 67.98, |
| "step": 4605, |
| "train_speed(iter/s)": 0.03937 |
| }, |
| { |
| "acc": 0.80045443, |
| "epoch": 3.170563961485557, |
| "grad_norm": 1.838098406791687, |
| "learning_rate": 3.235834491326126e-05, |
| "loss": 0.65012379, |
| "memory(GiB)": 67.98, |
| "step": 4610, |
| "train_speed(iter/s)": 0.039391 |
| }, |
| { |
| "acc": 0.80470877, |
| "epoch": 3.1740027510316366, |
| "grad_norm": 1.8031960725784302, |
| "learning_rate": 3.225199437155532e-05, |
| "loss": 0.65979033, |
| "memory(GiB)": 67.98, |
| "step": 4615, |
| "train_speed(iter/s)": 0.039409 |
| }, |
| { |
| "acc": 0.80560265, |
| "epoch": 3.1774415405777168, |
| "grad_norm": 1.7068849802017212, |
| "learning_rate": 3.214573567543964e-05, |
| "loss": 0.63796139, |
| "memory(GiB)": 67.98, |
| "step": 4620, |
| "train_speed(iter/s)": 0.039427 |
| }, |
| { |
| "acc": 0.79233809, |
| "epoch": 3.1808803301237965, |
| "grad_norm": 1.7398771047592163, |
| "learning_rate": 3.203956937464607e-05, |
| "loss": 0.67283368, |
| "memory(GiB)": 67.98, |
| "step": 4625, |
| "train_speed(iter/s)": 0.039446 |
| }, |
| { |
| "acc": 0.81366425, |
| "epoch": 3.184319119669876, |
| "grad_norm": 1.7081953287124634, |
| "learning_rate": 3.1933496018428446e-05, |
| "loss": 0.62146492, |
| "memory(GiB)": 67.98, |
| "step": 4630, |
| "train_speed(iter/s)": 0.039466 |
| }, |
| { |
| "acc": 0.80281668, |
| "epoch": 3.187757909215956, |
| "grad_norm": 1.6009129285812378, |
| "learning_rate": 3.1827516155559786e-05, |
| "loss": 0.66720371, |
| "memory(GiB)": 67.98, |
| "step": 4635, |
| "train_speed(iter/s)": 0.039486 |
| }, |
| { |
| "acc": 0.80487442, |
| "epoch": 3.191196698762036, |
| "grad_norm": 1.8239426612854004, |
| "learning_rate": 3.1721630334329366e-05, |
| "loss": 0.64386883, |
| "memory(GiB)": 67.98, |
| "step": 4640, |
| "train_speed(iter/s)": 0.039505 |
| }, |
| { |
| "acc": 0.80696297, |
| "epoch": 3.1946354883081156, |
| "grad_norm": 1.906916856765747, |
| "learning_rate": 3.161583910253998e-05, |
| "loss": 0.64987645, |
| "memory(GiB)": 67.98, |
| "step": 4645, |
| "train_speed(iter/s)": 0.039524 |
| }, |
| { |
| "acc": 0.80997219, |
| "epoch": 3.1980742778541953, |
| "grad_norm": 2.060511350631714, |
| "learning_rate": 3.1510143007505016e-05, |
| "loss": 0.63655567, |
| "memory(GiB)": 67.98, |
| "step": 4650, |
| "train_speed(iter/s)": 0.039543 |
| }, |
| { |
| "acc": 0.79812059, |
| "epoch": 3.201513067400275, |
| "grad_norm": 1.793277382850647, |
| "learning_rate": 3.14045425960457e-05, |
| "loss": 0.68602118, |
| "memory(GiB)": 67.98, |
| "step": 4655, |
| "train_speed(iter/s)": 0.039564 |
| }, |
| { |
| "acc": 0.79850287, |
| "epoch": 3.204951856946355, |
| "grad_norm": 1.6924282312393188, |
| "learning_rate": 3.129903841448827e-05, |
| "loss": 0.67275462, |
| "memory(GiB)": 67.98, |
| "step": 4660, |
| "train_speed(iter/s)": 0.039583 |
| }, |
| { |
| "acc": 0.80627632, |
| "epoch": 3.2083906464924348, |
| "grad_norm": 1.678781509399414, |
| "learning_rate": 3.119363100866106e-05, |
| "loss": 0.65286617, |
| "memory(GiB)": 67.98, |
| "step": 4665, |
| "train_speed(iter/s)": 0.039602 |
| }, |
| { |
| "acc": 0.81515961, |
| "epoch": 3.2118294360385145, |
| "grad_norm": 1.9915016889572144, |
| "learning_rate": 3.108832092389172e-05, |
| "loss": 0.59764929, |
| "memory(GiB)": 67.98, |
| "step": 4670, |
| "train_speed(iter/s)": 0.039621 |
| }, |
| { |
| "acc": 0.80686855, |
| "epoch": 3.215268225584594, |
| "grad_norm": 1.8249253034591675, |
| "learning_rate": 3.098310870500448e-05, |
| "loss": 0.64462824, |
| "memory(GiB)": 67.98, |
| "step": 4675, |
| "train_speed(iter/s)": 0.03964 |
| }, |
| { |
| "acc": 0.80929985, |
| "epoch": 3.218707015130674, |
| "grad_norm": 1.7660592794418335, |
| "learning_rate": 3.087799489631721e-05, |
| "loss": 0.6324172, |
| "memory(GiB)": 67.98, |
| "step": 4680, |
| "train_speed(iter/s)": 0.039659 |
| }, |
| { |
| "acc": 0.80256157, |
| "epoch": 3.222145804676754, |
| "grad_norm": 1.9033777713775635, |
| "learning_rate": 3.077298004163865e-05, |
| "loss": 0.67533493, |
| "memory(GiB)": 67.98, |
| "step": 4685, |
| "train_speed(iter/s)": 0.039677 |
| }, |
| { |
| "acc": 0.80715237, |
| "epoch": 3.2255845942228336, |
| "grad_norm": 1.6797436475753784, |
| "learning_rate": 3.066806468426561e-05, |
| "loss": 0.64756646, |
| "memory(GiB)": 67.98, |
| "step": 4690, |
| "train_speed(iter/s)": 0.039699 |
| }, |
| { |
| "acc": 0.79631739, |
| "epoch": 3.2290233837689133, |
| "grad_norm": 1.6722263097763062, |
| "learning_rate": 3.056324936698014e-05, |
| "loss": 0.68136206, |
| "memory(GiB)": 67.98, |
| "step": 4695, |
| "train_speed(iter/s)": 0.03972 |
| }, |
| { |
| "acc": 0.81018467, |
| "epoch": 3.232462173314993, |
| "grad_norm": 1.9351452589035034, |
| "learning_rate": 3.0458534632046766e-05, |
| "loss": 0.63391657, |
| "memory(GiB)": 67.98, |
| "step": 4700, |
| "train_speed(iter/s)": 0.039741 |
| }, |
| { |
| "epoch": 3.232462173314993, |
| "eval_acc": 0.7737605164889548, |
| "eval_loss": 0.806867241859436, |
| "eval_runtime": 1123.2355, |
| "eval_samples_per_second": 3.813, |
| "eval_steps_per_second": 0.069, |
| "step": 4700 |
| }, |
| { |
| "acc": 0.81098757, |
| "epoch": 3.235900962861073, |
| "grad_norm": 1.8516818284988403, |
| "learning_rate": 3.0353921021209598e-05, |
| "loss": 0.63078384, |
| "memory(GiB)": 67.98, |
| "step": 4705, |
| "train_speed(iter/s)": 0.039386 |
| }, |
| { |
| "acc": 0.80097027, |
| "epoch": 3.2393397524071528, |
| "grad_norm": 1.9018975496292114, |
| "learning_rate": 3.02494090756896e-05, |
| "loss": 0.67307758, |
| "memory(GiB)": 67.98, |
| "step": 4710, |
| "train_speed(iter/s)": 0.039403 |
| }, |
| { |
| "acc": 0.78721581, |
| "epoch": 3.2427785419532325, |
| "grad_norm": 1.8945331573486328, |
| "learning_rate": 3.014499933618176e-05, |
| "loss": 0.71489978, |
| "memory(GiB)": 67.98, |
| "step": 4715, |
| "train_speed(iter/s)": 0.039419 |
| }, |
| { |
| "acc": 0.81447954, |
| "epoch": 3.246217331499312, |
| "grad_norm": 1.873476505279541, |
| "learning_rate": 3.004069234285235e-05, |
| "loss": 0.62558355, |
| "memory(GiB)": 67.98, |
| "step": 4720, |
| "train_speed(iter/s)": 0.039438 |
| }, |
| { |
| "acc": 0.80101833, |
| "epoch": 3.2496561210453923, |
| "grad_norm": 1.9191193580627441, |
| "learning_rate": 2.993648863533602e-05, |
| "loss": 0.65777245, |
| "memory(GiB)": 67.98, |
| "step": 4725, |
| "train_speed(iter/s)": 0.039458 |
| }, |
| { |
| "acc": 0.81086941, |
| "epoch": 3.253094910591472, |
| "grad_norm": 1.973708987236023, |
| "learning_rate": 2.983238875273308e-05, |
| "loss": 0.63210435, |
| "memory(GiB)": 67.98, |
| "step": 4730, |
| "train_speed(iter/s)": 0.039477 |
| }, |
| { |
| "acc": 0.80301018, |
| "epoch": 3.2565337001375516, |
| "grad_norm": 1.7471644878387451, |
| "learning_rate": 2.9728393233606715e-05, |
| "loss": 0.66623907, |
| "memory(GiB)": 67.98, |
| "step": 4735, |
| "train_speed(iter/s)": 0.039495 |
| }, |
| { |
| "acc": 0.80888157, |
| "epoch": 3.2599724896836313, |
| "grad_norm": 1.7000857591629028, |
| "learning_rate": 2.9624502615980177e-05, |
| "loss": 0.64117575, |
| "memory(GiB)": 67.98, |
| "step": 4740, |
| "train_speed(iter/s)": 0.039513 |
| }, |
| { |
| "acc": 0.81031885, |
| "epoch": 3.263411279229711, |
| "grad_norm": 1.760911226272583, |
| "learning_rate": 2.9520717437334024e-05, |
| "loss": 0.65109177, |
| "memory(GiB)": 67.98, |
| "step": 4745, |
| "train_speed(iter/s)": 0.039529 |
| }, |
| { |
| "acc": 0.81288662, |
| "epoch": 3.266850068775791, |
| "grad_norm": 1.7138432264328003, |
| "learning_rate": 2.941703823460329e-05, |
| "loss": 0.62872763, |
| "memory(GiB)": 67.98, |
| "step": 4750, |
| "train_speed(iter/s)": 0.039549 |
| }, |
| { |
| "acc": 0.80744476, |
| "epoch": 3.2702888583218708, |
| "grad_norm": 1.9831231832504272, |
| "learning_rate": 2.9313465544174756e-05, |
| "loss": 0.63904066, |
| "memory(GiB)": 67.98, |
| "step": 4755, |
| "train_speed(iter/s)": 0.039566 |
| }, |
| { |
| "acc": 0.80821819, |
| "epoch": 3.2737276478679505, |
| "grad_norm": 1.79635488986969, |
| "learning_rate": 2.9209999901884165e-05, |
| "loss": 0.64807615, |
| "memory(GiB)": 67.98, |
| "step": 4760, |
| "train_speed(iter/s)": 0.039585 |
| }, |
| { |
| "acc": 0.81542759, |
| "epoch": 3.27716643741403, |
| "grad_norm": 1.7462048530578613, |
| "learning_rate": 2.910664184301346e-05, |
| "loss": 0.61296053, |
| "memory(GiB)": 67.98, |
| "step": 4765, |
| "train_speed(iter/s)": 0.039602 |
| }, |
| { |
| "acc": 0.81904421, |
| "epoch": 3.28060522696011, |
| "grad_norm": 1.6204197406768799, |
| "learning_rate": 2.900339190228796e-05, |
| "loss": 0.60652199, |
| "memory(GiB)": 67.98, |
| "step": 4770, |
| "train_speed(iter/s)": 0.039622 |
| }, |
| { |
| "acc": 0.8142024, |
| "epoch": 3.28404401650619, |
| "grad_norm": 2.0843801498413086, |
| "learning_rate": 2.890025061387362e-05, |
| "loss": 0.61951303, |
| "memory(GiB)": 67.98, |
| "step": 4775, |
| "train_speed(iter/s)": 0.039638 |
| }, |
| { |
| "acc": 0.80437889, |
| "epoch": 3.2874828060522696, |
| "grad_norm": 1.9848445653915405, |
| "learning_rate": 2.879721851137438e-05, |
| "loss": 0.65048337, |
| "memory(GiB)": 67.98, |
| "step": 4780, |
| "train_speed(iter/s)": 0.039655 |
| }, |
| { |
| "acc": 0.8088932, |
| "epoch": 3.2909215955983493, |
| "grad_norm": 1.7368524074554443, |
| "learning_rate": 2.8694296127829177e-05, |
| "loss": 0.64408207, |
| "memory(GiB)": 67.98, |
| "step": 4785, |
| "train_speed(iter/s)": 0.039674 |
| }, |
| { |
| "acc": 0.79394779, |
| "epoch": 3.294360385144429, |
| "grad_norm": 1.72417414188385, |
| "learning_rate": 2.8591483995709407e-05, |
| "loss": 0.68265638, |
| "memory(GiB)": 67.98, |
| "step": 4790, |
| "train_speed(iter/s)": 0.039689 |
| }, |
| { |
| "acc": 0.81347179, |
| "epoch": 3.297799174690509, |
| "grad_norm": 1.7844178676605225, |
| "learning_rate": 2.8488782646916024e-05, |
| "loss": 0.61397967, |
| "memory(GiB)": 67.98, |
| "step": 4795, |
| "train_speed(iter/s)": 0.039709 |
| }, |
| { |
| "acc": 0.80788279, |
| "epoch": 3.3012379642365888, |
| "grad_norm": 1.7968957424163818, |
| "learning_rate": 2.838619261277686e-05, |
| "loss": 0.64608054, |
| "memory(GiB)": 67.98, |
| "step": 4800, |
| "train_speed(iter/s)": 0.039729 |
| }, |
| { |
| "epoch": 3.3012379642365888, |
| "eval_acc": 0.7751046025104602, |
| "eval_loss": 0.804237425327301, |
| "eval_runtime": 1089.1926, |
| "eval_samples_per_second": 3.932, |
| "eval_steps_per_second": 0.071, |
| "step": 4800 |
| }, |
| { |
| "acc": 0.81613159, |
| "epoch": 3.3046767537826685, |
| "grad_norm": 1.6313848495483398, |
| "learning_rate": 2.828371442404386e-05, |
| "loss": 0.62472601, |
| "memory(GiB)": 67.98, |
| "step": 4805, |
| "train_speed(iter/s)": 0.039393 |
| }, |
| { |
| "acc": 0.81500292, |
| "epoch": 3.308115543328748, |
| "grad_norm": 1.8520140647888184, |
| "learning_rate": 2.8181348610890345e-05, |
| "loss": 0.62366076, |
| "memory(GiB)": 67.98, |
| "step": 4810, |
| "train_speed(iter/s)": 0.039414 |
| }, |
| { |
| "acc": 0.79354863, |
| "epoch": 3.3115543328748283, |
| "grad_norm": 1.8981624841690063, |
| "learning_rate": 2.8079095702908214e-05, |
| "loss": 0.69254041, |
| "memory(GiB)": 67.98, |
| "step": 4815, |
| "train_speed(iter/s)": 0.03943 |
| }, |
| { |
| "acc": 0.8094223, |
| "epoch": 3.314993122420908, |
| "grad_norm": 1.9359115362167358, |
| "learning_rate": 2.7976956229105322e-05, |
| "loss": 0.64053526, |
| "memory(GiB)": 67.98, |
| "step": 4820, |
| "train_speed(iter/s)": 0.039449 |
| }, |
| { |
| "acc": 0.79914575, |
| "epoch": 3.3184319119669876, |
| "grad_norm": 1.8818870782852173, |
| "learning_rate": 2.7874930717902603e-05, |
| "loss": 0.68240814, |
| "memory(GiB)": 67.98, |
| "step": 4825, |
| "train_speed(iter/s)": 0.039467 |
| }, |
| { |
| "acc": 0.80387897, |
| "epoch": 3.3218707015130673, |
| "grad_norm": 1.9390044212341309, |
| "learning_rate": 2.7773019697131435e-05, |
| "loss": 0.65107994, |
| "memory(GiB)": 67.98, |
| "step": 4830, |
| "train_speed(iter/s)": 0.039486 |
| }, |
| { |
| "acc": 0.81106586, |
| "epoch": 3.325309491059147, |
| "grad_norm": 1.6695841550827026, |
| "learning_rate": 2.767122369403088e-05, |
| "loss": 0.63033338, |
| "memory(GiB)": 67.98, |
| "step": 4835, |
| "train_speed(iter/s)": 0.039505 |
| }, |
| { |
| "acc": 0.82067537, |
| "epoch": 3.328748280605227, |
| "grad_norm": 1.7732053995132446, |
| "learning_rate": 2.756954323524491e-05, |
| "loss": 0.61327543, |
| "memory(GiB)": 67.98, |
| "step": 4840, |
| "train_speed(iter/s)": 0.039521 |
| }, |
| { |
| "acc": 0.80741024, |
| "epoch": 3.3321870701513068, |
| "grad_norm": 1.7796927690505981, |
| "learning_rate": 2.7467978846819775e-05, |
| "loss": 0.63265486, |
| "memory(GiB)": 67.98, |
| "step": 4845, |
| "train_speed(iter/s)": 0.039541 |
| }, |
| { |
| "acc": 0.80555611, |
| "epoch": 3.3356258596973865, |
| "grad_norm": 1.739590048789978, |
| "learning_rate": 2.7366531054201243e-05, |
| "loss": 0.64431, |
| "memory(GiB)": 67.98, |
| "step": 4850, |
| "train_speed(iter/s)": 0.03956 |
| }, |
| { |
| "acc": 0.79774094, |
| "epoch": 3.339064649243466, |
| "grad_norm": 1.9023163318634033, |
| "learning_rate": 2.726520038223182e-05, |
| "loss": 0.68374538, |
| "memory(GiB)": 67.98, |
| "step": 4855, |
| "train_speed(iter/s)": 0.03958 |
| }, |
| { |
| "acc": 0.81651649, |
| "epoch": 3.3425034387895463, |
| "grad_norm": 1.862848162651062, |
| "learning_rate": 2.716398735514812e-05, |
| "loss": 0.62106805, |
| "memory(GiB)": 67.98, |
| "step": 4860, |
| "train_speed(iter/s)": 0.039595 |
| }, |
| { |
| "acc": 0.8125948, |
| "epoch": 3.345942228335626, |
| "grad_norm": 1.7548292875289917, |
| "learning_rate": 2.7062892496578096e-05, |
| "loss": 0.62365727, |
| "memory(GiB)": 67.98, |
| "step": 4865, |
| "train_speed(iter/s)": 0.039615 |
| }, |
| { |
| "acc": 0.81203623, |
| "epoch": 3.3493810178817056, |
| "grad_norm": 1.8868883848190308, |
| "learning_rate": 2.696191632953835e-05, |
| "loss": 0.63214188, |
| "memory(GiB)": 67.98, |
| "step": 4870, |
| "train_speed(iter/s)": 0.039636 |
| }, |
| { |
| "acc": 0.7978539, |
| "epoch": 3.3528198074277853, |
| "grad_norm": 1.854641318321228, |
| "learning_rate": 2.6861059376431485e-05, |
| "loss": 0.66800289, |
| "memory(GiB)": 67.98, |
| "step": 4875, |
| "train_speed(iter/s)": 0.039653 |
| }, |
| { |
| "acc": 0.80050983, |
| "epoch": 3.3562585969738654, |
| "grad_norm": 2.1327366828918457, |
| "learning_rate": 2.6760322159043293e-05, |
| "loss": 0.68278418, |
| "memory(GiB)": 67.98, |
| "step": 4880, |
| "train_speed(iter/s)": 0.039669 |
| }, |
| { |
| "acc": 0.80420437, |
| "epoch": 3.359697386519945, |
| "grad_norm": 1.9406790733337402, |
| "learning_rate": 2.6659705198540137e-05, |
| "loss": 0.65569339, |
| "memory(GiB)": 67.98, |
| "step": 4885, |
| "train_speed(iter/s)": 0.039687 |
| }, |
| { |
| "acc": 0.81132812, |
| "epoch": 3.3631361760660248, |
| "grad_norm": 2.0002591609954834, |
| "learning_rate": 2.6559209015466198e-05, |
| "loss": 0.64171629, |
| "memory(GiB)": 67.98, |
| "step": 4890, |
| "train_speed(iter/s)": 0.039704 |
| }, |
| { |
| "acc": 0.80664577, |
| "epoch": 3.3665749656121045, |
| "grad_norm": 1.8464481830596924, |
| "learning_rate": 2.6458834129740834e-05, |
| "loss": 0.63870592, |
| "memory(GiB)": 67.98, |
| "step": 4895, |
| "train_speed(iter/s)": 0.039724 |
| }, |
| { |
| "acc": 0.80081406, |
| "epoch": 3.370013755158184, |
| "grad_norm": 1.992497444152832, |
| "learning_rate": 2.635858106065588e-05, |
| "loss": 0.67669377, |
| "memory(GiB)": 67.98, |
| "step": 4900, |
| "train_speed(iter/s)": 0.039739 |
| }, |
| { |
| "epoch": 3.370013755158184, |
| "eval_acc": 0.775284563818779, |
| "eval_loss": 0.8015691637992859, |
| "eval_runtime": 1150.5731, |
| "eval_samples_per_second": 3.722, |
| "eval_steps_per_second": 0.067, |
| "step": 4900 |
| }, |
| { |
| "acc": 0.80654058, |
| "epoch": 3.3734525447042643, |
| "grad_norm": 1.6790952682495117, |
| "learning_rate": 2.625845032687293e-05, |
| "loss": 0.66655011, |
| "memory(GiB)": 67.98, |
| "step": 4905, |
| "train_speed(iter/s)": 0.03939 |
| }, |
| { |
| "acc": 0.81852398, |
| "epoch": 3.376891334250344, |
| "grad_norm": 1.7393443584442139, |
| "learning_rate": 2.6158442446420673e-05, |
| "loss": 0.61265764, |
| "memory(GiB)": 67.98, |
| "step": 4910, |
| "train_speed(iter/s)": 0.039408 |
| }, |
| { |
| "acc": 0.80897388, |
| "epoch": 3.3803301237964236, |
| "grad_norm": 1.757190465927124, |
| "learning_rate": 2.605855793669223e-05, |
| "loss": 0.63301859, |
| "memory(GiB)": 67.98, |
| "step": 4915, |
| "train_speed(iter/s)": 0.039424 |
| }, |
| { |
| "acc": 0.81477318, |
| "epoch": 3.3837689133425033, |
| "grad_norm": 1.7762666940689087, |
| "learning_rate": 2.595879731444242e-05, |
| "loss": 0.63501825, |
| "memory(GiB)": 67.98, |
| "step": 4920, |
| "train_speed(iter/s)": 0.039441 |
| }, |
| { |
| "acc": 0.80826883, |
| "epoch": 3.387207702888583, |
| "grad_norm": 1.8915072679519653, |
| "learning_rate": 2.5859161095785204e-05, |
| "loss": 0.64570541, |
| "memory(GiB)": 67.98, |
| "step": 4925, |
| "train_speed(iter/s)": 0.039458 |
| }, |
| { |
| "acc": 0.80283833, |
| "epoch": 3.390646492434663, |
| "grad_norm": 1.8202823400497437, |
| "learning_rate": 2.5759649796190873e-05, |
| "loss": 0.65588207, |
| "memory(GiB)": 67.98, |
| "step": 4930, |
| "train_speed(iter/s)": 0.039476 |
| }, |
| { |
| "acc": 0.80436974, |
| "epoch": 3.3940852819807428, |
| "grad_norm": 1.8674787282943726, |
| "learning_rate": 2.5660263930483468e-05, |
| "loss": 0.63378534, |
| "memory(GiB)": 67.98, |
| "step": 4935, |
| "train_speed(iter/s)": 0.039489 |
| }, |
| { |
| "acc": 0.80554743, |
| "epoch": 3.3975240715268225, |
| "grad_norm": 1.7539056539535522, |
| "learning_rate": 2.5561004012838067e-05, |
| "loss": 0.65574193, |
| "memory(GiB)": 67.98, |
| "step": 4940, |
| "train_speed(iter/s)": 0.039506 |
| }, |
| { |
| "acc": 0.81471024, |
| "epoch": 3.4009628610729026, |
| "grad_norm": 1.9018100500106812, |
| "learning_rate": 2.5461870556778218e-05, |
| "loss": 0.61126738, |
| "memory(GiB)": 67.98, |
| "step": 4945, |
| "train_speed(iter/s)": 0.039526 |
| }, |
| { |
| "acc": 0.80630493, |
| "epoch": 3.4044016506189823, |
| "grad_norm": 1.7900938987731934, |
| "learning_rate": 2.5362864075173153e-05, |
| "loss": 0.63573794, |
| "memory(GiB)": 67.98, |
| "step": 4950, |
| "train_speed(iter/s)": 0.039543 |
| }, |
| { |
| "acc": 0.80556068, |
| "epoch": 3.407840440165062, |
| "grad_norm": 2.0504183769226074, |
| "learning_rate": 2.526398508023523e-05, |
| "loss": 0.6546957, |
| "memory(GiB)": 67.98, |
| "step": 4955, |
| "train_speed(iter/s)": 0.039561 |
| }, |
| { |
| "acc": 0.80205326, |
| "epoch": 3.4112792297111416, |
| "grad_norm": 1.9150274991989136, |
| "learning_rate": 2.5165234083517246e-05, |
| "loss": 0.64255061, |
| "memory(GiB)": 67.98, |
| "step": 4960, |
| "train_speed(iter/s)": 0.039575 |
| }, |
| { |
| "acc": 0.80601921, |
| "epoch": 3.4147180192572213, |
| "grad_norm": 1.8331859111785889, |
| "learning_rate": 2.5066611595909784e-05, |
| "loss": 0.64326835, |
| "memory(GiB)": 67.98, |
| "step": 4965, |
| "train_speed(iter/s)": 0.039591 |
| }, |
| { |
| "acc": 0.80851765, |
| "epoch": 3.4181568088033014, |
| "grad_norm": 1.8799371719360352, |
| "learning_rate": 2.49681181276386e-05, |
| "loss": 0.63813715, |
| "memory(GiB)": 67.98, |
| "step": 4970, |
| "train_speed(iter/s)": 0.039605 |
| }, |
| { |
| "acc": 0.80517483, |
| "epoch": 3.421595598349381, |
| "grad_norm": 1.8553872108459473, |
| "learning_rate": 2.486975418826196e-05, |
| "loss": 0.66684914, |
| "memory(GiB)": 67.98, |
| "step": 4975, |
| "train_speed(iter/s)": 0.039623 |
| }, |
| { |
| "acc": 0.82046995, |
| "epoch": 3.4250343878954608, |
| "grad_norm": 1.695779800415039, |
| "learning_rate": 2.477152028666798e-05, |
| "loss": 0.60830936, |
| "memory(GiB)": 67.98, |
| "step": 4980, |
| "train_speed(iter/s)": 0.039643 |
| }, |
| { |
| "acc": 0.7990911, |
| "epoch": 3.4284731774415405, |
| "grad_norm": 1.7533307075500488, |
| "learning_rate": 2.4673416931072094e-05, |
| "loss": 0.67933016, |
| "memory(GiB)": 67.98, |
| "step": 4985, |
| "train_speed(iter/s)": 0.039658 |
| }, |
| { |
| "acc": 0.80797586, |
| "epoch": 3.43191196698762, |
| "grad_norm": 2.2120864391326904, |
| "learning_rate": 2.4575444629014292e-05, |
| "loss": 0.65290236, |
| "memory(GiB)": 67.98, |
| "step": 4990, |
| "train_speed(iter/s)": 0.039679 |
| }, |
| { |
| "acc": 0.80715389, |
| "epoch": 3.4353507565337003, |
| "grad_norm": 1.7007701396942139, |
| "learning_rate": 2.447760388735657e-05, |
| "loss": 0.64799299, |
| "memory(GiB)": 67.98, |
| "step": 4995, |
| "train_speed(iter/s)": 0.039694 |
| }, |
| { |
| "acc": 0.80209885, |
| "epoch": 3.43878954607978, |
| "grad_norm": 2.1484506130218506, |
| "learning_rate": 2.4379895212280297e-05, |
| "loss": 0.6714016, |
| "memory(GiB)": 67.98, |
| "step": 5000, |
| "train_speed(iter/s)": 0.039712 |
| }, |
| { |
| "epoch": 3.43878954607978, |
| "eval_acc": 0.7760493993791335, |
| "eval_loss": 0.7988596558570862, |
| "eval_runtime": 1141.4518, |
| "eval_samples_per_second": 3.752, |
| "eval_steps_per_second": 0.067, |
| "step": 5000 |
| }, |
| { |
| "acc": 0.80421772, |
| "epoch": 3.4422283356258596, |
| "grad_norm": 1.683592438697815, |
| "learning_rate": 2.428231910928358e-05, |
| "loss": 0.65520515, |
| "memory(GiB)": 67.98, |
| "step": 5005, |
| "train_speed(iter/s)": 0.039374 |
| }, |
| { |
| "acc": 0.80245571, |
| "epoch": 3.4456671251719393, |
| "grad_norm": 1.8841793537139893, |
| "learning_rate": 2.418487608317867e-05, |
| "loss": 0.67175484, |
| "memory(GiB)": 67.98, |
| "step": 5010, |
| "train_speed(iter/s)": 0.039392 |
| }, |
| { |
| "acc": 0.8125226, |
| "epoch": 3.4491059147180194, |
| "grad_norm": 1.949098825454712, |
| "learning_rate": 2.408756663808937e-05, |
| "loss": 0.61799521, |
| "memory(GiB)": 67.98, |
| "step": 5015, |
| "train_speed(iter/s)": 0.039412 |
| }, |
| { |
| "acc": 0.81072598, |
| "epoch": 3.452544704264099, |
| "grad_norm": 1.7873549461364746, |
| "learning_rate": 2.399039127744836e-05, |
| "loss": 0.64322014, |
| "memory(GiB)": 67.98, |
| "step": 5020, |
| "train_speed(iter/s)": 0.039425 |
| }, |
| { |
| "acc": 0.80515785, |
| "epoch": 3.4559834938101788, |
| "grad_norm": 2.0002734661102295, |
| "learning_rate": 2.389335050399464e-05, |
| "loss": 0.6395524, |
| "memory(GiB)": 67.98, |
| "step": 5025, |
| "train_speed(iter/s)": 0.039445 |
| }, |
| { |
| "acc": 0.80893536, |
| "epoch": 3.4594222833562585, |
| "grad_norm": 1.9517066478729248, |
| "learning_rate": 2.3796444819770926e-05, |
| "loss": 0.63445306, |
| "memory(GiB)": 67.98, |
| "step": 5030, |
| "train_speed(iter/s)": 0.03946 |
| }, |
| { |
| "acc": 0.79918771, |
| "epoch": 3.4628610729023386, |
| "grad_norm": 1.7724376916885376, |
| "learning_rate": 2.3699674726121022e-05, |
| "loss": 0.68629122, |
| "memory(GiB)": 67.98, |
| "step": 5035, |
| "train_speed(iter/s)": 0.039476 |
| }, |
| { |
| "acc": 0.80571623, |
| "epoch": 3.4662998624484183, |
| "grad_norm": 1.769455075263977, |
| "learning_rate": 2.3603040723687315e-05, |
| "loss": 0.65023713, |
| "memory(GiB)": 67.98, |
| "step": 5040, |
| "train_speed(iter/s)": 0.039495 |
| }, |
| { |
| "acc": 0.81652203, |
| "epoch": 3.469738651994498, |
| "grad_norm": 2.4029428958892822, |
| "learning_rate": 2.3506543312408055e-05, |
| "loss": 0.62751317, |
| "memory(GiB)": 67.98, |
| "step": 5045, |
| "train_speed(iter/s)": 0.039514 |
| }, |
| { |
| "acc": 0.8143034, |
| "epoch": 3.4731774415405776, |
| "grad_norm": 1.7803950309753418, |
| "learning_rate": 2.3410182991514863e-05, |
| "loss": 0.62447834, |
| "memory(GiB)": 67.98, |
| "step": 5050, |
| "train_speed(iter/s)": 0.039532 |
| }, |
| { |
| "acc": 0.81082478, |
| "epoch": 3.4766162310866573, |
| "grad_norm": 1.8696342706680298, |
| "learning_rate": 2.3313960259530114e-05, |
| "loss": 0.63704772, |
| "memory(GiB)": 67.98, |
| "step": 5055, |
| "train_speed(iter/s)": 0.039552 |
| }, |
| { |
| "acc": 0.81630154, |
| "epoch": 3.4800550206327374, |
| "grad_norm": 1.9919400215148926, |
| "learning_rate": 2.321787561426436e-05, |
| "loss": 0.61488199, |
| "memory(GiB)": 67.98, |
| "step": 5060, |
| "train_speed(iter/s)": 0.039568 |
| }, |
| { |
| "acc": 0.81280794, |
| "epoch": 3.483493810178817, |
| "grad_norm": 1.9915574789047241, |
| "learning_rate": 2.3121929552813775e-05, |
| "loss": 0.62114315, |
| "memory(GiB)": 67.98, |
| "step": 5065, |
| "train_speed(iter/s)": 0.039585 |
| }, |
| { |
| "acc": 0.80458755, |
| "epoch": 3.4869325997248968, |
| "grad_norm": 1.9132686853408813, |
| "learning_rate": 2.302612257155754e-05, |
| "loss": 0.63852549, |
| "memory(GiB)": 67.98, |
| "step": 5070, |
| "train_speed(iter/s)": 0.039601 |
| }, |
| { |
| "acc": 0.80228262, |
| "epoch": 3.4903713892709765, |
| "grad_norm": 1.638962745666504, |
| "learning_rate": 2.2930455166155325e-05, |
| "loss": 0.65759382, |
| "memory(GiB)": 67.98, |
| "step": 5075, |
| "train_speed(iter/s)": 0.039619 |
| }, |
| { |
| "acc": 0.81052542, |
| "epoch": 3.4938101788170566, |
| "grad_norm": 2.4375152587890625, |
| "learning_rate": 2.2834927831544663e-05, |
| "loss": 0.62842712, |
| "memory(GiB)": 67.98, |
| "step": 5080, |
| "train_speed(iter/s)": 0.039637 |
| }, |
| { |
| "acc": 0.81806412, |
| "epoch": 3.4972489683631363, |
| "grad_norm": 1.8711788654327393, |
| "learning_rate": 2.273954106193851e-05, |
| "loss": 0.59915447, |
| "memory(GiB)": 67.98, |
| "step": 5085, |
| "train_speed(iter/s)": 0.039654 |
| }, |
| { |
| "acc": 0.80885086, |
| "epoch": 3.500687757909216, |
| "grad_norm": 1.7943886518478394, |
| "learning_rate": 2.2644295350822523e-05, |
| "loss": 0.64677639, |
| "memory(GiB)": 67.98, |
| "step": 5090, |
| "train_speed(iter/s)": 0.03967 |
| }, |
| { |
| "acc": 0.80517883, |
| "epoch": 3.5041265474552956, |
| "grad_norm": 1.9428882598876953, |
| "learning_rate": 2.2549191190952614e-05, |
| "loss": 0.64541783, |
| "memory(GiB)": 67.98, |
| "step": 5095, |
| "train_speed(iter/s)": 0.039687 |
| }, |
| { |
| "acc": 0.82121677, |
| "epoch": 3.5075653370013757, |
| "grad_norm": 2.129689931869507, |
| "learning_rate": 2.245422907435237e-05, |
| "loss": 0.59930925, |
| "memory(GiB)": 67.98, |
| "step": 5100, |
| "train_speed(iter/s)": 0.039705 |
| }, |
| { |
| "epoch": 3.5075653370013757, |
| "eval_acc": 0.7766230260493994, |
| "eval_loss": 0.7989464998245239, |
| "eval_runtime": 1103.0791, |
| "eval_samples_per_second": 3.883, |
| "eval_steps_per_second": 0.07, |
| "step": 5100 |
| }, |
| { |
| "acc": 0.80364723, |
| "epoch": 3.5110041265474554, |
| "grad_norm": 1.949704885482788, |
| "learning_rate": 2.2359409492310554e-05, |
| "loss": 0.65982656, |
| "memory(GiB)": 72.17, |
| "step": 5105, |
| "train_speed(iter/s)": 45.411016 |
| }, |
| { |
| "acc": 0.81052856, |
| "epoch": 3.514442916093535, |
| "grad_norm": 1.766641616821289, |
| "learning_rate": 2.2264732935378485e-05, |
| "loss": 0.62573719, |
| "memory(GiB)": 72.17, |
| "step": 5110, |
| "train_speed(iter/s)": 26.201936 |
| }, |
| { |
| "acc": 0.81840916, |
| "epoch": 3.5178817056396148, |
| "grad_norm": 2.0052237510681152, |
| "learning_rate": 2.217019989336754e-05, |
| "loss": 0.60661297, |
| "memory(GiB)": 72.17, |
| "step": 5115, |
| "train_speed(iter/s)": 19.601314 |
| }, |
| { |
| "acc": 0.81169033, |
| "epoch": 3.5213204951856945, |
| "grad_norm": 1.8747566938400269, |
| "learning_rate": 2.2075810855346627e-05, |
| "loss": 0.6164432, |
| "memory(GiB)": 72.17, |
| "step": 5120, |
| "train_speed(iter/s)": 15.593037 |
| }, |
| { |
| "acc": 0.81197557, |
| "epoch": 3.5247592847317746, |
| "grad_norm": 1.8955270051956177, |
| "learning_rate": 2.1981566309639646e-05, |
| "loss": 0.63830528, |
| "memory(GiB)": 72.17, |
| "step": 5125, |
| "train_speed(iter/s)": 13.017298 |
| }, |
| { |
| "acc": 0.80265837, |
| "epoch": 3.5281980742778543, |
| "grad_norm": 1.9690247774124146, |
| "learning_rate": 2.1887466743822955e-05, |
| "loss": 0.66069555, |
| "memory(GiB)": 72.17, |
| "step": 5130, |
| "train_speed(iter/s)": 10.901481 |
| }, |
| { |
| "acc": 0.80450611, |
| "epoch": 3.531636863823934, |
| "grad_norm": 1.9993948936462402, |
| "learning_rate": 2.1793512644722865e-05, |
| "loss": 0.66204972, |
| "memory(GiB)": 72.23, |
| "step": 5135, |
| "train_speed(iter/s)": 9.338386 |
| }, |
| { |
| "acc": 0.80088081, |
| "epoch": 3.5350756533700136, |
| "grad_norm": 2.0130441188812256, |
| "learning_rate": 2.1699704498413108e-05, |
| "loss": 0.67445641, |
| "memory(GiB)": 72.23, |
| "step": 5140, |
| "train_speed(iter/s)": 8.21847 |
| }, |
| { |
| "acc": 0.80354471, |
| "epoch": 3.5385144429160933, |
| "grad_norm": 1.8594011068344116, |
| "learning_rate": 2.1606042790212308e-05, |
| "loss": 0.6569746, |
| "memory(GiB)": 72.28, |
| "step": 5145, |
| "train_speed(iter/s)": 7.396472 |
| }, |
| { |
| "acc": 0.817062, |
| "epoch": 3.5419532324621734, |
| "grad_norm": 1.8149155378341675, |
| "learning_rate": 2.1512528004681535e-05, |
| "loss": 0.63296041, |
| "memory(GiB)": 72.7, |
| "step": 5150, |
| "train_speed(iter/s)": 6.704915 |
| }, |
| { |
| "acc": 0.81256504, |
| "epoch": 3.545392022008253, |
| "grad_norm": 1.9153436422348022, |
| "learning_rate": 2.1419160625621713e-05, |
| "loss": 0.6270606, |
| "memory(GiB)": 72.7, |
| "step": 5155, |
| "train_speed(iter/s)": 6.15874 |
| }, |
| { |
| "acc": 0.80941086, |
| "epoch": 3.5488308115543328, |
| "grad_norm": 1.6500003337860107, |
| "learning_rate": 2.1325941136071155e-05, |
| "loss": 0.63835382, |
| "memory(GiB)": 72.7, |
| "step": 5160, |
| "train_speed(iter/s)": 5.671284 |
| }, |
| { |
| "acc": 0.81136417, |
| "epoch": 3.552269601100413, |
| "grad_norm": 1.8858124017715454, |
| "learning_rate": 2.1232870018303073e-05, |
| "loss": 0.63752775, |
| "memory(GiB)": 72.7, |
| "step": 5165, |
| "train_speed(iter/s)": 5.219987 |
| }, |
| { |
| "acc": 0.80491219, |
| "epoch": 3.5557083906464926, |
| "grad_norm": 1.6921783685684204, |
| "learning_rate": 2.1139947753823062e-05, |
| "loss": 0.64572196, |
| "memory(GiB)": 72.7, |
| "step": 5170, |
| "train_speed(iter/s)": 4.872821 |
| }, |
| { |
| "acc": 0.81005411, |
| "epoch": 3.5591471801925723, |
| "grad_norm": 2.046410322189331, |
| "learning_rate": 2.104717482336666e-05, |
| "loss": 0.63220901, |
| "memory(GiB)": 72.7, |
| "step": 5175, |
| "train_speed(iter/s)": 4.560666 |
| }, |
| { |
| "acc": 0.81424847, |
| "epoch": 3.562585969738652, |
| "grad_norm": 1.6710875034332275, |
| "learning_rate": 2.095455170689679e-05, |
| "loss": 0.61837616, |
| "memory(GiB)": 72.7, |
| "step": 5180, |
| "train_speed(iter/s)": 4.309485 |
| }, |
| { |
| "acc": 0.80108767, |
| "epoch": 3.5660247592847316, |
| "grad_norm": 1.8342450857162476, |
| "learning_rate": 2.0862078883601306e-05, |
| "loss": 0.65744696, |
| "memory(GiB)": 72.7, |
| "step": 5185, |
| "train_speed(iter/s)": 4.065786 |
| }, |
| { |
| "acc": 0.81536474, |
| "epoch": 3.5694635488308117, |
| "grad_norm": 1.8400901556015015, |
| "learning_rate": 2.0769756831890517e-05, |
| "loss": 0.60680361, |
| "memory(GiB)": 72.7, |
| "step": 5190, |
| "train_speed(iter/s)": 3.855262 |
| }, |
| { |
| "acc": 0.80920811, |
| "epoch": 3.5729023383768914, |
| "grad_norm": 2.1009435653686523, |
| "learning_rate": 2.067758602939473e-05, |
| "loss": 0.617168, |
| "memory(GiB)": 72.7, |
| "step": 5195, |
| "train_speed(iter/s)": 3.680601 |
| }, |
| { |
| "acc": 0.81185656, |
| "epoch": 3.576341127922971, |
| "grad_norm": 1.8651849031448364, |
| "learning_rate": 2.058556695296173e-05, |
| "loss": 0.62884312, |
| "memory(GiB)": 72.7, |
| "step": 5200, |
| "train_speed(iter/s)": 3.496681 |
| }, |
| { |
| "epoch": 3.576341127922971, |
| "eval_acc": 0.7778602600440905, |
| "eval_loss": 0.7939268350601196, |
| "eval_runtime": 1134.0299, |
| "eval_samples_per_second": 3.777, |
| "eval_steps_per_second": 0.068, |
| "step": 5200 |
| }, |
| { |
| "acc": 0.80170975, |
| "epoch": 3.5797799174690508, |
| "grad_norm": 1.989461064338684, |
| "learning_rate": 2.0493700078654395e-05, |
| "loss": 0.65876365, |
| "memory(GiB)": 72.7, |
| "step": 5205, |
| "train_speed(iter/s)": 1.934041 |
| }, |
| { |
| "acc": 0.81692247, |
| "epoch": 3.5832187070151305, |
| "grad_norm": 1.9536714553833008, |
| "learning_rate": 2.040198588174813e-05, |
| "loss": 0.60520372, |
| "memory(GiB)": 67.62, |
| "step": 5210, |
| "train_speed(iter/s)": 1.887787 |
| }, |
| { |
| "acc": 0.81152821, |
| "epoch": 3.5866574965612106, |
| "grad_norm": 1.9955531358718872, |
| "learning_rate": 2.0310424836728494e-05, |
| "loss": 0.64239225, |
| "memory(GiB)": 67.62, |
| "step": 5215, |
| "train_speed(iter/s)": 1.844883 |
| }, |
| { |
| "acc": 0.81309061, |
| "epoch": 3.5900962861072903, |
| "grad_norm": 1.7581534385681152, |
| "learning_rate": 2.0219017417288675e-05, |
| "loss": 0.62655144, |
| "memory(GiB)": 67.62, |
| "step": 5220, |
| "train_speed(iter/s)": 1.804023 |
| }, |
| { |
| "acc": 0.79863563, |
| "epoch": 3.59353507565337, |
| "grad_norm": 2.349116086959839, |
| "learning_rate": 2.0127764096327113e-05, |
| "loss": 0.6843668, |
| "memory(GiB)": 67.62, |
| "step": 5225, |
| "train_speed(iter/s)": 1.756569 |
| }, |
| { |
| "acc": 0.82426891, |
| "epoch": 3.59697386519945, |
| "grad_norm": 1.8200994729995728, |
| "learning_rate": 2.0036665345945005e-05, |
| "loss": 0.57460217, |
| "memory(GiB)": 67.62, |
| "step": 5230, |
| "train_speed(iter/s)": 1.719852 |
| }, |
| { |
| "acc": 0.81199923, |
| "epoch": 3.6004126547455297, |
| "grad_norm": 1.760864019393921, |
| "learning_rate": 1.9945721637443855e-05, |
| "loss": 0.63763566, |
| "memory(GiB)": 67.62, |
| "step": 5235, |
| "train_speed(iter/s)": 1.68241 |
| }, |
| { |
| "acc": 0.81704388, |
| "epoch": 3.6038514442916094, |
| "grad_norm": 1.843873143196106, |
| "learning_rate": 1.9854933441323074e-05, |
| "loss": 0.61490622, |
| "memory(GiB)": 67.62, |
| "step": 5240, |
| "train_speed(iter/s)": 1.645642 |
| }, |
| { |
| "acc": 0.80201912, |
| "epoch": 3.607290233837689, |
| "grad_norm": 1.963784098625183, |
| "learning_rate": 1.9764301227277503e-05, |
| "loss": 0.64649305, |
| "memory(GiB)": 67.62, |
| "step": 5245, |
| "train_speed(iter/s)": 1.609859 |
| }, |
| { |
| "acc": 0.79966879, |
| "epoch": 3.6107290233837688, |
| "grad_norm": 2.0832812786102295, |
| "learning_rate": 1.9673825464195065e-05, |
| "loss": 0.68630571, |
| "memory(GiB)": 67.62, |
| "step": 5250, |
| "train_speed(iter/s)": 1.577467 |
| }, |
| { |
| "acc": 0.80603333, |
| "epoch": 3.614167812929849, |
| "grad_norm": 2.1369543075561523, |
| "learning_rate": 1.9583506620154203e-05, |
| "loss": 0.65753994, |
| "memory(GiB)": 67.62, |
| "step": 5255, |
| "train_speed(iter/s)": 1.547258 |
| }, |
| { |
| "acc": 0.80580025, |
| "epoch": 3.6176066024759286, |
| "grad_norm": 1.853987455368042, |
| "learning_rate": 1.9493345162421595e-05, |
| "loss": 0.65103807, |
| "memory(GiB)": 67.62, |
| "step": 5260, |
| "train_speed(iter/s)": 1.514916 |
| }, |
| { |
| "acc": 0.81311512, |
| "epoch": 3.6210453920220083, |
| "grad_norm": 2.1064698696136475, |
| "learning_rate": 1.9403341557449614e-05, |
| "loss": 0.61463804, |
| "memory(GiB)": 67.62, |
| "step": 5265, |
| "train_speed(iter/s)": 1.48572 |
| }, |
| { |
| "acc": 0.79921217, |
| "epoch": 3.624484181568088, |
| "grad_norm": 1.9443074464797974, |
| "learning_rate": 1.9313496270874065e-05, |
| "loss": 0.67477508, |
| "memory(GiB)": 67.62, |
| "step": 5270, |
| "train_speed(iter/s)": 1.458339 |
| }, |
| { |
| "acc": 0.81266232, |
| "epoch": 3.6279229711141676, |
| "grad_norm": 1.8594951629638672, |
| "learning_rate": 1.9223809767511622e-05, |
| "loss": 0.62132969, |
| "memory(GiB)": 67.62, |
| "step": 5275, |
| "train_speed(iter/s)": 1.430819 |
| }, |
| { |
| "acc": 0.81770267, |
| "epoch": 3.6313617606602477, |
| "grad_norm": 1.726508617401123, |
| "learning_rate": 1.913428251135751e-05, |
| "loss": 0.59776912, |
| "memory(GiB)": 67.62, |
| "step": 5280, |
| "train_speed(iter/s)": 1.404985 |
| }, |
| { |
| "acc": 0.81523685, |
| "epoch": 3.6348005502063274, |
| "grad_norm": 1.8356785774230957, |
| "learning_rate": 1.904491496558308e-05, |
| "loss": 0.62854185, |
| "memory(GiB)": 67.62, |
| "step": 5285, |
| "train_speed(iter/s)": 1.381204 |
| }, |
| { |
| "acc": 0.81304836, |
| "epoch": 3.638239339752407, |
| "grad_norm": 2.129279136657715, |
| "learning_rate": 1.8955707592533422e-05, |
| "loss": 0.62155433, |
| "memory(GiB)": 67.62, |
| "step": 5290, |
| "train_speed(iter/s)": 1.359057 |
| }, |
| { |
| "acc": 0.80884018, |
| "epoch": 3.6416781292984868, |
| "grad_norm": 1.8221231698989868, |
| "learning_rate": 1.8866660853724986e-05, |
| "loss": 0.63217707, |
| "memory(GiB)": 67.62, |
| "step": 5295, |
| "train_speed(iter/s)": 1.33386 |
| }, |
| { |
| "acc": 0.81211977, |
| "epoch": 3.6451169188445665, |
| "grad_norm": 1.8867233991622925, |
| "learning_rate": 1.8777775209843136e-05, |
| "loss": 0.62917542, |
| "memory(GiB)": 67.62, |
| "step": 5300, |
| "train_speed(iter/s)": 1.313671 |
| }, |
| { |
| "epoch": 3.6451169188445665, |
| "eval_acc": 0.7788275520763036, |
| "eval_loss": 0.791822075843811, |
| "eval_runtime": 1053.4964, |
| "eval_samples_per_second": 4.066, |
| "eval_steps_per_second": 0.073, |
| "step": 5300 |
| }, |
| { |
| "acc": 0.81195221, |
| "epoch": 3.6485557083906466, |
| "grad_norm": 1.8668956756591797, |
| "learning_rate": 1.868905112073983e-05, |
| "loss": 0.63313217, |
| "memory(GiB)": 67.62, |
| "step": 5305, |
| "train_speed(iter/s)": 1.028242 |
| }, |
| { |
| "acc": 0.80295448, |
| "epoch": 3.6519944979367263, |
| "grad_norm": 1.8957765102386475, |
| "learning_rate": 1.8600489045431255e-05, |
| "loss": 0.6641448, |
| "memory(GiB)": 67.62, |
| "step": 5310, |
| "train_speed(iter/s)": 1.011268 |
| }, |
| { |
| "acc": 0.81735973, |
| "epoch": 3.655433287482806, |
| "grad_norm": 1.843002438545227, |
| "learning_rate": 1.851208944209535e-05, |
| "loss": 0.60693998, |
| "memory(GiB)": 67.62, |
| "step": 5315, |
| "train_speed(iter/s)": 0.999134 |
| }, |
| { |
| "acc": 0.80702572, |
| "epoch": 3.658872077028886, |
| "grad_norm": 1.8155903816223145, |
| "learning_rate": 1.8423852768069548e-05, |
| "loss": 0.65699286, |
| "memory(GiB)": 67.62, |
| "step": 5320, |
| "train_speed(iter/s)": 0.984903 |
| }, |
| { |
| "acc": 0.80685482, |
| "epoch": 3.6623108665749657, |
| "grad_norm": 2.5197625160217285, |
| "learning_rate": 1.8335779479848343e-05, |
| "loss": 0.64485803, |
| "memory(GiB)": 67.62, |
| "step": 5325, |
| "train_speed(iter/s)": 0.97156 |
| }, |
| { |
| "acc": 0.80205936, |
| "epoch": 3.6657496561210454, |
| "grad_norm": 2.0389351844787598, |
| "learning_rate": 1.8247870033080946e-05, |
| "loss": 0.66550064, |
| "memory(GiB)": 67.62, |
| "step": 5330, |
| "train_speed(iter/s)": 0.958682 |
| }, |
| { |
| "acc": 0.79223623, |
| "epoch": 3.669188445667125, |
| "grad_norm": 1.8488144874572754, |
| "learning_rate": 1.8160124882568932e-05, |
| "loss": 0.69218178, |
| "memory(GiB)": 67.62, |
| "step": 5335, |
| "train_speed(iter/s)": 0.946767 |
| }, |
| { |
| "acc": 0.80992165, |
| "epoch": 3.6726272352132048, |
| "grad_norm": 1.701180338859558, |
| "learning_rate": 1.8072544482263918e-05, |
| "loss": 0.63368897, |
| "memory(GiB)": 67.62, |
| "step": 5340, |
| "train_speed(iter/s)": 0.934657 |
| }, |
| { |
| "acc": 0.81273346, |
| "epoch": 3.676066024759285, |
| "grad_norm": 1.870936632156372, |
| "learning_rate": 1.798512928526514e-05, |
| "loss": 0.62342134, |
| "memory(GiB)": 67.62, |
| "step": 5345, |
| "train_speed(iter/s)": 0.922237 |
| }, |
| { |
| "acc": 0.81911898, |
| "epoch": 3.6795048143053646, |
| "grad_norm": 2.1875438690185547, |
| "learning_rate": 1.789787974381717e-05, |
| "loss": 0.60667896, |
| "memory(GiB)": 67.62, |
| "step": 5350, |
| "train_speed(iter/s)": 0.910869 |
| }, |
| { |
| "acc": 0.81839104, |
| "epoch": 3.6829436038514443, |
| "grad_norm": 1.9975168704986572, |
| "learning_rate": 1.7810796309307553e-05, |
| "loss": 0.61631479, |
| "memory(GiB)": 67.62, |
| "step": 5355, |
| "train_speed(iter/s)": 0.898788 |
| }, |
| { |
| "acc": 0.79996266, |
| "epoch": 3.686382393397524, |
| "grad_norm": 2.1356396675109863, |
| "learning_rate": 1.7723879432264454e-05, |
| "loss": 0.65718513, |
| "memory(GiB)": 67.62, |
| "step": 5360, |
| "train_speed(iter/s)": 0.888306 |
| }, |
| { |
| "acc": 0.81604223, |
| "epoch": 3.6898211829436036, |
| "grad_norm": 1.8731410503387451, |
| "learning_rate": 1.763712956235441e-05, |
| "loss": 0.62172794, |
| "memory(GiB)": 67.62, |
| "step": 5365, |
| "train_speed(iter/s)": 0.877305 |
| }, |
| { |
| "acc": 0.80391541, |
| "epoch": 3.6932599724896837, |
| "grad_norm": 2.0950632095336914, |
| "learning_rate": 1.7550547148379887e-05, |
| "loss": 0.66051216, |
| "memory(GiB)": 67.62, |
| "step": 5370, |
| "train_speed(iter/s)": 0.867559 |
| }, |
| { |
| "acc": 0.81647606, |
| "epoch": 3.6966987620357634, |
| "grad_norm": 1.7469427585601807, |
| "learning_rate": 1.7464132638277024e-05, |
| "loss": 0.61341143, |
| "memory(GiB)": 67.62, |
| "step": 5375, |
| "train_speed(iter/s)": 0.85733 |
| }, |
| { |
| "acc": 0.81001339, |
| "epoch": 3.700137551581843, |
| "grad_norm": 1.9832128286361694, |
| "learning_rate": 1.737788647911332e-05, |
| "loss": 0.63573427, |
| "memory(GiB)": 67.62, |
| "step": 5380, |
| "train_speed(iter/s)": 0.846179 |
| }, |
| { |
| "acc": 0.81297035, |
| "epoch": 3.703576341127923, |
| "grad_norm": 1.993898868560791, |
| "learning_rate": 1.72918091170853e-05, |
| "loss": 0.64141645, |
| "memory(GiB)": 67.62, |
| "step": 5385, |
| "train_speed(iter/s)": 0.837314 |
| }, |
| { |
| "acc": 0.81126728, |
| "epoch": 3.707015130674003, |
| "grad_norm": 1.8106107711791992, |
| "learning_rate": 1.72059009975162e-05, |
| "loss": 0.63114452, |
| "memory(GiB)": 67.62, |
| "step": 5390, |
| "train_speed(iter/s)": 0.827419 |
| }, |
| { |
| "acc": 0.80908537, |
| "epoch": 3.7104539202200826, |
| "grad_norm": 2.117880344390869, |
| "learning_rate": 1.71201625648537e-05, |
| "loss": 0.64524364, |
| "memory(GiB)": 67.62, |
| "step": 5395, |
| "train_speed(iter/s)": 0.818052 |
| }, |
| { |
| "acc": 0.80611687, |
| "epoch": 3.7138927097661623, |
| "grad_norm": 1.86283278465271, |
| "learning_rate": 1.7034594262667588e-05, |
| "loss": 0.65121384, |
| "memory(GiB)": 67.62, |
| "step": 5400, |
| "train_speed(iter/s)": 0.809997 |
| }, |
| { |
| "epoch": 3.7138927097661623, |
| "eval_acc": 0.7794068025374544, |
| "eval_loss": 0.7907042503356934, |
| "eval_runtime": 1176.8109, |
| "eval_samples_per_second": 3.639, |
| "eval_steps_per_second": 0.065, |
| "step": 5400 |
| }, |
| { |
| "acc": 0.80929089, |
| "epoch": 3.717331499312242, |
| "grad_norm": 2.175724983215332, |
| "learning_rate": 1.6949196533647456e-05, |
| "loss": 0.63896065, |
| "memory(GiB)": 67.62, |
| "step": 5405, |
| "train_speed(iter/s)": 0.682919 |
| }, |
| { |
| "acc": 0.80504618, |
| "epoch": 3.720770288858322, |
| "grad_norm": 1.7912895679473877, |
| "learning_rate": 1.6863969819600486e-05, |
| "loss": 0.6515821, |
| "memory(GiB)": 67.62, |
| "step": 5410, |
| "train_speed(iter/s)": 0.677418 |
| }, |
| { |
| "acc": 0.82038784, |
| "epoch": 3.7242090784044017, |
| "grad_norm": 1.9168109893798828, |
| "learning_rate": 1.6778914561449068e-05, |
| "loss": 0.60445056, |
| "memory(GiB)": 67.62, |
| "step": 5415, |
| "train_speed(iter/s)": 0.672424 |
| }, |
| { |
| "acc": 0.80365715, |
| "epoch": 3.7276478679504814, |
| "grad_norm": 2.0032663345336914, |
| "learning_rate": 1.669403119922857e-05, |
| "loss": 0.65206861, |
| "memory(GiB)": 67.62, |
| "step": 5420, |
| "train_speed(iter/s)": 0.666879 |
| }, |
| { |
| "acc": 0.81271191, |
| "epoch": 3.731086657496561, |
| "grad_norm": 1.7971467971801758, |
| "learning_rate": 1.660932017208504e-05, |
| "loss": 0.63001757, |
| "memory(GiB)": 67.62, |
| "step": 5425, |
| "train_speed(iter/s)": 0.661349 |
| }, |
| { |
| "acc": 0.80627918, |
| "epoch": 3.7345254470426408, |
| "grad_norm": 2.1404869556427, |
| "learning_rate": 1.6524781918272988e-05, |
| "loss": 0.65701981, |
| "memory(GiB)": 67.62, |
| "step": 5430, |
| "train_speed(iter/s)": 0.656674 |
| }, |
| { |
| "acc": 0.81756916, |
| "epoch": 3.737964236588721, |
| "grad_norm": 1.9488438367843628, |
| "learning_rate": 1.6440416875153035e-05, |
| "loss": 0.62909493, |
| "memory(GiB)": 67.62, |
| "step": 5435, |
| "train_speed(iter/s)": 0.651758 |
| }, |
| { |
| "acc": 0.81080599, |
| "epoch": 3.7414030261348006, |
| "grad_norm": 1.9031460285186768, |
| "learning_rate": 1.6356225479189706e-05, |
| "loss": 0.64159656, |
| "memory(GiB)": 67.62, |
| "step": 5440, |
| "train_speed(iter/s)": 0.647121 |
| }, |
| { |
| "acc": 0.80497589, |
| "epoch": 3.7448418156808803, |
| "grad_norm": 1.9063955545425415, |
| "learning_rate": 1.6272208165949165e-05, |
| "loss": 0.66333132, |
| "memory(GiB)": 67.62, |
| "step": 5445, |
| "train_speed(iter/s)": 0.641945 |
| }, |
| { |
| "acc": 0.81537628, |
| "epoch": 3.7482806052269604, |
| "grad_norm": 1.9544923305511475, |
| "learning_rate": 1.6188365370096938e-05, |
| "loss": 0.60649881, |
| "memory(GiB)": 67.62, |
| "step": 5450, |
| "train_speed(iter/s)": 0.637092 |
| }, |
| { |
| "acc": 0.81485357, |
| "epoch": 3.7517193947730396, |
| "grad_norm": 1.7963929176330566, |
| "learning_rate": 1.61046975253957e-05, |
| "loss": 0.62127781, |
| "memory(GiB)": 67.62, |
| "step": 5455, |
| "train_speed(iter/s)": 0.632455 |
| }, |
| { |
| "acc": 0.80606298, |
| "epoch": 3.7551581843191197, |
| "grad_norm": 1.91194748878479, |
| "learning_rate": 1.6021205064703e-05, |
| "loss": 0.6456295, |
| "memory(GiB)": 67.62, |
| "step": 5460, |
| "train_speed(iter/s)": 0.627638 |
| }, |
| { |
| "acc": 0.80508499, |
| "epoch": 3.7585969738651994, |
| "grad_norm": 1.6945174932479858, |
| "learning_rate": 1.593788841996904e-05, |
| "loss": 0.64310069, |
| "memory(GiB)": 67.62, |
| "step": 5465, |
| "train_speed(iter/s)": 0.622963 |
| }, |
| { |
| "acc": 0.82610073, |
| "epoch": 3.762035763411279, |
| "grad_norm": 1.6806504726409912, |
| "learning_rate": 1.5854748022234422e-05, |
| "loss": 0.57846365, |
| "memory(GiB)": 67.62, |
| "step": 5470, |
| "train_speed(iter/s)": 0.619076 |
| }, |
| { |
| "acc": 0.80571289, |
| "epoch": 3.7654745529573592, |
| "grad_norm": 1.9896758794784546, |
| "learning_rate": 1.5771784301627968e-05, |
| "loss": 0.64995089, |
| "memory(GiB)": 67.62, |
| "step": 5475, |
| "train_speed(iter/s)": 0.614402 |
| }, |
| { |
| "acc": 0.81692181, |
| "epoch": 3.768913342503439, |
| "grad_norm": 1.7746247053146362, |
| "learning_rate": 1.5688997687364408e-05, |
| "loss": 0.61731248, |
| "memory(GiB)": 67.62, |
| "step": 5480, |
| "train_speed(iter/s)": 0.610536 |
| }, |
| { |
| "acc": 0.79758596, |
| "epoch": 3.7723521320495186, |
| "grad_norm": 1.9613304138183594, |
| "learning_rate": 1.560638860774223e-05, |
| "loss": 0.66896119, |
| "memory(GiB)": 67.62, |
| "step": 5485, |
| "train_speed(iter/s)": 0.605861 |
| }, |
| { |
| "acc": 0.80816298, |
| "epoch": 3.7757909215955983, |
| "grad_norm": 1.7979682683944702, |
| "learning_rate": 1.552395749014145e-05, |
| "loss": 0.64903908, |
| "memory(GiB)": 67.62, |
| "step": 5490, |
| "train_speed(iter/s)": 0.601267 |
| }, |
| { |
| "acc": 0.81845226, |
| "epoch": 3.779229711141678, |
| "grad_norm": 1.610510230064392, |
| "learning_rate": 1.5441704761021365e-05, |
| "loss": 0.61122522, |
| "memory(GiB)": 67.62, |
| "step": 5495, |
| "train_speed(iter/s)": 0.59685 |
| }, |
| { |
| "acc": 0.80607834, |
| "epoch": 3.782668500687758, |
| "grad_norm": 1.8088189363479614, |
| "learning_rate": 1.535963084591842e-05, |
| "loss": 0.6456028, |
| "memory(GiB)": 67.62, |
| "step": 5500, |
| "train_speed(iter/s)": 0.59234 |
| }, |
| { |
| "epoch": 3.782668500687758, |
| "eval_acc": 0.7795867638457732, |
| "eval_loss": 0.7851858735084534, |
| "eval_runtime": 1107.2216, |
| "eval_samples_per_second": 3.868, |
| "eval_steps_per_second": 0.07, |
| "step": 5500 |
| }, |
| { |
| "acc": 0.80722027, |
| "epoch": 3.7861072902338377, |
| "grad_norm": 1.7930651903152466, |
| "learning_rate": 1.527773616944393e-05, |
| "loss": 0.65197091, |
| "memory(GiB)": 67.62, |
| "step": 5505, |
| "train_speed(iter/s)": 0.525895 |
| }, |
| { |
| "acc": 0.80907288, |
| "epoch": 3.7895460797799174, |
| "grad_norm": 1.873205542564392, |
| "learning_rate": 1.519602115528191e-05, |
| "loss": 0.63936815, |
| "memory(GiB)": 67.62, |
| "step": 5510, |
| "train_speed(iter/s)": 0.522195 |
| }, |
| { |
| "acc": 0.81462736, |
| "epoch": 3.792984869325997, |
| "grad_norm": 2.1219732761383057, |
| "learning_rate": 1.5114486226186914e-05, |
| "loss": 0.63517313, |
| "memory(GiB)": 67.62, |
| "step": 5515, |
| "train_speed(iter/s)": 0.518863 |
| }, |
| { |
| "acc": 0.81379719, |
| "epoch": 3.796423658872077, |
| "grad_norm": 1.8798179626464844, |
| "learning_rate": 1.5033131803981795e-05, |
| "loss": 0.6165091, |
| "memory(GiB)": 67.62, |
| "step": 5520, |
| "train_speed(iter/s)": 0.516156 |
| }, |
| { |
| "acc": 0.80504332, |
| "epoch": 3.799862448418157, |
| "grad_norm": 2.1897356510162354, |
| "learning_rate": 1.495195830955555e-05, |
| "loss": 0.65493903, |
| "memory(GiB)": 67.62, |
| "step": 5525, |
| "train_speed(iter/s)": 0.512721 |
| }, |
| { |
| "acc": 0.79971151, |
| "epoch": 3.8033012379642366, |
| "grad_norm": 2.3374557495117188, |
| "learning_rate": 1.4870966162861185e-05, |
| "loss": 0.66825953, |
| "memory(GiB)": 67.62, |
| "step": 5530, |
| "train_speed(iter/s)": 0.509778 |
| }, |
| { |
| "acc": 0.81023417, |
| "epoch": 3.8067400275103163, |
| "grad_norm": 2.0296730995178223, |
| "learning_rate": 1.4790155782913446e-05, |
| "loss": 0.6293088, |
| "memory(GiB)": 67.62, |
| "step": 5535, |
| "train_speed(iter/s)": 0.506899 |
| }, |
| { |
| "acc": 0.81744757, |
| "epoch": 3.8101788170563964, |
| "grad_norm": 2.1950666904449463, |
| "learning_rate": 1.4709527587786729e-05, |
| "loss": 0.60644913, |
| "memory(GiB)": 67.62, |
| "step": 5540, |
| "train_speed(iter/s)": 0.504436 |
| }, |
| { |
| "acc": 0.80740032, |
| "epoch": 3.813617606602476, |
| "grad_norm": 1.870073676109314, |
| "learning_rate": 1.4629081994612883e-05, |
| "loss": 0.65674248, |
| "memory(GiB)": 67.62, |
| "step": 5545, |
| "train_speed(iter/s)": 0.501407 |
| }, |
| { |
| "acc": 0.82541618, |
| "epoch": 3.8170563961485557, |
| "grad_norm": 1.814864993095398, |
| "learning_rate": 1.4548819419579082e-05, |
| "loss": 0.59056787, |
| "memory(GiB)": 67.62, |
| "step": 5550, |
| "train_speed(iter/s)": 0.49869 |
| }, |
| { |
| "acc": 0.79932752, |
| "epoch": 3.8204951856946354, |
| "grad_norm": 2.118622303009033, |
| "learning_rate": 1.4468740277925627e-05, |
| "loss": 0.67586517, |
| "memory(GiB)": 67.62, |
| "step": 5555, |
| "train_speed(iter/s)": 0.496257 |
| }, |
| { |
| "acc": 0.80807095, |
| "epoch": 3.823933975240715, |
| "grad_norm": 2.1060431003570557, |
| "learning_rate": 1.4388844983943837e-05, |
| "loss": 0.64639549, |
| "memory(GiB)": 67.62, |
| "step": 5560, |
| "train_speed(iter/s)": 0.493446 |
| }, |
| { |
| "acc": 0.80921745, |
| "epoch": 3.8273727647867952, |
| "grad_norm": 1.9443578720092773, |
| "learning_rate": 1.430913395097388e-05, |
| "loss": 0.63667898, |
| "memory(GiB)": 67.62, |
| "step": 5565, |
| "train_speed(iter/s)": 0.490752 |
| }, |
| { |
| "acc": 0.82145481, |
| "epoch": 3.830811554332875, |
| "grad_norm": 2.1125001907348633, |
| "learning_rate": 1.4229607591402635e-05, |
| "loss": 0.58884673, |
| "memory(GiB)": 67.62, |
| "step": 5570, |
| "train_speed(iter/s)": 0.487797 |
| }, |
| { |
| "acc": 0.82449379, |
| "epoch": 3.8342503438789546, |
| "grad_norm": 1.7490825653076172, |
| "learning_rate": 1.4150266316661623e-05, |
| "loss": 0.60519004, |
| "memory(GiB)": 67.62, |
| "step": 5575, |
| "train_speed(iter/s)": 0.485346 |
| }, |
| { |
| "acc": 0.81672592, |
| "epoch": 3.8376891334250343, |
| "grad_norm": 1.6957894563674927, |
| "learning_rate": 1.407111053722477e-05, |
| "loss": 0.61075163, |
| "memory(GiB)": 67.62, |
| "step": 5580, |
| "train_speed(iter/s)": 0.482577 |
| }, |
| { |
| "acc": 0.79658046, |
| "epoch": 3.841127922971114, |
| "grad_norm": 2.5719101428985596, |
| "learning_rate": 1.3992140662606357e-05, |
| "loss": 0.67197566, |
| "memory(GiB)": 67.62, |
| "step": 5585, |
| "train_speed(iter/s)": 0.480134 |
| }, |
| { |
| "acc": 0.82320576, |
| "epoch": 3.844566712517194, |
| "grad_norm": 2.0491995811462402, |
| "learning_rate": 1.3913357101358865e-05, |
| "loss": 0.59475327, |
| "memory(GiB)": 67.62, |
| "step": 5590, |
| "train_speed(iter/s)": 0.477933 |
| }, |
| { |
| "acc": 0.81174135, |
| "epoch": 3.8480055020632737, |
| "grad_norm": 1.8027801513671875, |
| "learning_rate": 1.3834760261070908e-05, |
| "loss": 0.63737803, |
| "memory(GiB)": 67.62, |
| "step": 5595, |
| "train_speed(iter/s)": 0.475151 |
| }, |
| { |
| "acc": 0.81277132, |
| "epoch": 3.8514442916093534, |
| "grad_norm": 2.0076797008514404, |
| "learning_rate": 1.3756350548365069e-05, |
| "loss": 0.64119816, |
| "memory(GiB)": 67.62, |
| "step": 5600, |
| "train_speed(iter/s)": 0.472763 |
| }, |
| { |
| "epoch": 3.8514442916093534, |
| "eval_acc": 0.7802222522157736, |
| "eval_loss": 0.7851279377937317, |
| "eval_runtime": 1153.3756, |
| "eval_samples_per_second": 3.713, |
| "eval_steps_per_second": 0.067, |
| "step": 5600 |
| }, |
| { |
| "acc": 0.79907894, |
| "epoch": 3.8548830811554335, |
| "grad_norm": 2.066263437271118, |
| "learning_rate": 1.3678128368895824e-05, |
| "loss": 0.66954241, |
| "memory(GiB)": 67.62, |
| "step": 5605, |
| "train_speed(iter/s)": 0.428917 |
| }, |
| { |
| "acc": 0.81123543, |
| "epoch": 3.8583218707015132, |
| "grad_norm": 1.8212432861328125, |
| "learning_rate": 1.3600094127347462e-05, |
| "loss": 0.64494739, |
| "memory(GiB)": 67.62, |
| "step": 5610, |
| "train_speed(iter/s)": 0.427046 |
| }, |
| { |
| "acc": 0.8146841, |
| "epoch": 3.861760660247593, |
| "grad_norm": 1.99728524684906, |
| "learning_rate": 1.3522248227431972e-05, |
| "loss": 0.61559277, |
| "memory(GiB)": 67.62, |
| "step": 5615, |
| "train_speed(iter/s)": 0.42489 |
| }, |
| { |
| "acc": 0.8208971, |
| "epoch": 3.8651994497936726, |
| "grad_norm": 2.206382989883423, |
| "learning_rate": 1.3444591071886931e-05, |
| "loss": 0.61427069, |
| "memory(GiB)": 67.62, |
| "step": 5620, |
| "train_speed(iter/s)": 0.423148 |
| }, |
| { |
| "acc": 0.81454735, |
| "epoch": 3.8686382393397523, |
| "grad_norm": 1.812099575996399, |
| "learning_rate": 1.3367123062473446e-05, |
| "loss": 0.62899446, |
| "memory(GiB)": 67.62, |
| "step": 5625, |
| "train_speed(iter/s)": 0.421338 |
| }, |
| { |
| "acc": 0.81133175, |
| "epoch": 3.8720770288858324, |
| "grad_norm": 1.8373388051986694, |
| "learning_rate": 1.328984459997408e-05, |
| "loss": 0.63102517, |
| "memory(GiB)": 67.62, |
| "step": 5630, |
| "train_speed(iter/s)": 0.419334 |
| }, |
| { |
| "acc": 0.80838804, |
| "epoch": 3.875515818431912, |
| "grad_norm": 1.7026041746139526, |
| "learning_rate": 1.3212756084190767e-05, |
| "loss": 0.63373623, |
| "memory(GiB)": 67.62, |
| "step": 5635, |
| "train_speed(iter/s)": 0.417529 |
| }, |
| { |
| "acc": 0.81233072, |
| "epoch": 3.8789546079779917, |
| "grad_norm": 1.80439293384552, |
| "learning_rate": 1.313585791394274e-05, |
| "loss": 0.63350501, |
| "memory(GiB)": 67.62, |
| "step": 5640, |
| "train_speed(iter/s)": 0.415882 |
| }, |
| { |
| "acc": 0.80420494, |
| "epoch": 3.8823933975240714, |
| "grad_norm": 1.835792064666748, |
| "learning_rate": 1.3059150487064497e-05, |
| "loss": 0.64182324, |
| "memory(GiB)": 67.62, |
| "step": 5645, |
| "train_speed(iter/s)": 0.413931 |
| }, |
| { |
| "acc": 0.82478485, |
| "epoch": 3.885832187070151, |
| "grad_norm": 2.1019296646118164, |
| "learning_rate": 1.2982634200403704e-05, |
| "loss": 0.57977004, |
| "memory(GiB)": 67.62, |
| "step": 5650, |
| "train_speed(iter/s)": 0.41225 |
| }, |
| { |
| "acc": 0.82099762, |
| "epoch": 3.8892709766162312, |
| "grad_norm": 1.7367315292358398, |
| "learning_rate": 1.2906309449819154e-05, |
| "loss": 0.60107656, |
| "memory(GiB)": 67.62, |
| "step": 5655, |
| "train_speed(iter/s)": 0.410363 |
| }, |
| { |
| "acc": 0.81159325, |
| "epoch": 3.892709766162311, |
| "grad_norm": 2.0694830417633057, |
| "learning_rate": 1.2830176630178729e-05, |
| "loss": 0.61608582, |
| "memory(GiB)": 67.62, |
| "step": 5660, |
| "train_speed(iter/s)": 0.408617 |
| }, |
| { |
| "acc": 0.81860466, |
| "epoch": 3.8961485557083906, |
| "grad_norm": 1.9818027019500732, |
| "learning_rate": 1.2754236135357367e-05, |
| "loss": 0.60277052, |
| "memory(GiB)": 67.62, |
| "step": 5665, |
| "train_speed(iter/s)": 0.40679 |
| }, |
| { |
| "acc": 0.8118084, |
| "epoch": 3.8995873452544703, |
| "grad_norm": 1.893306016921997, |
| "learning_rate": 1.2678488358234992e-05, |
| "loss": 0.64575768, |
| "memory(GiB)": 67.62, |
| "step": 5670, |
| "train_speed(iter/s)": 0.405098 |
| }, |
| { |
| "acc": 0.80856295, |
| "epoch": 3.90302613480055, |
| "grad_norm": 1.9855684041976929, |
| "learning_rate": 1.2602933690694502e-05, |
| "loss": 0.65475564, |
| "memory(GiB)": 67.62, |
| "step": 5675, |
| "train_speed(iter/s)": 0.403391 |
| }, |
| { |
| "acc": 0.82089319, |
| "epoch": 3.90646492434663, |
| "grad_norm": 1.8527436256408691, |
| "learning_rate": 1.2527572523619729e-05, |
| "loss": 0.59858413, |
| "memory(GiB)": 67.62, |
| "step": 5680, |
| "train_speed(iter/s)": 0.401656 |
| }, |
| { |
| "acc": 0.81135502, |
| "epoch": 3.9099037138927097, |
| "grad_norm": 1.8112705945968628, |
| "learning_rate": 1.245240524689345e-05, |
| "loss": 0.640869, |
| "memory(GiB)": 67.62, |
| "step": 5685, |
| "train_speed(iter/s)": 0.399915 |
| }, |
| { |
| "acc": 0.81638031, |
| "epoch": 3.9133425034387894, |
| "grad_norm": 1.88164222240448, |
| "learning_rate": 1.2377432249395323e-05, |
| "loss": 0.62925024, |
| "memory(GiB)": 67.62, |
| "step": 5690, |
| "train_speed(iter/s)": 0.398338 |
| }, |
| { |
| "acc": 0.81665897, |
| "epoch": 3.9167812929848695, |
| "grad_norm": 2.2220370769500732, |
| "learning_rate": 1.2302653918999902e-05, |
| "loss": 0.61042566, |
| "memory(GiB)": 67.62, |
| "step": 5695, |
| "train_speed(iter/s)": 0.396628 |
| }, |
| { |
| "acc": 0.81146564, |
| "epoch": 3.9202200825309492, |
| "grad_norm": 2.2208054065704346, |
| "learning_rate": 1.2228070642574637e-05, |
| "loss": 0.62549958, |
| "memory(GiB)": 67.62, |
| "step": 5700, |
| "train_speed(iter/s)": 0.394844 |
| }, |
| { |
| "epoch": 3.9202200825309492, |
| "eval_acc": 0.7811276825482522, |
| "eval_loss": 0.783173680305481, |
| "eval_runtime": 1145.264, |
| "eval_samples_per_second": 3.74, |
| "eval_steps_per_second": 0.067, |
| "step": 5700 |
| }, |
| { |
| "acc": 0.82111177, |
| "epoch": 3.923658872077029, |
| "grad_norm": 2.1147096157073975, |
| "learning_rate": 1.2153682805977849e-05, |
| "loss": 0.61029615, |
| "memory(GiB)": 67.62, |
| "step": 5705, |
| "train_speed(iter/s)": 0.364527 |
| }, |
| { |
| "acc": 0.81873646, |
| "epoch": 3.9270976616231086, |
| "grad_norm": 1.870378851890564, |
| "learning_rate": 1.2079490794056745e-05, |
| "loss": 0.60247025, |
| "memory(GiB)": 67.62, |
| "step": 5710, |
| "train_speed(iter/s)": 0.363318 |
| }, |
| { |
| "acc": 0.8050211, |
| "epoch": 3.9305364511691883, |
| "grad_norm": 2.061549663543701, |
| "learning_rate": 1.2005494990645446e-05, |
| "loss": 0.64639635, |
| "memory(GiB)": 67.62, |
| "step": 5715, |
| "train_speed(iter/s)": 0.361877 |
| }, |
| { |
| "acc": 0.80292168, |
| "epoch": 3.9339752407152684, |
| "grad_norm": 2.088428020477295, |
| "learning_rate": 1.1931695778562984e-05, |
| "loss": 0.66072493, |
| "memory(GiB)": 67.62, |
| "step": 5720, |
| "train_speed(iter/s)": 0.360551 |
| }, |
| { |
| "acc": 0.81333771, |
| "epoch": 3.937414030261348, |
| "grad_norm": 2.195223093032837, |
| "learning_rate": 1.1858093539611302e-05, |
| "loss": 0.62468419, |
| "memory(GiB)": 67.62, |
| "step": 5725, |
| "train_speed(iter/s)": 0.359367 |
| }, |
| { |
| "acc": 0.80784473, |
| "epoch": 3.9408528198074277, |
| "grad_norm": 2.1771881580352783, |
| "learning_rate": 1.1784688654573306e-05, |
| "loss": 0.6561008, |
| "memory(GiB)": 67.62, |
| "step": 5730, |
| "train_speed(iter/s)": 0.35807 |
| }, |
| { |
| "acc": 0.81136761, |
| "epoch": 3.9442916093535074, |
| "grad_norm": 1.9094853401184082, |
| "learning_rate": 1.1711481503210884e-05, |
| "loss": 0.63656788, |
| "memory(GiB)": 67.62, |
| "step": 5735, |
| "train_speed(iter/s)": 0.356785 |
| }, |
| { |
| "acc": 0.81901407, |
| "epoch": 3.947730398899587, |
| "grad_norm": 1.9423341751098633, |
| "learning_rate": 1.1638472464262948e-05, |
| "loss": 0.61632404, |
| "memory(GiB)": 67.62, |
| "step": 5740, |
| "train_speed(iter/s)": 0.355531 |
| }, |
| { |
| "acc": 0.81649647, |
| "epoch": 3.9511691884456672, |
| "grad_norm": 1.8900690078735352, |
| "learning_rate": 1.1565661915443475e-05, |
| "loss": 0.61735368, |
| "memory(GiB)": 67.62, |
| "step": 5745, |
| "train_speed(iter/s)": 0.354181 |
| }, |
| { |
| "acc": 0.80203295, |
| "epoch": 3.954607977991747, |
| "grad_norm": 1.9980183839797974, |
| "learning_rate": 1.1493050233439526e-05, |
| "loss": 0.66276655, |
| "memory(GiB)": 67.62, |
| "step": 5750, |
| "train_speed(iter/s)": 0.352959 |
| }, |
| { |
| "acc": 0.81161861, |
| "epoch": 3.9580467675378266, |
| "grad_norm": 1.8814200162887573, |
| "learning_rate": 1.1420637793909362e-05, |
| "loss": 0.64876308, |
| "memory(GiB)": 67.62, |
| "step": 5755, |
| "train_speed(iter/s)": 0.351641 |
| }, |
| { |
| "acc": 0.80822277, |
| "epoch": 3.9614855570839067, |
| "grad_norm": 2.157858371734619, |
| "learning_rate": 1.1348424971480429e-05, |
| "loss": 0.64273562, |
| "memory(GiB)": 67.62, |
| "step": 5760, |
| "train_speed(iter/s)": 0.350261 |
| }, |
| { |
| "acc": 0.81014862, |
| "epoch": 3.9649243466299864, |
| "grad_norm": 1.8866498470306396, |
| "learning_rate": 1.1276412139747452e-05, |
| "loss": 0.63060379, |
| "memory(GiB)": 67.62, |
| "step": 5765, |
| "train_speed(iter/s)": 0.34897 |
| }, |
| { |
| "acc": 0.80797882, |
| "epoch": 3.968363136176066, |
| "grad_norm": 2.011620283126831, |
| "learning_rate": 1.1204599671270494e-05, |
| "loss": 0.64154892, |
| "memory(GiB)": 67.62, |
| "step": 5770, |
| "train_speed(iter/s)": 0.347683 |
| }, |
| { |
| "acc": 0.81029825, |
| "epoch": 3.9718019257221457, |
| "grad_norm": 2.1030616760253906, |
| "learning_rate": 1.1132987937573052e-05, |
| "loss": 0.62338347, |
| "memory(GiB)": 67.62, |
| "step": 5775, |
| "train_speed(iter/s)": 0.346438 |
| }, |
| { |
| "acc": 0.7949429, |
| "epoch": 3.9752407152682254, |
| "grad_norm": 2.054006338119507, |
| "learning_rate": 1.1061577309140098e-05, |
| "loss": 0.70458865, |
| "memory(GiB)": 67.62, |
| "step": 5780, |
| "train_speed(iter/s)": 0.345233 |
| }, |
| { |
| "acc": 0.8113884, |
| "epoch": 3.9786795048143055, |
| "grad_norm": 2.072899103164673, |
| "learning_rate": 1.0990368155416202e-05, |
| "loss": 0.63724394, |
| "memory(GiB)": 67.62, |
| "step": 5785, |
| "train_speed(iter/s)": 0.344106 |
| }, |
| { |
| "acc": 0.81157551, |
| "epoch": 3.9821182943603852, |
| "grad_norm": 1.9557698965072632, |
| "learning_rate": 1.091936084480358e-05, |
| "loss": 0.62347059, |
| "memory(GiB)": 67.62, |
| "step": 5790, |
| "train_speed(iter/s)": 0.342935 |
| }, |
| { |
| "acc": 0.81167564, |
| "epoch": 3.985557083906465, |
| "grad_norm": 1.9136029481887817, |
| "learning_rate": 1.0848555744660215e-05, |
| "loss": 0.61960039, |
| "memory(GiB)": 67.62, |
| "step": 5795, |
| "train_speed(iter/s)": 0.341839 |
| }, |
| { |
| "acc": 0.83220634, |
| "epoch": 3.9889958734525446, |
| "grad_norm": 1.9021817445755005, |
| "learning_rate": 1.0777953221297932e-05, |
| "loss": 0.56068201, |
| "memory(GiB)": 67.62, |
| "step": 5800, |
| "train_speed(iter/s)": 0.340892 |
| }, |
| { |
| "epoch": 3.9889958734525446, |
| "eval_acc": 0.7817744185000225, |
| "eval_loss": 0.7804912328720093, |
| "eval_runtime": 1111.4903, |
| "eval_samples_per_second": 3.853, |
| "eval_steps_per_second": 0.069, |
| "step": 5800 |
| }, |
| { |
| "acc": 0.80734158, |
| "epoch": 3.9924346629986243, |
| "grad_norm": 2.02614688873291, |
| "learning_rate": 1.0707553639980585e-05, |
| "loss": 0.64991465, |
| "memory(GiB)": 67.62, |
| "step": 5805, |
| "train_speed(iter/s)": 0.319008 |
| }, |
| { |
| "acc": 0.82151909, |
| "epoch": 3.9958734525447044, |
| "grad_norm": 2.07773494720459, |
| "learning_rate": 1.0637357364922026e-05, |
| "loss": 0.58141608, |
| "memory(GiB)": 67.62, |
| "step": 5810, |
| "train_speed(iter/s)": 0.318131 |
| }, |
| { |
| "acc": 0.81823015, |
| "epoch": 3.999312242090784, |
| "grad_norm": 1.7081282138824463, |
| "learning_rate": 1.0567364759284327e-05, |
| "loss": 0.61670866, |
| "memory(GiB)": 67.62, |
| "step": 5815, |
| "train_speed(iter/s)": 0.317189 |
| }, |
| { |
| "acc": 0.82516012, |
| "epoch": 4.002751031636864, |
| "grad_norm": 1.9197478294372559, |
| "learning_rate": 1.0497576185175877e-05, |
| "loss": 0.57296357, |
| "memory(GiB)": 67.62, |
| "step": 5820, |
| "train_speed(iter/s)": 0.315614 |
| }, |
| { |
| "acc": 0.82848129, |
| "epoch": 4.006189821182944, |
| "grad_norm": 1.7602168321609497, |
| "learning_rate": 1.042799200364949e-05, |
| "loss": 0.57674851, |
| "memory(GiB)": 67.62, |
| "step": 5825, |
| "train_speed(iter/s)": 0.314514 |
| }, |
| { |
| "acc": 0.8312006, |
| "epoch": 4.009628610729023, |
| "grad_norm": 1.904069423675537, |
| "learning_rate": 1.0358612574700576e-05, |
| "loss": 0.58140912, |
| "memory(GiB)": 67.62, |
| "step": 5830, |
| "train_speed(iter/s)": 0.313493 |
| }, |
| { |
| "acc": 0.83973274, |
| "epoch": 4.013067400275103, |
| "grad_norm": 1.8969364166259766, |
| "learning_rate": 1.0289438257265218e-05, |
| "loss": 0.5239769, |
| "memory(GiB)": 67.62, |
| "step": 5835, |
| "train_speed(iter/s)": 0.3126 |
| }, |
| { |
| "acc": 0.82053461, |
| "epoch": 4.016506189821183, |
| "grad_norm": 2.004246950149536, |
| "learning_rate": 1.0220469409218385e-05, |
| "loss": 0.58951969, |
| "memory(GiB)": 67.62, |
| "step": 5840, |
| "train_speed(iter/s)": 0.311738 |
| }, |
| { |
| "acc": 0.82730808, |
| "epoch": 4.019944979367263, |
| "grad_norm": 2.120168685913086, |
| "learning_rate": 1.0151706387371993e-05, |
| "loss": 0.57174788, |
| "memory(GiB)": 67.62, |
| "step": 5845, |
| "train_speed(iter/s)": 0.310754 |
| }, |
| { |
| "acc": 0.81913013, |
| "epoch": 4.023383768913343, |
| "grad_norm": 2.083112955093384, |
| "learning_rate": 1.008314954747319e-05, |
| "loss": 0.60139389, |
| "memory(GiB)": 67.62, |
| "step": 5850, |
| "train_speed(iter/s)": 0.309795 |
| }, |
| { |
| "acc": 0.82976856, |
| "epoch": 4.026822558459422, |
| "grad_norm": 1.9144500494003296, |
| "learning_rate": 1.0014799244202362e-05, |
| "loss": 0.56441569, |
| "memory(GiB)": 67.62, |
| "step": 5855, |
| "train_speed(iter/s)": 0.308991 |
| }, |
| { |
| "acc": 0.82570667, |
| "epoch": 4.030261348005502, |
| "grad_norm": 1.9201669692993164, |
| "learning_rate": 9.94665583117142e-06, |
| "loss": 0.58550615, |
| "memory(GiB)": 67.62, |
| "step": 5860, |
| "train_speed(iter/s)": 0.308024 |
| }, |
| { |
| "acc": 0.82395906, |
| "epoch": 4.033700137551582, |
| "grad_norm": 2.058741807937622, |
| "learning_rate": 9.878719660921893e-06, |
| "loss": 0.59208636, |
| "memory(GiB)": 67.62, |
| "step": 5865, |
| "train_speed(iter/s)": 0.30702 |
| }, |
| { |
| "acc": 0.82681818, |
| "epoch": 4.037138927097661, |
| "grad_norm": 2.0860073566436768, |
| "learning_rate": 9.810991084923154e-06, |
| "loss": 0.57163272, |
| "memory(GiB)": 67.62, |
| "step": 5870, |
| "train_speed(iter/s)": 0.306194 |
| }, |
| { |
| "acc": 0.81524105, |
| "epoch": 4.0405777166437415, |
| "grad_norm": 1.9567036628723145, |
| "learning_rate": 9.743470453570575e-06, |
| "loss": 0.62305789, |
| "memory(GiB)": 67.62, |
| "step": 5875, |
| "train_speed(iter/s)": 0.305279 |
| }, |
| { |
| "acc": 0.81588497, |
| "epoch": 4.044016506189821, |
| "grad_norm": 2.048231601715088, |
| "learning_rate": 9.676158116183729e-06, |
| "loss": 0.60361052, |
| "memory(GiB)": 67.62, |
| "step": 5880, |
| "train_speed(iter/s)": 0.304326 |
| }, |
| { |
| "acc": 0.82715149, |
| "epoch": 4.047455295735901, |
| "grad_norm": 1.918243408203125, |
| "learning_rate": 9.609054421004562e-06, |
| "loss": 0.56623569, |
| "memory(GiB)": 67.62, |
| "step": 5885, |
| "train_speed(iter/s)": 0.303437 |
| }, |
| { |
| "acc": 0.82016706, |
| "epoch": 4.050894085281981, |
| "grad_norm": 2.212838888168335, |
| "learning_rate": 9.542159715195614e-06, |
| "loss": 0.60472922, |
| "memory(GiB)": 67.62, |
| "step": 5890, |
| "train_speed(iter/s)": 0.302387 |
| }, |
| { |
| "acc": 0.82020359, |
| "epoch": 4.05433287482806, |
| "grad_norm": 2.029686450958252, |
| "learning_rate": 9.475474344838204e-06, |
| "loss": 0.59589596, |
| "memory(GiB)": 67.62, |
| "step": 5895, |
| "train_speed(iter/s)": 0.301428 |
| }, |
| { |
| "acc": 0.82250319, |
| "epoch": 4.05777166437414, |
| "grad_norm": 2.0857136249542236, |
| "learning_rate": 9.408998654930675e-06, |
| "loss": 0.59207001, |
| "memory(GiB)": 67.62, |
| "step": 5900, |
| "train_speed(iter/s)": 0.300628 |
| }, |
| { |
| "epoch": 4.05777166437414, |
| "eval_acc": 0.7809758401943582, |
| "eval_loss": 0.7950036525726318, |
| "eval_runtime": 1140.5258, |
| "eval_samples_per_second": 3.755, |
| "eval_steps_per_second": 0.068, |
| "step": 5900 |
| }, |
| { |
| "acc": 0.83261538, |
| "epoch": 4.0612104539202205, |
| "grad_norm": 2.129284143447876, |
| "learning_rate": 9.342732989386557e-06, |
| "loss": 0.54631634, |
| "memory(GiB)": 67.62, |
| "step": 5905, |
| "train_speed(iter/s)": 0.283416 |
| }, |
| { |
| "acc": 0.82259159, |
| "epoch": 4.0646492434663, |
| "grad_norm": 2.0771372318267822, |
| "learning_rate": 9.27667769103282e-06, |
| "loss": 0.59988642, |
| "memory(GiB)": 67.62, |
| "step": 5910, |
| "train_speed(iter/s)": 0.282658 |
| }, |
| { |
| "acc": 0.82938833, |
| "epoch": 4.06808803301238, |
| "grad_norm": 2.0288455486297607, |
| "learning_rate": 9.210833101608094e-06, |
| "loss": 0.56707897, |
| "memory(GiB)": 67.62, |
| "step": 5915, |
| "train_speed(iter/s)": 0.281964 |
| }, |
| { |
| "acc": 0.81752338, |
| "epoch": 4.071526822558459, |
| "grad_norm": 2.1337034702301025, |
| "learning_rate": 9.145199561760913e-06, |
| "loss": 0.58798003, |
| "memory(GiB)": 67.62, |
| "step": 5920, |
| "train_speed(iter/s)": 0.281194 |
| }, |
| { |
| "acc": 0.83025227, |
| "epoch": 4.074965612104539, |
| "grad_norm": 1.9078054428100586, |
| "learning_rate": 9.079777411047923e-06, |
| "loss": 0.55221009, |
| "memory(GiB)": 67.62, |
| "step": 5925, |
| "train_speed(iter/s)": 0.280374 |
| }, |
| { |
| "acc": 0.82038937, |
| "epoch": 4.078404401650619, |
| "grad_norm": 2.1154861450195312, |
| "learning_rate": 9.014566987932155e-06, |
| "loss": 0.58884945, |
| "memory(GiB)": 67.62, |
| "step": 5930, |
| "train_speed(iter/s)": 0.279665 |
| }, |
| { |
| "acc": 0.83256464, |
| "epoch": 4.081843191196699, |
| "grad_norm": 2.46669602394104, |
| "learning_rate": 8.949568629781233e-06, |
| "loss": 0.55993681, |
| "memory(GiB)": 67.62, |
| "step": 5935, |
| "train_speed(iter/s)": 0.279022 |
| }, |
| { |
| "acc": 0.82162399, |
| "epoch": 4.085281980742779, |
| "grad_norm": 2.2108795642852783, |
| "learning_rate": 8.884782672865745e-06, |
| "loss": 0.58439035, |
| "memory(GiB)": 67.62, |
| "step": 5940, |
| "train_speed(iter/s)": 0.278353 |
| }, |
| { |
| "acc": 0.81261024, |
| "epoch": 4.088720770288858, |
| "grad_norm": 2.3239004611968994, |
| "learning_rate": 8.820209452357312e-06, |
| "loss": 0.62102919, |
| "memory(GiB)": 67.62, |
| "step": 5945, |
| "train_speed(iter/s)": 0.277507 |
| }, |
| { |
| "acc": 0.82187653, |
| "epoch": 4.092159559834938, |
| "grad_norm": 2.306704521179199, |
| "learning_rate": 8.755849302327025e-06, |
| "loss": 0.58051348, |
| "memory(GiB)": 67.62, |
| "step": 5950, |
| "train_speed(iter/s)": 0.276835 |
| }, |
| { |
| "acc": 0.83303099, |
| "epoch": 4.095598349381018, |
| "grad_norm": 2.3323071002960205, |
| "learning_rate": 8.691702555743604e-06, |
| "loss": 0.54123106, |
| "memory(GiB)": 67.62, |
| "step": 5955, |
| "train_speed(iter/s)": 0.27621 |
| }, |
| { |
| "acc": 0.82155704, |
| "epoch": 4.099037138927097, |
| "grad_norm": 2.2443792819976807, |
| "learning_rate": 8.627769544471766e-06, |
| "loss": 0.57790089, |
| "memory(GiB)": 67.62, |
| "step": 5960, |
| "train_speed(iter/s)": 0.275577 |
| }, |
| { |
| "acc": 0.81777382, |
| "epoch": 4.1024759284731775, |
| "grad_norm": 2.0098752975463867, |
| "learning_rate": 8.564050599270423e-06, |
| "loss": 0.60635762, |
| "memory(GiB)": 67.62, |
| "step": 5965, |
| "train_speed(iter/s)": 0.27489 |
| }, |
| { |
| "acc": 0.82945662, |
| "epoch": 4.105914718019257, |
| "grad_norm": 2.6297407150268555, |
| "learning_rate": 8.50054604979104e-06, |
| "loss": 0.55736432, |
| "memory(GiB)": 67.62, |
| "step": 5970, |
| "train_speed(iter/s)": 0.274231 |
| }, |
| { |
| "acc": 0.83819923, |
| "epoch": 4.109353507565337, |
| "grad_norm": 2.027495861053467, |
| "learning_rate": 8.43725622457589e-06, |
| "loss": 0.53537874, |
| "memory(GiB)": 67.62, |
| "step": 5975, |
| "train_speed(iter/s)": 0.273579 |
| }, |
| { |
| "acc": 0.83728676, |
| "epoch": 4.112792297111417, |
| "grad_norm": 1.9991952180862427, |
| "learning_rate": 8.37418145105636e-06, |
| "loss": 0.52903852, |
| "memory(GiB)": 67.62, |
| "step": 5980, |
| "train_speed(iter/s)": 0.272886 |
| }, |
| { |
| "acc": 0.83919382, |
| "epoch": 4.116231086657496, |
| "grad_norm": 1.941271424293518, |
| "learning_rate": 8.311322055551258e-06, |
| "loss": 0.54152002, |
| "memory(GiB)": 67.62, |
| "step": 5985, |
| "train_speed(iter/s)": 0.272292 |
| }, |
| { |
| "acc": 0.82639074, |
| "epoch": 4.119669876203576, |
| "grad_norm": 2.0080490112304688, |
| "learning_rate": 8.248678363265168e-06, |
| "loss": 0.58616934, |
| "memory(GiB)": 67.62, |
| "step": 5990, |
| "train_speed(iter/s)": 0.271655 |
| }, |
| { |
| "acc": 0.82685022, |
| "epoch": 4.1231086657496565, |
| "grad_norm": 2.1029014587402344, |
| "learning_rate": 8.186250698286685e-06, |
| "loss": 0.57365303, |
| "memory(GiB)": 67.62, |
| "step": 5995, |
| "train_speed(iter/s)": 0.271018 |
| }, |
| { |
| "acc": 0.83401289, |
| "epoch": 4.126547455295736, |
| "grad_norm": 2.381568431854248, |
| "learning_rate": 8.124039383586785e-06, |
| "loss": 0.54990234, |
| "memory(GiB)": 67.62, |
| "step": 6000, |
| "train_speed(iter/s)": 0.27041 |
| }, |
| { |
| "epoch": 4.126547455295736, |
| "eval_acc": 0.7808971071219688, |
| "eval_loss": 0.7977337837219238, |
| "eval_runtime": 1150.4843, |
| "eval_samples_per_second": 3.723, |
| "eval_steps_per_second": 0.067, |
| "step": 6000 |
| }, |
| { |
| "acc": 0.82955971, |
| "epoch": 4.129986244841816, |
| "grad_norm": 2.2076478004455566, |
| "learning_rate": 8.062044741017174e-06, |
| "loss": 0.56549349, |
| "memory(GiB)": 67.62, |
| "step": 6005, |
| "train_speed(iter/s)": 0.256539 |
| }, |
| { |
| "acc": 0.82928619, |
| "epoch": 4.133425034387895, |
| "grad_norm": 2.240816116333008, |
| "learning_rate": 8.00026709130858e-06, |
| "loss": 0.56595135, |
| "memory(GiB)": 67.62, |
| "step": 6010, |
| "train_speed(iter/s)": 0.256006 |
| }, |
| { |
| "acc": 0.81660957, |
| "epoch": 4.136863823933975, |
| "grad_norm": 2.1177453994750977, |
| "learning_rate": 7.938706754069125e-06, |
| "loss": 0.60902424, |
| "memory(GiB)": 67.62, |
| "step": 6015, |
| "train_speed(iter/s)": 0.255365 |
| }, |
| { |
| "acc": 0.81723537, |
| "epoch": 4.140302613480055, |
| "grad_norm": 2.291558265686035, |
| "learning_rate": 7.877364047782646e-06, |
| "loss": 0.59432869, |
| "memory(GiB)": 67.62, |
| "step": 6020, |
| "train_speed(iter/s)": 0.254797 |
| }, |
| { |
| "acc": 0.83183041, |
| "epoch": 4.143741403026135, |
| "grad_norm": 2.1598074436187744, |
| "learning_rate": 7.816239289807078e-06, |
| "loss": 0.56827602, |
| "memory(GiB)": 67.62, |
| "step": 6025, |
| "train_speed(iter/s)": 0.254252 |
| }, |
| { |
| "acc": 0.82450991, |
| "epoch": 4.147180192572215, |
| "grad_norm": 2.316070556640625, |
| "learning_rate": 7.755332796372783e-06, |
| "loss": 0.5860589, |
| "memory(GiB)": 67.62, |
| "step": 6030, |
| "train_speed(iter/s)": 0.253753 |
| }, |
| { |
| "acc": 0.83219881, |
| "epoch": 4.150618982118294, |
| "grad_norm": 2.0400826930999756, |
| "learning_rate": 7.694644882580929e-06, |
| "loss": 0.56074944, |
| "memory(GiB)": 67.62, |
| "step": 6035, |
| "train_speed(iter/s)": 0.253206 |
| }, |
| { |
| "acc": 0.82405052, |
| "epoch": 4.154057771664374, |
| "grad_norm": 2.167229652404785, |
| "learning_rate": 7.634175862401859e-06, |
| "loss": 0.5924716, |
| "memory(GiB)": 67.62, |
| "step": 6040, |
| "train_speed(iter/s)": 0.252704 |
| }, |
| { |
| "acc": 0.83036137, |
| "epoch": 4.157496561210454, |
| "grad_norm": 2.0544652938842773, |
| "learning_rate": 7.5739260486734785e-06, |
| "loss": 0.56387725, |
| "memory(GiB)": 67.62, |
| "step": 6045, |
| "train_speed(iter/s)": 0.252135 |
| }, |
| { |
| "acc": 0.82638521, |
| "epoch": 4.160935350756533, |
| "grad_norm": 2.0944511890411377, |
| "learning_rate": 7.5138957530996e-06, |
| "loss": 0.58068042, |
| "memory(GiB)": 67.62, |
| "step": 6050, |
| "train_speed(iter/s)": 0.251533 |
| }, |
| { |
| "acc": 0.82557564, |
| "epoch": 4.1643741403026135, |
| "grad_norm": 2.206922769546509, |
| "learning_rate": 7.454085286248365e-06, |
| "loss": 0.57935457, |
| "memory(GiB)": 67.62, |
| "step": 6055, |
| "train_speed(iter/s)": 0.250929 |
| }, |
| { |
| "acc": 0.82684364, |
| "epoch": 4.167812929848694, |
| "grad_norm": 2.0836057662963867, |
| "learning_rate": 7.394494957550617e-06, |
| "loss": 0.57276134, |
| "memory(GiB)": 67.62, |
| "step": 6060, |
| "train_speed(iter/s)": 0.250399 |
| }, |
| { |
| "acc": 0.82708397, |
| "epoch": 4.171251719394773, |
| "grad_norm": 2.394265651702881, |
| "learning_rate": 7.335125075298327e-06, |
| "loss": 0.56799402, |
| "memory(GiB)": 67.62, |
| "step": 6065, |
| "train_speed(iter/s)": 0.249822 |
| }, |
| { |
| "acc": 0.82027712, |
| "epoch": 4.174690508940853, |
| "grad_norm": 2.20003080368042, |
| "learning_rate": 7.2759759466429625e-06, |
| "loss": 0.59135399, |
| "memory(GiB)": 67.62, |
| "step": 6070, |
| "train_speed(iter/s)": 0.249261 |
| }, |
| { |
| "acc": 0.83627338, |
| "epoch": 4.178129298486932, |
| "grad_norm": 2.0617763996124268, |
| "learning_rate": 7.217047877593917e-06, |
| "loss": 0.542978, |
| "memory(GiB)": 67.62, |
| "step": 6075, |
| "train_speed(iter/s)": 0.248741 |
| }, |
| { |
| "acc": 0.83200588, |
| "epoch": 4.181568088033012, |
| "grad_norm": 2.2494707107543945, |
| "learning_rate": 7.158341173016954e-06, |
| "loss": 0.54484763, |
| "memory(GiB)": 67.62, |
| "step": 6080, |
| "train_speed(iter/s)": 0.248292 |
| }, |
| { |
| "acc": 0.83537827, |
| "epoch": 4.1850068775790925, |
| "grad_norm": 2.177746295928955, |
| "learning_rate": 7.099856136632578e-06, |
| "loss": 0.54962234, |
| "memory(GiB)": 67.62, |
| "step": 6085, |
| "train_speed(iter/s)": 0.247751 |
| }, |
| { |
| "acc": 0.83314114, |
| "epoch": 4.188445667125172, |
| "grad_norm": 1.8806217908859253, |
| "learning_rate": 7.041593071014495e-06, |
| "loss": 0.55333209, |
| "memory(GiB)": 67.62, |
| "step": 6090, |
| "train_speed(iter/s)": 0.247191 |
| }, |
| { |
| "acc": 0.83060188, |
| "epoch": 4.191884456671252, |
| "grad_norm": 2.284046173095703, |
| "learning_rate": 6.983552277588039e-06, |
| "loss": 0.55391922, |
| "memory(GiB)": 67.62, |
| "step": 6095, |
| "train_speed(iter/s)": 0.246671 |
| }, |
| { |
| "acc": 0.82202473, |
| "epoch": 4.195323246217331, |
| "grad_norm": 2.113684892654419, |
| "learning_rate": 6.925734056628606e-06, |
| "loss": 0.59055824, |
| "memory(GiB)": 67.62, |
| "step": 6100, |
| "train_speed(iter/s)": 0.246148 |
| }, |
| { |
| "epoch": 4.195323246217331, |
| "eval_acc": 0.7810377018940927, |
| "eval_loss": 0.7978992462158203, |
| "eval_runtime": 1151.0505, |
| "eval_samples_per_second": 3.721, |
| "eval_steps_per_second": 0.067, |
| "step": 6100 |
| }, |
| { |
| "acc": 0.82853069, |
| "epoch": 4.198762035763411, |
| "grad_norm": 1.9292495250701904, |
| "learning_rate": 6.8681387072601215e-06, |
| "loss": 0.56650033, |
| "memory(GiB)": 67.62, |
| "step": 6105, |
| "train_speed(iter/s)": 0.234762 |
| }, |
| { |
| "acc": 0.83146677, |
| "epoch": 4.202200825309491, |
| "grad_norm": 2.457911729812622, |
| "learning_rate": 6.8107665274534755e-06, |
| "loss": 0.56592517, |
| "memory(GiB)": 67.62, |
| "step": 6110, |
| "train_speed(iter/s)": 0.234306 |
| }, |
| { |
| "acc": 0.81799488, |
| "epoch": 4.205639614855571, |
| "grad_norm": 2.1064655780792236, |
| "learning_rate": 6.753617814024982e-06, |
| "loss": 0.59414425, |
| "memory(GiB)": 67.62, |
| "step": 6115, |
| "train_speed(iter/s)": 0.233775 |
| }, |
| { |
| "acc": 0.83699923, |
| "epoch": 4.209078404401651, |
| "grad_norm": 2.15045166015625, |
| "learning_rate": 6.696692862634848e-06, |
| "loss": 0.53455338, |
| "memory(GiB)": 67.62, |
| "step": 6120, |
| "train_speed(iter/s)": 0.233246 |
| }, |
| { |
| "acc": 0.82855034, |
| "epoch": 4.212517193947731, |
| "grad_norm": 1.9256818294525146, |
| "learning_rate": 6.639991967785629e-06, |
| "loss": 0.57589531, |
| "memory(GiB)": 67.62, |
| "step": 6125, |
| "train_speed(iter/s)": 0.232841 |
| }, |
| { |
| "acc": 0.82569561, |
| "epoch": 4.21595598349381, |
| "grad_norm": 2.2568438053131104, |
| "learning_rate": 6.583515422820755e-06, |
| "loss": 0.59608107, |
| "memory(GiB)": 67.62, |
| "step": 6130, |
| "train_speed(iter/s)": 0.232428 |
| }, |
| { |
| "acc": 0.83448133, |
| "epoch": 4.21939477303989, |
| "grad_norm": 2.2241194248199463, |
| "learning_rate": 6.527263519922942e-06, |
| "loss": 0.53996773, |
| "memory(GiB)": 67.62, |
| "step": 6135, |
| "train_speed(iter/s)": 0.232016 |
| }, |
| { |
| "acc": 0.82402668, |
| "epoch": 4.222833562585969, |
| "grad_norm": 2.152508020401001, |
| "learning_rate": 6.471236550112733e-06, |
| "loss": 0.5897275, |
| "memory(GiB)": 67.62, |
| "step": 6140, |
| "train_speed(iter/s)": 0.231577 |
| }, |
| { |
| "acc": 0.82092781, |
| "epoch": 4.2262723521320495, |
| "grad_norm": 2.7539846897125244, |
| "learning_rate": 6.415434803246959e-06, |
| "loss": 0.60109167, |
| "memory(GiB)": 67.62, |
| "step": 6145, |
| "train_speed(iter/s)": 0.231131 |
| }, |
| { |
| "acc": 0.82336702, |
| "epoch": 4.22971114167813, |
| "grad_norm": 2.2428319454193115, |
| "learning_rate": 6.359858568017257e-06, |
| "loss": 0.5810329, |
| "memory(GiB)": 67.62, |
| "step": 6150, |
| "train_speed(iter/s)": 0.230762 |
| }, |
| { |
| "acc": 0.8375886, |
| "epoch": 4.233149931224209, |
| "grad_norm": 2.108989715576172, |
| "learning_rate": 6.304508131948601e-06, |
| "loss": 0.54037862, |
| "memory(GiB)": 67.62, |
| "step": 6155, |
| "train_speed(iter/s)": 0.230346 |
| }, |
| { |
| "acc": 0.82534332, |
| "epoch": 4.236588720770289, |
| "grad_norm": 2.159034252166748, |
| "learning_rate": 6.249383781397765e-06, |
| "loss": 0.58905783, |
| "memory(GiB)": 67.62, |
| "step": 6160, |
| "train_speed(iter/s)": 0.229862 |
| }, |
| { |
| "acc": 0.82394867, |
| "epoch": 4.240027510316368, |
| "grad_norm": 2.191835880279541, |
| "learning_rate": 6.194485801551856e-06, |
| "loss": 0.57035618, |
| "memory(GiB)": 67.62, |
| "step": 6165, |
| "train_speed(iter/s)": 0.229394 |
| }, |
| { |
| "acc": 0.83958015, |
| "epoch": 4.243466299862448, |
| "grad_norm": 1.9931029081344604, |
| "learning_rate": 6.139814476426854e-06, |
| "loss": 0.53320942, |
| "memory(GiB)": 67.62, |
| "step": 6170, |
| "train_speed(iter/s)": 0.22895 |
| }, |
| { |
| "acc": 0.81791973, |
| "epoch": 4.2469050894085285, |
| "grad_norm": 2.611358404159546, |
| "learning_rate": 6.085370088866157e-06, |
| "loss": 0.61060858, |
| "memory(GiB)": 67.62, |
| "step": 6175, |
| "train_speed(iter/s)": 0.22846 |
| }, |
| { |
| "acc": 0.81913891, |
| "epoch": 4.250343878954608, |
| "grad_norm": 2.0629124641418457, |
| "learning_rate": 6.031152920539071e-06, |
| "loss": 0.59518094, |
| "memory(GiB)": 67.62, |
| "step": 6180, |
| "train_speed(iter/s)": 0.228031 |
| }, |
| { |
| "acc": 0.83380852, |
| "epoch": 4.253782668500688, |
| "grad_norm": 2.2911267280578613, |
| "learning_rate": 5.977163251939388e-06, |
| "loss": 0.55708628, |
| "memory(GiB)": 67.62, |
| "step": 6185, |
| "train_speed(iter/s)": 0.227608 |
| }, |
| { |
| "acc": 0.83007746, |
| "epoch": 4.257221458046768, |
| "grad_norm": 2.3003599643707275, |
| "learning_rate": 5.9234013623839155e-06, |
| "loss": 0.56224914, |
| "memory(GiB)": 67.62, |
| "step": 6190, |
| "train_speed(iter/s)": 0.227229 |
| }, |
| { |
| "acc": 0.82731237, |
| "epoch": 4.260660247592847, |
| "grad_norm": 2.23395037651062, |
| "learning_rate": 5.869867530011054e-06, |
| "loss": 0.57990241, |
| "memory(GiB)": 67.62, |
| "step": 6195, |
| "train_speed(iter/s)": 0.226782 |
| }, |
| { |
| "acc": 0.82513866, |
| "epoch": 4.264099037138927, |
| "grad_norm": 1.8877415657043457, |
| "learning_rate": 5.816562031779334e-06, |
| "loss": 0.58530903, |
| "memory(GiB)": 67.62, |
| "step": 6200, |
| "train_speed(iter/s)": 0.226378 |
| }, |
| { |
| "epoch": 4.264099037138927, |
| "eval_acc": 0.7814088720925001, |
| "eval_loss": 0.796574592590332, |
| "eval_runtime": 1138.6928, |
| "eval_samples_per_second": 3.761, |
| "eval_steps_per_second": 0.068, |
| "step": 6200 |
| }, |
| { |
| "acc": 0.83331938, |
| "epoch": 4.267537826685007, |
| "grad_norm": 2.007477283477783, |
| "learning_rate": 5.7634851434660045e-06, |
| "loss": 0.55948911, |
| "memory(GiB)": 67.62, |
| "step": 6205, |
| "train_speed(iter/s)": 0.21694 |
| }, |
| { |
| "acc": 0.83156748, |
| "epoch": 4.270976616231087, |
| "grad_norm": 2.2435107231140137, |
| "learning_rate": 5.7106371396655885e-06, |
| "loss": 0.55306296, |
| "memory(GiB)": 67.62, |
| "step": 6210, |
| "train_speed(iter/s)": 0.21658 |
| }, |
| { |
| "acc": 0.82246685, |
| "epoch": 4.274415405777167, |
| "grad_norm": 2.471839427947998, |
| "learning_rate": 5.658018293788461e-06, |
| "loss": 0.58456354, |
| "memory(GiB)": 67.62, |
| "step": 6215, |
| "train_speed(iter/s)": 0.216188 |
| }, |
| { |
| "acc": 0.8260498, |
| "epoch": 4.277854195323246, |
| "grad_norm": 2.342773675918579, |
| "learning_rate": 5.6056288780594584e-06, |
| "loss": 0.58758726, |
| "memory(GiB)": 67.62, |
| "step": 6220, |
| "train_speed(iter/s)": 0.215836 |
| }, |
| { |
| "acc": 0.83068848, |
| "epoch": 4.281292984869326, |
| "grad_norm": 2.36448073387146, |
| "learning_rate": 5.553469163516459e-06, |
| "loss": 0.55812101, |
| "memory(GiB)": 67.62, |
| "step": 6225, |
| "train_speed(iter/s)": 0.21546 |
| }, |
| { |
| "acc": 0.8118145, |
| "epoch": 4.284731774415405, |
| "grad_norm": 2.0966968536376953, |
| "learning_rate": 5.501539420008957e-06, |
| "loss": 0.62151508, |
| "memory(GiB)": 67.62, |
| "step": 6230, |
| "train_speed(iter/s)": 0.215114 |
| }, |
| { |
| "acc": 0.82315483, |
| "epoch": 4.2881705639614855, |
| "grad_norm": 2.090514898300171, |
| "learning_rate": 5.449839916196701e-06, |
| "loss": 0.59569468, |
| "memory(GiB)": 67.62, |
| "step": 6235, |
| "train_speed(iter/s)": 0.214737 |
| }, |
| { |
| "acc": 0.82968979, |
| "epoch": 4.291609353507566, |
| "grad_norm": 2.4561944007873535, |
| "learning_rate": 5.398370919548289e-06, |
| "loss": 0.56410408, |
| "memory(GiB)": 67.62, |
| "step": 6240, |
| "train_speed(iter/s)": 0.214364 |
| }, |
| { |
| "acc": 0.82265596, |
| "epoch": 4.295048143053645, |
| "grad_norm": 2.0787575244903564, |
| "learning_rate": 5.3471326963397644e-06, |
| "loss": 0.59666047, |
| "memory(GiB)": 67.62, |
| "step": 6245, |
| "train_speed(iter/s)": 0.213976 |
| }, |
| { |
| "acc": 0.83164139, |
| "epoch": 4.298486932599725, |
| "grad_norm": 1.96835458278656, |
| "learning_rate": 5.296125511653292e-06, |
| "loss": 0.56099758, |
| "memory(GiB)": 67.62, |
| "step": 6250, |
| "train_speed(iter/s)": 0.213621 |
| }, |
| { |
| "acc": 0.82760611, |
| "epoch": 4.301925722145804, |
| "grad_norm": 2.032607078552246, |
| "learning_rate": 5.245349629375726e-06, |
| "loss": 0.56520452, |
| "memory(GiB)": 67.62, |
| "step": 6255, |
| "train_speed(iter/s)": 0.213246 |
| }, |
| { |
| "acc": 0.81889114, |
| "epoch": 4.305364511691884, |
| "grad_norm": 2.076733112335205, |
| "learning_rate": 5.194805312197261e-06, |
| "loss": 0.60234947, |
| "memory(GiB)": 67.62, |
| "step": 6260, |
| "train_speed(iter/s)": 0.212879 |
| }, |
| { |
| "acc": 0.83639603, |
| "epoch": 4.3088033012379645, |
| "grad_norm": 2.0413177013397217, |
| "learning_rate": 5.144492821610151e-06, |
| "loss": 0.53537364, |
| "memory(GiB)": 67.62, |
| "step": 6265, |
| "train_speed(iter/s)": 0.212585 |
| }, |
| { |
| "acc": 0.84348145, |
| "epoch": 4.312242090784044, |
| "grad_norm": 2.1440134048461914, |
| "learning_rate": 5.094412417907226e-06, |
| "loss": 0.52636375, |
| "memory(GiB)": 67.62, |
| "step": 6270, |
| "train_speed(iter/s)": 0.21228 |
| }, |
| { |
| "acc": 0.81755209, |
| "epoch": 4.315680880330124, |
| "grad_norm": 2.337132692337036, |
| "learning_rate": 5.0445643601806165e-06, |
| "loss": 0.60215778, |
| "memory(GiB)": 67.62, |
| "step": 6275, |
| "train_speed(iter/s)": 0.211939 |
| }, |
| { |
| "acc": 0.81957273, |
| "epoch": 4.319119669876203, |
| "grad_norm": 2.3544983863830566, |
| "learning_rate": 4.994948906320421e-06, |
| "loss": 0.62419033, |
| "memory(GiB)": 67.62, |
| "step": 6280, |
| "train_speed(iter/s)": 0.211611 |
| }, |
| { |
| "acc": 0.83044434, |
| "epoch": 4.322558459422283, |
| "grad_norm": 2.0763583183288574, |
| "learning_rate": 4.945566313013359e-06, |
| "loss": 0.56670027, |
| "memory(GiB)": 67.62, |
| "step": 6285, |
| "train_speed(iter/s)": 0.211304 |
| }, |
| { |
| "acc": 0.83016624, |
| "epoch": 4.325997248968363, |
| "grad_norm": 2.207101583480835, |
| "learning_rate": 4.896416835741426e-06, |
| "loss": 0.57944641, |
| "memory(GiB)": 67.62, |
| "step": 6290, |
| "train_speed(iter/s)": 0.210967 |
| }, |
| { |
| "acc": 0.82729073, |
| "epoch": 4.329436038514443, |
| "grad_norm": 2.1743686199188232, |
| "learning_rate": 4.847500728780591e-06, |
| "loss": 0.57582512, |
| "memory(GiB)": 67.62, |
| "step": 6295, |
| "train_speed(iter/s)": 0.210593 |
| }, |
| { |
| "acc": 0.83406305, |
| "epoch": 4.332874828060523, |
| "grad_norm": 2.1914258003234863, |
| "learning_rate": 4.798818245199488e-06, |
| "loss": 0.56798325, |
| "memory(GiB)": 67.62, |
| "step": 6300, |
| "train_speed(iter/s)": 0.210291 |
| }, |
| { |
| "epoch": 4.332874828060523, |
| "eval_acc": 0.7814426148378099, |
| "eval_loss": 0.7952266335487366, |
| "eval_runtime": 1113.0563, |
| "eval_samples_per_second": 3.848, |
| "eval_steps_per_second": 0.069, |
| "step": 6300 |
| }, |
| { |
| "acc": 0.81289082, |
| "epoch": 4.336313617606603, |
| "grad_norm": 1.9904134273529053, |
| "learning_rate": 4.7503696368580756e-06, |
| "loss": 0.62703791, |
| "memory(GiB)": 67.62, |
| "step": 6305, |
| "train_speed(iter/s)": 0.202425 |
| }, |
| { |
| "acc": 0.82088013, |
| "epoch": 4.339752407152682, |
| "grad_norm": 2.3138110637664795, |
| "learning_rate": 4.702155154406356e-06, |
| "loss": 0.59575286, |
| "memory(GiB)": 67.62, |
| "step": 6310, |
| "train_speed(iter/s)": 0.202102 |
| }, |
| { |
| "acc": 0.82686548, |
| "epoch": 4.343191196698762, |
| "grad_norm": 2.3361921310424805, |
| "learning_rate": 4.654175047283105e-06, |
| "loss": 0.58184552, |
| "memory(GiB)": 67.62, |
| "step": 6315, |
| "train_speed(iter/s)": 0.201819 |
| }, |
| { |
| "acc": 0.81809053, |
| "epoch": 4.346629986244841, |
| "grad_norm": 2.237659215927124, |
| "learning_rate": 4.606429563714522e-06, |
| "loss": 0.61091933, |
| "memory(GiB)": 67.62, |
| "step": 6320, |
| "train_speed(iter/s)": 0.201524 |
| }, |
| { |
| "acc": 0.83638992, |
| "epoch": 4.3500687757909215, |
| "grad_norm": 2.163444995880127, |
| "learning_rate": 4.558918950712983e-06, |
| "loss": 0.53875408, |
| "memory(GiB)": 67.62, |
| "step": 6325, |
| "train_speed(iter/s)": 0.201286 |
| }, |
| { |
| "acc": 0.83066168, |
| "epoch": 4.353507565337002, |
| "grad_norm": 2.1895644664764404, |
| "learning_rate": 4.511643454075753e-06, |
| "loss": 0.54859762, |
| "memory(GiB)": 67.62, |
| "step": 6330, |
| "train_speed(iter/s)": 0.201 |
| }, |
| { |
| "acc": 0.82753067, |
| "epoch": 4.356946354883081, |
| "grad_norm": 2.3949623107910156, |
| "learning_rate": 4.464603318383724e-06, |
| "loss": 0.57942715, |
| "memory(GiB)": 67.62, |
| "step": 6335, |
| "train_speed(iter/s)": 0.20074 |
| }, |
| { |
| "acc": 0.8256155, |
| "epoch": 4.360385144429161, |
| "grad_norm": 2.2843456268310547, |
| "learning_rate": 4.417798787000139e-06, |
| "loss": 0.5838841, |
| "memory(GiB)": 67.62, |
| "step": 6340, |
| "train_speed(iter/s)": 0.200436 |
| }, |
| { |
| "acc": 0.82477741, |
| "epoch": 4.36382393397524, |
| "grad_norm": 2.1748905181884766, |
| "learning_rate": 4.371230102069333e-06, |
| "loss": 0.57569537, |
| "memory(GiB)": 67.62, |
| "step": 6345, |
| "train_speed(iter/s)": 0.200165 |
| }, |
| { |
| "acc": 0.82552452, |
| "epoch": 4.36726272352132, |
| "grad_norm": 2.2806589603424072, |
| "learning_rate": 4.324897504515494e-06, |
| "loss": 0.5679925, |
| "memory(GiB)": 67.62, |
| "step": 6350, |
| "train_speed(iter/s)": 0.199857 |
| }, |
| { |
| "acc": 0.82228546, |
| "epoch": 4.3707015130674005, |
| "grad_norm": 2.5865187644958496, |
| "learning_rate": 4.278801234041395e-06, |
| "loss": 0.60699501, |
| "memory(GiB)": 67.62, |
| "step": 6355, |
| "train_speed(iter/s)": 0.199561 |
| }, |
| { |
| "acc": 0.83120518, |
| "epoch": 4.37414030261348, |
| "grad_norm": 2.1603238582611084, |
| "learning_rate": 4.2329415291271675e-06, |
| "loss": 0.56461072, |
| "memory(GiB)": 67.62, |
| "step": 6360, |
| "train_speed(iter/s)": 0.199251 |
| }, |
| { |
| "acc": 0.82535934, |
| "epoch": 4.37757909215956, |
| "grad_norm": 2.120961904525757, |
| "learning_rate": 4.18731862702908e-06, |
| "loss": 0.57014971, |
| "memory(GiB)": 67.62, |
| "step": 6365, |
| "train_speed(iter/s)": 0.199009 |
| }, |
| { |
| "acc": 0.82253723, |
| "epoch": 4.38101788170564, |
| "grad_norm": 2.091716766357422, |
| "learning_rate": 4.141932763778269e-06, |
| "loss": 0.58944392, |
| "memory(GiB)": 67.62, |
| "step": 6370, |
| "train_speed(iter/s)": 0.198744 |
| }, |
| { |
| "acc": 0.82127199, |
| "epoch": 4.384456671251719, |
| "grad_norm": 2.529238700866699, |
| "learning_rate": 4.09678417417958e-06, |
| "loss": 0.60495977, |
| "memory(GiB)": 67.62, |
| "step": 6375, |
| "train_speed(iter/s)": 0.19843 |
| }, |
| { |
| "acc": 0.82691174, |
| "epoch": 4.387895460797799, |
| "grad_norm": 2.0297234058380127, |
| "learning_rate": 4.051873091810289e-06, |
| "loss": 0.57716408, |
| "memory(GiB)": 67.62, |
| "step": 6380, |
| "train_speed(iter/s)": 0.198164 |
| }, |
| { |
| "acc": 0.82726593, |
| "epoch": 4.391334250343879, |
| "grad_norm": 2.1247737407684326, |
| "learning_rate": 4.007199749018933e-06, |
| "loss": 0.56230278, |
| "memory(GiB)": 67.62, |
| "step": 6385, |
| "train_speed(iter/s)": 0.197892 |
| }, |
| { |
| "acc": 0.83000584, |
| "epoch": 4.394773039889959, |
| "grad_norm": 2.1872763633728027, |
| "learning_rate": 3.962764376924093e-06, |
| "loss": 0.57364516, |
| "memory(GiB)": 67.62, |
| "step": 6390, |
| "train_speed(iter/s)": 0.197621 |
| }, |
| { |
| "acc": 0.80675488, |
| "epoch": 4.398211829436039, |
| "grad_norm": 2.4632184505462646, |
| "learning_rate": 3.918567205413209e-06, |
| "loss": 0.63493814, |
| "memory(GiB)": 67.62, |
| "step": 6395, |
| "train_speed(iter/s)": 0.197376 |
| }, |
| { |
| "acc": 0.83054581, |
| "epoch": 4.401650618982118, |
| "grad_norm": 2.1453042030334473, |
| "learning_rate": 3.8746084631413774e-06, |
| "loss": 0.55714712, |
| "memory(GiB)": 67.62, |
| "step": 6400, |
| "train_speed(iter/s)": 0.197063 |
| }, |
| { |
| "epoch": 4.401650618982118, |
| "eval_acc": 0.7813807531380753, |
| "eval_loss": 0.7940236926078796, |
| "eval_runtime": 1132.2427, |
| "eval_samples_per_second": 3.783, |
| "eval_steps_per_second": 0.068, |
| "step": 6400 |
| }, |
| { |
| "acc": 0.82402639, |
| "epoch": 4.405089408528198, |
| "grad_norm": 2.5457465648651123, |
| "learning_rate": 3.830888377530191e-06, |
| "loss": 0.58401513, |
| "memory(GiB)": 67.62, |
| "step": 6405, |
| "train_speed(iter/s)": 0.190185 |
| }, |
| { |
| "acc": 0.8252965, |
| "epoch": 4.408528198074277, |
| "grad_norm": 2.4412484169006348, |
| "learning_rate": 3.787407174766534e-06, |
| "loss": 0.57594061, |
| "memory(GiB)": 67.62, |
| "step": 6410, |
| "train_speed(iter/s)": 0.189944 |
| }, |
| { |
| "acc": 0.82564621, |
| "epoch": 4.4119669876203575, |
| "grad_norm": 2.4891350269317627, |
| "learning_rate": 3.7441650798014204e-06, |
| "loss": 0.58461208, |
| "memory(GiB)": 67.62, |
| "step": 6415, |
| "train_speed(iter/s)": 0.189741 |
| }, |
| { |
| "acc": 0.82523041, |
| "epoch": 4.415405777166438, |
| "grad_norm": 2.297450065612793, |
| "learning_rate": 3.7011623163488466e-06, |
| "loss": 0.56609049, |
| "memory(GiB)": 67.62, |
| "step": 6420, |
| "train_speed(iter/s)": 0.18951 |
| }, |
| { |
| "acc": 0.82219734, |
| "epoch": 4.418844566712517, |
| "grad_norm": 2.200800657272339, |
| "learning_rate": 3.6583991068846157e-06, |
| "loss": 0.59716201, |
| "memory(GiB)": 67.62, |
| "step": 6425, |
| "train_speed(iter/s)": 0.189276 |
| }, |
| { |
| "acc": 0.83268661, |
| "epoch": 4.422283356258597, |
| "grad_norm": 2.185145378112793, |
| "learning_rate": 3.61587567264519e-06, |
| "loss": 0.56204829, |
| "memory(GiB)": 67.62, |
| "step": 6430, |
| "train_speed(iter/s)": 0.188997 |
| }, |
| { |
| "acc": 0.82503653, |
| "epoch": 4.425722145804677, |
| "grad_norm": 2.043168067932129, |
| "learning_rate": 3.5735922336265567e-06, |
| "loss": 0.5881556, |
| "memory(GiB)": 67.62, |
| "step": 6435, |
| "train_speed(iter/s)": 0.188753 |
| }, |
| { |
| "acc": 0.81975737, |
| "epoch": 4.429160935350756, |
| "grad_norm": 2.160871982574463, |
| "learning_rate": 3.5315490085830724e-06, |
| "loss": 0.6149045, |
| "memory(GiB)": 67.62, |
| "step": 6440, |
| "train_speed(iter/s)": 0.188516 |
| }, |
| { |
| "acc": 0.82297249, |
| "epoch": 4.4325997248968365, |
| "grad_norm": 2.2979509830474854, |
| "learning_rate": 3.489746215026349e-06, |
| "loss": 0.58171053, |
| "memory(GiB)": 67.62, |
| "step": 6445, |
| "train_speed(iter/s)": 0.188237 |
| }, |
| { |
| "acc": 0.81737309, |
| "epoch": 4.436038514442916, |
| "grad_norm": 2.3226141929626465, |
| "learning_rate": 3.4481840692241092e-06, |
| "loss": 0.61316481, |
| "memory(GiB)": 67.62, |
| "step": 6450, |
| "train_speed(iter/s)": 0.18799 |
| }, |
| { |
| "acc": 0.82329559, |
| "epoch": 4.439477303988996, |
| "grad_norm": 2.2420105934143066, |
| "learning_rate": 3.4068627861991034e-06, |
| "loss": 0.60935397, |
| "memory(GiB)": 67.62, |
| "step": 6455, |
| "train_speed(iter/s)": 0.187782 |
| }, |
| { |
| "acc": 0.82957897, |
| "epoch": 4.442916093535076, |
| "grad_norm": 2.281442403793335, |
| "learning_rate": 3.365782579727948e-06, |
| "loss": 0.58194571, |
| "memory(GiB)": 67.62, |
| "step": 6460, |
| "train_speed(iter/s)": 0.187574 |
| }, |
| { |
| "acc": 0.82551146, |
| "epoch": 4.446354883081155, |
| "grad_norm": 2.1205482482910156, |
| "learning_rate": 3.3249436623400493e-06, |
| "loss": 0.57835684, |
| "memory(GiB)": 67.62, |
| "step": 6465, |
| "train_speed(iter/s)": 0.187326 |
| }, |
| { |
| "acc": 0.82712269, |
| "epoch": 4.449793672627235, |
| "grad_norm": 2.2721188068389893, |
| "learning_rate": 3.284346245316513e-06, |
| "loss": 0.57927489, |
| "memory(GiB)": 67.62, |
| "step": 6470, |
| "train_speed(iter/s)": 0.187079 |
| }, |
| { |
| "acc": 0.82159977, |
| "epoch": 4.453232462173315, |
| "grad_norm": 2.2417726516723633, |
| "learning_rate": 3.24399053868902e-06, |
| "loss": 0.57816648, |
| "memory(GiB)": 67.62, |
| "step": 6475, |
| "train_speed(iter/s)": 0.186865 |
| }, |
| { |
| "acc": 0.83896151, |
| "epoch": 4.456671251719395, |
| "grad_norm": 2.33647084236145, |
| "learning_rate": 3.203876751238749e-06, |
| "loss": 0.53038335, |
| "memory(GiB)": 67.62, |
| "step": 6480, |
| "train_speed(iter/s)": 0.186653 |
| }, |
| { |
| "acc": 0.83427839, |
| "epoch": 4.460110041265475, |
| "grad_norm": 2.063394069671631, |
| "learning_rate": 3.1640050904953505e-06, |
| "loss": 0.56539698, |
| "memory(GiB)": 67.62, |
| "step": 6485, |
| "train_speed(iter/s)": 0.186435 |
| }, |
| { |
| "acc": 0.82990141, |
| "epoch": 4.463548830811554, |
| "grad_norm": 2.2717719078063965, |
| "learning_rate": 3.1243757627357668e-06, |
| "loss": 0.55906305, |
| "memory(GiB)": 67.62, |
| "step": 6490, |
| "train_speed(iter/s)": 0.186164 |
| }, |
| { |
| "acc": 0.82575073, |
| "epoch": 4.466987620357634, |
| "grad_norm": 2.5037717819213867, |
| "learning_rate": 3.0849889729832654e-06, |
| "loss": 0.57216806, |
| "memory(GiB)": 67.62, |
| "step": 6495, |
| "train_speed(iter/s)": 0.185961 |
| }, |
| { |
| "acc": 0.82585573, |
| "epoch": 4.470426409903714, |
| "grad_norm": 2.19950795173645, |
| "learning_rate": 3.045844925006326e-06, |
| "loss": 0.57823243, |
| "memory(GiB)": 67.62, |
| "step": 6500, |
| "train_speed(iter/s)": 0.185733 |
| }, |
| { |
| "epoch": 4.470426409903714, |
| "eval_acc": 0.7815775858190489, |
| "eval_loss": 0.7929303646087646, |
| "eval_runtime": 1155.4394, |
| "eval_samples_per_second": 3.707, |
| "eval_steps_per_second": 0.067, |
| "step": 6500 |
| }, |
| { |
| "acc": 0.81569099, |
| "epoch": 4.4738651994497936, |
| "grad_norm": 2.3302502632141113, |
| "learning_rate": 3.0069438213175954e-06, |
| "loss": 0.61277876, |
| "memory(GiB)": 67.62, |
| "step": 6505, |
| "train_speed(iter/s)": 0.179594 |
| }, |
| { |
| "acc": 0.82125263, |
| "epoch": 4.477303988995874, |
| "grad_norm": 2.0833966732025146, |
| "learning_rate": 2.968285863172848e-06, |
| "loss": 0.59841776, |
| "memory(GiB)": 67.62, |
| "step": 6510, |
| "train_speed(iter/s)": 0.179377 |
| }, |
| { |
| "acc": 0.81880264, |
| "epoch": 4.480742778541953, |
| "grad_norm": 2.5484683513641357, |
| "learning_rate": 2.929871250569924e-06, |
| "loss": 0.59419332, |
| "memory(GiB)": 67.62, |
| "step": 6515, |
| "train_speed(iter/s)": 0.179133 |
| }, |
| { |
| "acc": 0.82751369, |
| "epoch": 4.484181568088033, |
| "grad_norm": 2.393644332885742, |
| "learning_rate": 2.891700182247734e-06, |
| "loss": 0.57184334, |
| "memory(GiB)": 67.62, |
| "step": 6520, |
| "train_speed(iter/s)": 0.17891 |
| }, |
| { |
| "acc": 0.81892633, |
| "epoch": 4.487620357634113, |
| "grad_norm": 2.44018292427063, |
| "learning_rate": 2.8537728556851844e-06, |
| "loss": 0.61149454, |
| "memory(GiB)": 67.62, |
| "step": 6525, |
| "train_speed(iter/s)": 0.178678 |
| }, |
| { |
| "acc": 0.82124205, |
| "epoch": 4.491059147180192, |
| "grad_norm": 2.0875890254974365, |
| "learning_rate": 2.8160894671001892e-06, |
| "loss": 0.5891263, |
| "memory(GiB)": 67.62, |
| "step": 6530, |
| "train_speed(iter/s)": 0.178449 |
| }, |
| { |
| "acc": 0.82609577, |
| "epoch": 4.4944979367262725, |
| "grad_norm": 2.057404041290283, |
| "learning_rate": 2.778650211448648e-06, |
| "loss": 0.56262321, |
| "memory(GiB)": 67.62, |
| "step": 6535, |
| "train_speed(iter/s)": 0.178257 |
| }, |
| { |
| "acc": 0.83202305, |
| "epoch": 4.497936726272352, |
| "grad_norm": 2.3149304389953613, |
| "learning_rate": 2.741455282423418e-06, |
| "loss": 0.55560713, |
| "memory(GiB)": 67.62, |
| "step": 6540, |
| "train_speed(iter/s)": 0.178079 |
| }, |
| { |
| "acc": 0.83527908, |
| "epoch": 4.501375515818432, |
| "grad_norm": 2.2315163612365723, |
| "learning_rate": 2.7045048724533295e-06, |
| "loss": 0.54867306, |
| "memory(GiB)": 67.62, |
| "step": 6545, |
| "train_speed(iter/s)": 0.177882 |
| }, |
| { |
| "acc": 0.82490063, |
| "epoch": 4.504814305364512, |
| "grad_norm": 2.0971333980560303, |
| "learning_rate": 2.667799172702211e-06, |
| "loss": 0.58073626, |
| "memory(GiB)": 67.62, |
| "step": 6550, |
| "train_speed(iter/s)": 0.177654 |
| }, |
| { |
| "acc": 0.82666264, |
| "epoch": 4.508253094910591, |
| "grad_norm": 2.328887701034546, |
| "learning_rate": 2.6313383730678536e-06, |
| "loss": 0.58351974, |
| "memory(GiB)": 67.62, |
| "step": 6555, |
| "train_speed(iter/s)": 0.177423 |
| }, |
| { |
| "acc": 0.81643009, |
| "epoch": 4.511691884456671, |
| "grad_norm": 2.3826959133148193, |
| "learning_rate": 2.5951226621810548e-06, |
| "loss": 0.60832229, |
| "memory(GiB)": 67.62, |
| "step": 6560, |
| "train_speed(iter/s)": 0.17721 |
| }, |
| { |
| "acc": 0.83378086, |
| "epoch": 4.5151306740027515, |
| "grad_norm": 2.135087490081787, |
| "learning_rate": 2.5591522274046416e-06, |
| "loss": 0.56533546, |
| "memory(GiB)": 67.62, |
| "step": 6565, |
| "train_speed(iter/s)": 0.177032 |
| }, |
| { |
| "acc": 0.83013229, |
| "epoch": 4.518569463548831, |
| "grad_norm": 2.335890054702759, |
| "learning_rate": 2.523427254832501e-06, |
| "loss": 0.55983028, |
| "memory(GiB)": 67.62, |
| "step": 6570, |
| "train_speed(iter/s)": 0.176797 |
| }, |
| { |
| "acc": 0.82724657, |
| "epoch": 4.522008253094911, |
| "grad_norm": 2.3773765563964844, |
| "learning_rate": 2.487947929288618e-06, |
| "loss": 0.57505946, |
| "memory(GiB)": 67.62, |
| "step": 6575, |
| "train_speed(iter/s)": 0.176609 |
| }, |
| { |
| "acc": 0.82321806, |
| "epoch": 4.52544704264099, |
| "grad_norm": 2.1447110176086426, |
| "learning_rate": 2.4527144343261097e-06, |
| "loss": 0.58117051, |
| "memory(GiB)": 67.62, |
| "step": 6580, |
| "train_speed(iter/s)": 0.176429 |
| }, |
| { |
| "acc": 0.81534252, |
| "epoch": 4.52888583218707, |
| "grad_norm": 2.3002796173095703, |
| "learning_rate": 2.417726952226283e-06, |
| "loss": 0.59847307, |
| "memory(GiB)": 67.62, |
| "step": 6585, |
| "train_speed(iter/s)": 0.176205 |
| }, |
| { |
| "acc": 0.83123646, |
| "epoch": 4.53232462173315, |
| "grad_norm": 2.134842872619629, |
| "learning_rate": 2.382985663997712e-06, |
| "loss": 0.56259084, |
| "memory(GiB)": 67.62, |
| "step": 6590, |
| "train_speed(iter/s)": 0.175983 |
| }, |
| { |
| "acc": 0.82430344, |
| "epoch": 4.5357634112792296, |
| "grad_norm": 2.316795825958252, |
| "learning_rate": 2.348490749375251e-06, |
| "loss": 0.57970629, |
| "memory(GiB)": 67.62, |
| "step": 6595, |
| "train_speed(iter/s)": 0.1758 |
| }, |
| { |
| "acc": 0.83597136, |
| "epoch": 4.53920220082531, |
| "grad_norm": 2.263073444366455, |
| "learning_rate": 2.3142423868191563e-06, |
| "loss": 0.54895492, |
| "memory(GiB)": 67.62, |
| "step": 6600, |
| "train_speed(iter/s)": 0.175618 |
| }, |
| { |
| "epoch": 4.53920220082531, |
| "eval_acc": 0.7819375084356863, |
| "eval_loss": 0.7933745980262756, |
| "eval_runtime": 1098.756, |
| "eval_samples_per_second": 3.898, |
| "eval_steps_per_second": 0.07, |
| "step": 6600 |
| }, |
| { |
| "acc": 0.832055, |
| "epoch": 4.542640990371389, |
| "grad_norm": 2.175189971923828, |
| "learning_rate": 2.2802407535141275e-06, |
| "loss": 0.56409612, |
| "memory(GiB)": 67.62, |
| "step": 6605, |
| "train_speed(iter/s)": 0.170456 |
| }, |
| { |
| "acc": 0.82646189, |
| "epoch": 4.546079779917469, |
| "grad_norm": 2.112194776535034, |
| "learning_rate": 2.246486025368418e-06, |
| "loss": 0.56891632, |
| "memory(GiB)": 67.62, |
| "step": 6610, |
| "train_speed(iter/s)": 0.170302 |
| }, |
| { |
| "acc": 0.82066345, |
| "epoch": 4.549518569463549, |
| "grad_norm": 2.304631233215332, |
| "learning_rate": 2.212978377012892e-06, |
| "loss": 0.60033989, |
| "memory(GiB)": 67.62, |
| "step": 6615, |
| "train_speed(iter/s)": 0.170106 |
| }, |
| { |
| "acc": 0.83084068, |
| "epoch": 4.552957359009628, |
| "grad_norm": 2.2651240825653076, |
| "learning_rate": 2.179717981800164e-06, |
| "loss": 0.55889602, |
| "memory(GiB)": 67.62, |
| "step": 6620, |
| "train_speed(iter/s)": 0.169961 |
| }, |
| { |
| "acc": 0.82767801, |
| "epoch": 4.5563961485557085, |
| "grad_norm": 2.218092918395996, |
| "learning_rate": 2.1467050118036613e-06, |
| "loss": 0.58023634, |
| "memory(GiB)": 67.62, |
| "step": 6625, |
| "train_speed(iter/s)": 0.1698 |
| }, |
| { |
| "acc": 0.82311954, |
| "epoch": 4.559834938101789, |
| "grad_norm": 2.081865072250366, |
| "learning_rate": 2.1139396378167637e-06, |
| "loss": 0.58637218, |
| "memory(GiB)": 67.62, |
| "step": 6630, |
| "train_speed(iter/s)": 0.169621 |
| }, |
| { |
| "acc": 0.82979736, |
| "epoch": 4.563273727647868, |
| "grad_norm": 2.2547144889831543, |
| "learning_rate": 2.08142202935188e-06, |
| "loss": 0.55914106, |
| "memory(GiB)": 67.62, |
| "step": 6635, |
| "train_speed(iter/s)": 0.169453 |
| }, |
| { |
| "acc": 0.82038078, |
| "epoch": 4.566712517193948, |
| "grad_norm": 2.181720495223999, |
| "learning_rate": 2.0491523546396466e-06, |
| "loss": 0.59662962, |
| "memory(GiB)": 67.62, |
| "step": 6640, |
| "train_speed(iter/s)": 0.169266 |
| }, |
| { |
| "acc": 0.8245801, |
| "epoch": 4.570151306740027, |
| "grad_norm": 2.330573558807373, |
| "learning_rate": 2.01713078062797e-06, |
| "loss": 0.58751688, |
| "memory(GiB)": 67.62, |
| "step": 6645, |
| "train_speed(iter/s)": 0.169123 |
| }, |
| { |
| "acc": 0.83173065, |
| "epoch": 4.573590096286107, |
| "grad_norm": 2.1227643489837646, |
| "learning_rate": 1.9853574729812123e-06, |
| "loss": 0.54269109, |
| "memory(GiB)": 67.62, |
| "step": 6650, |
| "train_speed(iter/s)": 0.168939 |
| }, |
| { |
| "acc": 0.83502407, |
| "epoch": 4.577028885832187, |
| "grad_norm": 2.232192277908325, |
| "learning_rate": 1.953832596079319e-06, |
| "loss": 0.5437376, |
| "memory(GiB)": 67.62, |
| "step": 6655, |
| "train_speed(iter/s)": 0.168764 |
| }, |
| { |
| "acc": 0.83107376, |
| "epoch": 4.580467675378267, |
| "grad_norm": 2.2640929222106934, |
| "learning_rate": 1.9225563130169875e-06, |
| "loss": 0.54885445, |
| "memory(GiB)": 67.62, |
| "step": 6660, |
| "train_speed(iter/s)": 0.168622 |
| }, |
| { |
| "acc": 0.83116302, |
| "epoch": 4.583906464924347, |
| "grad_norm": 2.4255106449127197, |
| "learning_rate": 1.8915287856027996e-06, |
| "loss": 0.57933769, |
| "memory(GiB)": 67.62, |
| "step": 6665, |
| "train_speed(iter/s)": 0.168435 |
| }, |
| { |
| "acc": 0.83079157, |
| "epoch": 4.587345254470426, |
| "grad_norm": 2.252610445022583, |
| "learning_rate": 1.8607501743583902e-06, |
| "loss": 0.57562494, |
| "memory(GiB)": 67.62, |
| "step": 6670, |
| "train_speed(iter/s)": 0.168263 |
| }, |
| { |
| "acc": 0.82178955, |
| "epoch": 4.590784044016506, |
| "grad_norm": 2.378258466720581, |
| "learning_rate": 1.8302206385176258e-06, |
| "loss": 0.59762077, |
| "memory(GiB)": 67.62, |
| "step": 6675, |
| "train_speed(iter/s)": 0.168089 |
| }, |
| { |
| "acc": 0.83059864, |
| "epoch": 4.594222833562586, |
| "grad_norm": 2.4089572429656982, |
| "learning_rate": 1.7999403360257766e-06, |
| "loss": 0.57908206, |
| "memory(GiB)": 67.62, |
| "step": 6680, |
| "train_speed(iter/s)": 0.167941 |
| }, |
| { |
| "acc": 0.82545843, |
| "epoch": 4.5976616231086656, |
| "grad_norm": 2.556912660598755, |
| "learning_rate": 1.7699094235386956e-06, |
| "loss": 0.5731123, |
| "memory(GiB)": 67.62, |
| "step": 6685, |
| "train_speed(iter/s)": 0.167785 |
| }, |
| { |
| "acc": 0.83477535, |
| "epoch": 4.601100412654746, |
| "grad_norm": 2.144914150238037, |
| "learning_rate": 1.7401280564220138e-06, |
| "loss": 0.54660711, |
| "memory(GiB)": 67.62, |
| "step": 6690, |
| "train_speed(iter/s)": 0.167611 |
| }, |
| { |
| "acc": 0.82835121, |
| "epoch": 4.604539202200826, |
| "grad_norm": 2.0818796157836914, |
| "learning_rate": 1.7105963887503236e-06, |
| "loss": 0.57266307, |
| "memory(GiB)": 67.62, |
| "step": 6695, |
| "train_speed(iter/s)": 0.167452 |
| }, |
| { |
| "acc": 0.82310772, |
| "epoch": 4.607977991746905, |
| "grad_norm": 2.37752366065979, |
| "learning_rate": 1.6813145733064094e-06, |
| "loss": 0.5846642, |
| "memory(GiB)": 67.62, |
| "step": 6700, |
| "train_speed(iter/s)": 0.167295 |
| }, |
| { |
| "epoch": 4.607977991746905, |
| "eval_acc": 0.7817238043820579, |
| "eval_loss": 0.7931625843048096, |
| "eval_runtime": 1146.6195, |
| "eval_samples_per_second": 3.735, |
| "eval_steps_per_second": 0.067, |
| "step": 6700 |
| }, |
| { |
| "acc": 0.82920761, |
| "epoch": 4.611416781292985, |
| "grad_norm": 2.6180896759033203, |
| "learning_rate": 1.6522827615804277e-06, |
| "loss": 0.55708656, |
| "memory(GiB)": 67.62, |
| "step": 6705, |
| "train_speed(iter/s)": 0.162482 |
| }, |
| { |
| "acc": 0.82743568, |
| "epoch": 4.614855570839064, |
| "grad_norm": 2.1857407093048096, |
| "learning_rate": 1.6235011037691344e-06, |
| "loss": 0.58240447, |
| "memory(GiB)": 67.62, |
| "step": 6710, |
| "train_speed(iter/s)": 0.16232 |
| }, |
| { |
| "acc": 0.81718578, |
| "epoch": 4.6182943603851445, |
| "grad_norm": 2.2875170707702637, |
| "learning_rate": 1.5949697487751052e-06, |
| "loss": 0.61164322, |
| "memory(GiB)": 67.62, |
| "step": 6715, |
| "train_speed(iter/s)": 0.162187 |
| }, |
| { |
| "acc": 0.82232466, |
| "epoch": 4.621733149931224, |
| "grad_norm": 2.1736197471618652, |
| "learning_rate": 1.5666888442059804e-06, |
| "loss": 0.58460808, |
| "memory(GiB)": 67.62, |
| "step": 6720, |
| "train_speed(iter/s)": 0.162055 |
| }, |
| { |
| "acc": 0.82449484, |
| "epoch": 4.625171939477304, |
| "grad_norm": 2.126422643661499, |
| "learning_rate": 1.538658536373673e-06, |
| "loss": 0.57822762, |
| "memory(GiB)": 67.62, |
| "step": 6725, |
| "train_speed(iter/s)": 0.161946 |
| }, |
| { |
| "acc": 0.82507849, |
| "epoch": 4.628610729023384, |
| "grad_norm": 2.2693231105804443, |
| "learning_rate": 1.5108789702936455e-06, |
| "loss": 0.57952757, |
| "memory(GiB)": 67.62, |
| "step": 6730, |
| "train_speed(iter/s)": 0.161804 |
| }, |
| { |
| "acc": 0.8332633, |
| "epoch": 4.632049518569463, |
| "grad_norm": 2.1562063694000244, |
| "learning_rate": 1.4833502896841289e-06, |
| "loss": 0.55239053, |
| "memory(GiB)": 67.62, |
| "step": 6735, |
| "train_speed(iter/s)": 0.161675 |
| }, |
| { |
| "acc": 0.82784958, |
| "epoch": 4.635488308115543, |
| "grad_norm": 2.1214349269866943, |
| "learning_rate": 1.456072636965399e-06, |
| "loss": 0.5708005, |
| "memory(GiB)": 67.62, |
| "step": 6740, |
| "train_speed(iter/s)": 0.161537 |
| }, |
| { |
| "acc": 0.8265028, |
| "epoch": 4.6389270976616235, |
| "grad_norm": 2.2998435497283936, |
| "learning_rate": 1.4290461532590343e-06, |
| "loss": 0.58597693, |
| "memory(GiB)": 67.62, |
| "step": 6745, |
| "train_speed(iter/s)": 0.161423 |
| }, |
| { |
| "acc": 0.82159843, |
| "epoch": 4.642365887207703, |
| "grad_norm": 2.096148729324341, |
| "learning_rate": 1.4022709783871718e-06, |
| "loss": 0.60574317, |
| "memory(GiB)": 67.62, |
| "step": 6750, |
| "train_speed(iter/s)": 0.161279 |
| }, |
| { |
| "acc": 0.82948322, |
| "epoch": 4.645804676753783, |
| "grad_norm": 1.9622774124145508, |
| "learning_rate": 1.375747250871807e-06, |
| "loss": 0.57297769, |
| "memory(GiB)": 67.62, |
| "step": 6755, |
| "train_speed(iter/s)": 0.161116 |
| }, |
| { |
| "acc": 0.82648077, |
| "epoch": 4.649243466299862, |
| "grad_norm": 2.2610554695129395, |
| "learning_rate": 1.3494751079340738e-06, |
| "loss": 0.56792774, |
| "memory(GiB)": 67.62, |
| "step": 6760, |
| "train_speed(iter/s)": 0.16096 |
| }, |
| { |
| "acc": 0.82656231, |
| "epoch": 4.652682255845942, |
| "grad_norm": 2.134491205215454, |
| "learning_rate": 1.3234546854935154e-06, |
| "loss": 0.56553001, |
| "memory(GiB)": 67.62, |
| "step": 6765, |
| "train_speed(iter/s)": 0.160816 |
| }, |
| { |
| "acc": 0.83355551, |
| "epoch": 4.656121045392022, |
| "grad_norm": 2.2637131214141846, |
| "learning_rate": 1.2976861181673923e-06, |
| "loss": 0.55729747, |
| "memory(GiB)": 67.62, |
| "step": 6770, |
| "train_speed(iter/s)": 0.160707 |
| }, |
| { |
| "acc": 0.83004456, |
| "epoch": 4.6595598349381016, |
| "grad_norm": 2.241671323776245, |
| "learning_rate": 1.2721695392699869e-06, |
| "loss": 0.55024014, |
| "memory(GiB)": 67.62, |
| "step": 6775, |
| "train_speed(iter/s)": 0.16056 |
| }, |
| { |
| "acc": 0.82354479, |
| "epoch": 4.662998624484182, |
| "grad_norm": 2.196913480758667, |
| "learning_rate": 1.2469050808119282e-06, |
| "loss": 0.57635975, |
| "memory(GiB)": 67.62, |
| "step": 6780, |
| "train_speed(iter/s)": 0.160424 |
| }, |
| { |
| "acc": 0.814569, |
| "epoch": 4.666437414030261, |
| "grad_norm": 2.4140119552612305, |
| "learning_rate": 1.221892873499479e-06, |
| "loss": 0.61613665, |
| "memory(GiB)": 67.62, |
| "step": 6785, |
| "train_speed(iter/s)": 0.160253 |
| }, |
| { |
| "acc": 0.83262882, |
| "epoch": 4.669876203576341, |
| "grad_norm": 2.239264726638794, |
| "learning_rate": 1.1971330467338833e-06, |
| "loss": 0.55864224, |
| "memory(GiB)": 67.62, |
| "step": 6790, |
| "train_speed(iter/s)": 0.1601 |
| }, |
| { |
| "acc": 0.82022276, |
| "epoch": 4.673314993122421, |
| "grad_norm": 2.135786771774292, |
| "learning_rate": 1.172625728610676e-06, |
| "loss": 0.58857613, |
| "memory(GiB)": 67.62, |
| "step": 6795, |
| "train_speed(iter/s)": 0.15997 |
| }, |
| { |
| "acc": 0.83236532, |
| "epoch": 4.6767537826685, |
| "grad_norm": 1.979997992515564, |
| "learning_rate": 1.1483710459190515e-06, |
| "loss": 0.56562681, |
| "memory(GiB)": 67.62, |
| "step": 6800, |
| "train_speed(iter/s)": 0.159825 |
| }, |
| { |
| "epoch": 4.6767537826685, |
| "eval_acc": 0.7819093894812615, |
| "eval_loss": 0.7931298613548279, |
| "eval_runtime": 1157.2244, |
| "eval_samples_per_second": 3.701, |
| "eval_steps_per_second": 0.067, |
| "step": 6800 |
| }, |
| { |
| "acc": 0.8271327, |
| "epoch": 4.6801925722145805, |
| "grad_norm": 2.349480152130127, |
| "learning_rate": 1.1243691241411644e-06, |
| "loss": 0.58665218, |
| "memory(GiB)": 67.62, |
| "step": 6805, |
| "train_speed(iter/s)": 0.155462 |
| }, |
| { |
| "acc": 0.83063755, |
| "epoch": 4.683631361760661, |
| "grad_norm": 2.1535379886627197, |
| "learning_rate": 1.1006200874515338e-06, |
| "loss": 0.55733638, |
| "memory(GiB)": 67.62, |
| "step": 6810, |
| "train_speed(iter/s)": 0.155314 |
| }, |
| { |
| "acc": 0.81677713, |
| "epoch": 4.68707015130674, |
| "grad_norm": 2.1077511310577393, |
| "learning_rate": 1.0771240587163464e-06, |
| "loss": 0.60006194, |
| "memory(GiB)": 67.62, |
| "step": 6815, |
| "train_speed(iter/s)": 0.155164 |
| }, |
| { |
| "acc": 0.83417349, |
| "epoch": 4.69050894085282, |
| "grad_norm": 2.45220685005188, |
| "learning_rate": 1.0538811594928607e-06, |
| "loss": 0.53521776, |
| "memory(GiB)": 67.62, |
| "step": 6820, |
| "train_speed(iter/s)": 0.155057 |
| }, |
| { |
| "acc": 0.82799282, |
| "epoch": 4.693947730398899, |
| "grad_norm": 2.1742374897003174, |
| "learning_rate": 1.0308915100287642e-06, |
| "loss": 0.56440144, |
| "memory(GiB)": 67.62, |
| "step": 6825, |
| "train_speed(iter/s)": 0.154917 |
| }, |
| { |
| "acc": 0.83087101, |
| "epoch": 4.697386519944979, |
| "grad_norm": 2.1993463039398193, |
| "learning_rate": 1.0081552292615454e-06, |
| "loss": 0.5529726, |
| "memory(GiB)": 67.62, |
| "step": 6830, |
| "train_speed(iter/s)": 0.154819 |
| }, |
| { |
| "acc": 0.83782015, |
| "epoch": 4.7008253094910595, |
| "grad_norm": 2.260230541229248, |
| "learning_rate": 9.856724348178841e-07, |
| "loss": 0.53974109, |
| "memory(GiB)": 67.62, |
| "step": 6835, |
| "train_speed(iter/s)": 0.154699 |
| }, |
| { |
| "acc": 0.83221836, |
| "epoch": 4.704264099037139, |
| "grad_norm": 2.035860061645508, |
| "learning_rate": 9.634432430130399e-07, |
| "loss": 0.54515915, |
| "memory(GiB)": 67.62, |
| "step": 6840, |
| "train_speed(iter/s)": 0.154586 |
| }, |
| { |
| "acc": 0.82770882, |
| "epoch": 4.707702888583219, |
| "grad_norm": 2.026685953140259, |
| "learning_rate": 9.414677688502594e-07, |
| "loss": 0.5836278, |
| "memory(GiB)": 67.62, |
| "step": 6845, |
| "train_speed(iter/s)": 0.154451 |
| }, |
| { |
| "acc": 0.82769499, |
| "epoch": 4.711141678129298, |
| "grad_norm": 2.1812551021575928, |
| "learning_rate": 9.1974612602017e-07, |
| "loss": 0.57010379, |
| "memory(GiB)": 67.62, |
| "step": 6850, |
| "train_speed(iter/s)": 0.154322 |
| }, |
| { |
| "acc": 0.81980133, |
| "epoch": 4.714580467675378, |
| "grad_norm": 2.3447399139404297, |
| "learning_rate": 8.982784269002089e-07, |
| "loss": 0.59749265, |
| "memory(GiB)": 67.62, |
| "step": 6855, |
| "train_speed(iter/s)": 0.154209 |
| }, |
| { |
| "acc": 0.83671551, |
| "epoch": 4.718019257221458, |
| "grad_norm": 2.019040107727051, |
| "learning_rate": 8.770647825540072e-07, |
| "loss": 0.5339366, |
| "memory(GiB)": 67.62, |
| "step": 6860, |
| "train_speed(iter/s)": 0.154098 |
| }, |
| { |
| "acc": 0.83358383, |
| "epoch": 4.7214580467675376, |
| "grad_norm": 2.4504003524780273, |
| "learning_rate": 8.561053027308616e-07, |
| "loss": 0.54877663, |
| "memory(GiB)": 67.62, |
| "step": 6865, |
| "train_speed(iter/s)": 0.153941 |
| }, |
| { |
| "acc": 0.82203579, |
| "epoch": 4.724896836313618, |
| "grad_norm": 2.2956948280334473, |
| "learning_rate": 8.354000958651198e-07, |
| "loss": 0.58671484, |
| "memory(GiB)": 67.62, |
| "step": 6870, |
| "train_speed(iter/s)": 0.153816 |
| }, |
| { |
| "acc": 0.82069569, |
| "epoch": 4.728335625859698, |
| "grad_norm": 2.3851406574249268, |
| "learning_rate": 8.149492690756679e-07, |
| "loss": 0.58018303, |
| "memory(GiB)": 67.62, |
| "step": 6875, |
| "train_speed(iter/s)": 0.153716 |
| }, |
| { |
| "acc": 0.82189007, |
| "epoch": 4.731774415405777, |
| "grad_norm": 2.3761680126190186, |
| "learning_rate": 7.947529281653329e-07, |
| "loss": 0.5802557, |
| "memory(GiB)": 67.62, |
| "step": 6880, |
| "train_speed(iter/s)": 0.153572 |
| }, |
| { |
| "acc": 0.8201951, |
| "epoch": 4.735213204951857, |
| "grad_norm": 2.3680715560913086, |
| "learning_rate": 7.748111776203488e-07, |
| "loss": 0.5941371, |
| "memory(GiB)": 67.62, |
| "step": 6885, |
| "train_speed(iter/s)": 0.153396 |
| }, |
| { |
| "acc": 0.83601265, |
| "epoch": 4.738651994497936, |
| "grad_norm": 2.2949132919311523, |
| "learning_rate": 7.551241206098402e-07, |
| "loss": 0.54753556, |
| "memory(GiB)": 67.62, |
| "step": 6890, |
| "train_speed(iter/s)": 0.153255 |
| }, |
| { |
| "acc": 0.82891521, |
| "epoch": 4.7420907840440165, |
| "grad_norm": 2.6076362133026123, |
| "learning_rate": 7.356918589852512e-07, |
| "loss": 0.56754522, |
| "memory(GiB)": 67.62, |
| "step": 6895, |
| "train_speed(iter/s)": 0.153143 |
| }, |
| { |
| "acc": 0.82609663, |
| "epoch": 4.745529573590097, |
| "grad_norm": 2.297222852706909, |
| "learning_rate": 7.165144932798456e-07, |
| "loss": 0.56647487, |
| "memory(GiB)": 67.62, |
| "step": 6900, |
| "train_speed(iter/s)": 0.153005 |
| }, |
| { |
| "epoch": 4.745529573590097, |
| "eval_acc": 0.7816394475187834, |
| "eval_loss": 0.7943344116210938, |
| "eval_runtime": 1104.7871, |
| "eval_samples_per_second": 3.877, |
| "eval_steps_per_second": 0.07, |
| "step": 6900 |
| }, |
| { |
| "acc": 0.83013258, |
| "epoch": 4.748968363136176, |
| "grad_norm": 2.427417755126953, |
| "learning_rate": 6.975921227081685e-07, |
| "loss": 0.55977812, |
| "memory(GiB)": 67.62, |
| "step": 6905, |
| "train_speed(iter/s)": 0.149232 |
| }, |
| { |
| "acc": 0.8199255, |
| "epoch": 4.752407152682256, |
| "grad_norm": 2.2759101390838623, |
| "learning_rate": 6.789248451655523e-07, |
| "loss": 0.58387136, |
| "memory(GiB)": 67.62, |
| "step": 6910, |
| "train_speed(iter/s)": 0.149131 |
| }, |
| { |
| "acc": 0.82206144, |
| "epoch": 4.755845942228335, |
| "grad_norm": 2.231541395187378, |
| "learning_rate": 6.605127572275894e-07, |
| "loss": 0.59709778, |
| "memory(GiB)": 67.62, |
| "step": 6915, |
| "train_speed(iter/s)": 0.149024 |
| }, |
| { |
| "acc": 0.8175106, |
| "epoch": 4.759284731774415, |
| "grad_norm": 2.4362175464630127, |
| "learning_rate": 6.423559541496492e-07, |
| "loss": 0.6127625, |
| "memory(GiB)": 67.62, |
| "step": 6920, |
| "train_speed(iter/s)": 0.148912 |
| }, |
| { |
| "acc": 0.83411427, |
| "epoch": 4.7627235213204955, |
| "grad_norm": 2.0732574462890625, |
| "learning_rate": 6.244545298663843e-07, |
| "loss": 0.54563398, |
| "memory(GiB)": 67.62, |
| "step": 6925, |
| "train_speed(iter/s)": 0.148809 |
| }, |
| { |
| "acc": 0.8238575, |
| "epoch": 4.766162310866575, |
| "grad_norm": 2.174506187438965, |
| "learning_rate": 6.068085769912308e-07, |
| "loss": 0.58828888, |
| "memory(GiB)": 67.62, |
| "step": 6930, |
| "train_speed(iter/s)": 0.148728 |
| }, |
| { |
| "acc": 0.82762337, |
| "epoch": 4.769601100412655, |
| "grad_norm": 2.551449775695801, |
| "learning_rate": 5.894181868159313e-07, |
| "loss": 0.57614126, |
| "memory(GiB)": 67.62, |
| "step": 6935, |
| "train_speed(iter/s)": 0.148607 |
| }, |
| { |
| "acc": 0.82847862, |
| "epoch": 4.773039889958735, |
| "grad_norm": 2.242396354675293, |
| "learning_rate": 5.722834493100845e-07, |
| "loss": 0.58625593, |
| "memory(GiB)": 67.62, |
| "step": 6940, |
| "train_speed(iter/s)": 0.148523 |
| }, |
| { |
| "acc": 0.83427067, |
| "epoch": 4.776478679504814, |
| "grad_norm": 2.2920279502868652, |
| "learning_rate": 5.554044531206463e-07, |
| "loss": 0.55577106, |
| "memory(GiB)": 67.62, |
| "step": 6945, |
| "train_speed(iter/s)": 0.148434 |
| }, |
| { |
| "acc": 0.82505064, |
| "epoch": 4.779917469050894, |
| "grad_norm": 2.4490933418273926, |
| "learning_rate": 5.387812855715081e-07, |
| "loss": 0.57476597, |
| "memory(GiB)": 67.62, |
| "step": 6950, |
| "train_speed(iter/s)": 0.148301 |
| }, |
| { |
| "acc": 0.82180548, |
| "epoch": 4.7833562585969736, |
| "grad_norm": 2.4874212741851807, |
| "learning_rate": 5.224140326630133e-07, |
| "loss": 0.59430389, |
| "memory(GiB)": 67.62, |
| "step": 6955, |
| "train_speed(iter/s)": 0.148156 |
| }, |
| { |
| "acc": 0.81490593, |
| "epoch": 4.786795048143054, |
| "grad_norm": 2.1581063270568848, |
| "learning_rate": 5.063027790715248e-07, |
| "loss": 0.60423484, |
| "memory(GiB)": 67.62, |
| "step": 6960, |
| "train_speed(iter/s)": 0.148057 |
| }, |
| { |
| "acc": 0.82663193, |
| "epoch": 4.790233837689134, |
| "grad_norm": 2.1210756301879883, |
| "learning_rate": 4.904476081489975e-07, |
| "loss": 0.56228495, |
| "memory(GiB)": 67.62, |
| "step": 6965, |
| "train_speed(iter/s)": 0.147956 |
| }, |
| { |
| "acc": 0.83753424, |
| "epoch": 4.793672627235213, |
| "grad_norm": 2.065978527069092, |
| "learning_rate": 4.7484860192252317e-07, |
| "loss": 0.53960943, |
| "memory(GiB)": 67.62, |
| "step": 6970, |
| "train_speed(iter/s)": 0.147817 |
| }, |
| { |
| "acc": 0.83375235, |
| "epoch": 4.797111416781293, |
| "grad_norm": 2.488433837890625, |
| "learning_rate": 4.595058410939305e-07, |
| "loss": 0.55561361, |
| "memory(GiB)": 67.62, |
| "step": 6975, |
| "train_speed(iter/s)": 0.147709 |
| }, |
| { |
| "acc": 0.82256441, |
| "epoch": 4.800550206327372, |
| "grad_norm": 2.134580135345459, |
| "learning_rate": 4.4441940503934173e-07, |
| "loss": 0.59003773, |
| "memory(GiB)": 67.62, |
| "step": 6980, |
| "train_speed(iter/s)": 0.147603 |
| }, |
| { |
| "acc": 0.82549543, |
| "epoch": 4.8039889958734525, |
| "grad_norm": 2.2374000549316406, |
| "learning_rate": 4.295893718088e-07, |
| "loss": 0.57104192, |
| "memory(GiB)": 67.62, |
| "step": 6985, |
| "train_speed(iter/s)": 0.147515 |
| }, |
| { |
| "acc": 0.83225937, |
| "epoch": 4.807427785419533, |
| "grad_norm": 1.948536992073059, |
| "learning_rate": 4.150158181258259e-07, |
| "loss": 0.55912457, |
| "memory(GiB)": 67.62, |
| "step": 6990, |
| "train_speed(iter/s)": 0.147431 |
| }, |
| { |
| "acc": 0.81516037, |
| "epoch": 4.810866574965612, |
| "grad_norm": 2.495556354522705, |
| "learning_rate": 4.0069881938703406e-07, |
| "loss": 0.59933119, |
| "memory(GiB)": 67.62, |
| "step": 6995, |
| "train_speed(iter/s)": 0.147319 |
| }, |
| { |
| "acc": 0.82695866, |
| "epoch": 4.814305364511692, |
| "grad_norm": 2.299910545349121, |
| "learning_rate": 3.866384496617616e-07, |
| "loss": 0.58013859, |
| "memory(GiB)": 67.62, |
| "step": 7000, |
| "train_speed(iter/s)": 0.147213 |
| }, |
| { |
| "epoch": 4.814305364511692, |
| "eval_acc": 0.7819543798083413, |
| "eval_loss": 0.793637216091156, |
| "eval_runtime": 1150.3821, |
| "eval_samples_per_second": 3.723, |
| "eval_steps_per_second": 0.067, |
| "step": 7000 |
| }, |
| { |
| "acc": 0.82657938, |
| "epoch": 4.817744154057772, |
| "grad_norm": 2.2365365028381348, |
| "learning_rate": 3.7283478169165165e-07, |
| "loss": 0.59894753, |
| "memory(GiB)": 67.62, |
| "step": 7005, |
| "train_speed(iter/s)": 0.143635 |
| }, |
| { |
| "acc": 0.82827587, |
| "epoch": 4.821182943603851, |
| "grad_norm": 2.287341833114624, |
| "learning_rate": 3.592878868903036e-07, |
| "loss": 0.56538892, |
| "memory(GiB)": 67.62, |
| "step": 7010, |
| "train_speed(iter/s)": 0.143557 |
| }, |
| { |
| "acc": 0.82892952, |
| "epoch": 4.8246217331499315, |
| "grad_norm": 2.37528920173645, |
| "learning_rate": 3.459978353429071e-07, |
| "loss": 0.56618586, |
| "memory(GiB)": 67.62, |
| "step": 7015, |
| "train_speed(iter/s)": 0.143484 |
| }, |
| { |
| "acc": 0.82298727, |
| "epoch": 4.828060522696011, |
| "grad_norm": 2.1354215145111084, |
| "learning_rate": 3.3296469580584186e-07, |
| "loss": 0.58705649, |
| "memory(GiB)": 67.62, |
| "step": 7020, |
| "train_speed(iter/s)": 0.143374 |
| }, |
| { |
| "acc": 0.81914625, |
| "epoch": 4.831499312242091, |
| "grad_norm": 2.2157156467437744, |
| "learning_rate": 3.201885357063674e-07, |
| "loss": 0.60606232, |
| "memory(GiB)": 67.62, |
| "step": 7025, |
| "train_speed(iter/s)": 0.143299 |
| }, |
| { |
| "acc": 0.82481365, |
| "epoch": 4.83493810178817, |
| "grad_norm": 2.350295066833496, |
| "learning_rate": 3.076694211422452e-07, |
| "loss": 0.59341784, |
| "memory(GiB)": 67.62, |
| "step": 7030, |
| "train_speed(iter/s)": 0.14321 |
| }, |
| { |
| "acc": 0.8327177, |
| "epoch": 4.83837689133425, |
| "grad_norm": 2.483370304107666, |
| "learning_rate": 2.954074168814115e-07, |
| "loss": 0.57141585, |
| "memory(GiB)": 67.62, |
| "step": 7035, |
| "train_speed(iter/s)": 0.1431 |
| }, |
| { |
| "acc": 0.82273092, |
| "epoch": 4.84181568088033, |
| "grad_norm": 2.237597942352295, |
| "learning_rate": 2.8340258636162734e-07, |
| "loss": 0.59980655, |
| "memory(GiB)": 67.62, |
| "step": 7040, |
| "train_speed(iter/s)": 0.142991 |
| }, |
| { |
| "acc": 0.81792231, |
| "epoch": 4.8452544704264096, |
| "grad_norm": 2.165174961090088, |
| "learning_rate": 2.716549916901624e-07, |
| "loss": 0.59414587, |
| "memory(GiB)": 67.62, |
| "step": 7045, |
| "train_speed(iter/s)": 0.142869 |
| }, |
| { |
| "acc": 0.83234692, |
| "epoch": 4.84869325997249, |
| "grad_norm": 2.2582786083221436, |
| "learning_rate": 2.601646936434731e-07, |
| "loss": 0.56242762, |
| "memory(GiB)": 67.62, |
| "step": 7050, |
| "train_speed(iter/s)": 0.142752 |
| }, |
| { |
| "acc": 0.82207642, |
| "epoch": 4.85213204951857, |
| "grad_norm": 2.386744737625122, |
| "learning_rate": 2.4893175166689693e-07, |
| "loss": 0.5899931, |
| "memory(GiB)": 67.62, |
| "step": 7055, |
| "train_speed(iter/s)": 0.142651 |
| }, |
| { |
| "acc": 0.83866978, |
| "epoch": 4.855570839064649, |
| "grad_norm": 2.4052698612213135, |
| "learning_rate": 2.3795622387430887e-07, |
| "loss": 0.52610168, |
| "memory(GiB)": 67.62, |
| "step": 7060, |
| "train_speed(iter/s)": 0.14255 |
| }, |
| { |
| "acc": 0.81752338, |
| "epoch": 4.859009628610729, |
| "grad_norm": 2.0953776836395264, |
| "learning_rate": 2.272381670478657e-07, |
| "loss": 0.60933762, |
| "memory(GiB)": 67.62, |
| "step": 7065, |
| "train_speed(iter/s)": 0.142448 |
| }, |
| { |
| "acc": 0.82258358, |
| "epoch": 4.862448418156809, |
| "grad_norm": 2.4188003540039062, |
| "learning_rate": 2.1677763663768406e-07, |
| "loss": 0.58760223, |
| "memory(GiB)": 67.62, |
| "step": 7070, |
| "train_speed(iter/s)": 0.142309 |
| }, |
| { |
| "acc": 0.82635889, |
| "epoch": 4.8658872077028885, |
| "grad_norm": 2.2215888500213623, |
| "learning_rate": 2.0657468676155762e-07, |
| "loss": 0.58528147, |
| "memory(GiB)": 67.62, |
| "step": 7075, |
| "train_speed(iter/s)": 0.142201 |
| }, |
| { |
| "acc": 0.84418049, |
| "epoch": 4.869325997248969, |
| "grad_norm": 2.0003366470336914, |
| "learning_rate": 1.9662937020469589e-07, |
| "loss": 0.51888628, |
| "memory(GiB)": 67.62, |
| "step": 7080, |
| "train_speed(iter/s)": 0.142127 |
| }, |
| { |
| "acc": 0.83303547, |
| "epoch": 4.872764786795048, |
| "grad_norm": 2.1851377487182617, |
| "learning_rate": 1.8694173841941928e-07, |
| "loss": 0.55756779, |
| "memory(GiB)": 67.62, |
| "step": 7085, |
| "train_speed(iter/s)": 0.142023 |
| }, |
| { |
| "acc": 0.82351046, |
| "epoch": 4.876203576341128, |
| "grad_norm": 2.3260505199432373, |
| "learning_rate": 1.775118415249201e-07, |
| "loss": 0.58764186, |
| "memory(GiB)": 67.62, |
| "step": 7090, |
| "train_speed(iter/s)": 0.141935 |
| }, |
| { |
| "acc": 0.82887058, |
| "epoch": 4.879642365887207, |
| "grad_norm": 2.2045719623565674, |
| "learning_rate": 1.6833972830699635e-07, |
| "loss": 0.56427956, |
| "memory(GiB)": 67.62, |
| "step": 7095, |
| "train_speed(iter/s)": 0.141825 |
| }, |
| { |
| "acc": 0.81958294, |
| "epoch": 4.883081155433287, |
| "grad_norm": 2.245159149169922, |
| "learning_rate": 1.5942544621777965e-07, |
| "loss": 0.60630999, |
| "memory(GiB)": 67.62, |
| "step": 7100, |
| "train_speed(iter/s)": 0.141715 |
| }, |
| { |
| "epoch": 4.883081155433287, |
| "eval_acc": 0.7818137850362172, |
| "eval_loss": 0.7931898832321167, |
| "eval_runtime": 1152.8663, |
| "eval_samples_per_second": 3.715, |
| "eval_steps_per_second": 0.067, |
| "step": 7100 |
| }, |
| { |
| "acc": 0.83190765, |
| "epoch": 4.8865199449793675, |
| "grad_norm": 2.2760040760040283, |
| "learning_rate": 1.507690413755244e-07, |
| "loss": 0.56932721, |
| "memory(GiB)": 67.62, |
| "step": 7105, |
| "train_speed(iter/s)": 0.138438 |
| }, |
| { |
| "acc": 0.82073574, |
| "epoch": 4.889958734525447, |
| "grad_norm": 2.2572543621063232, |
| "learning_rate": 1.423705585643412e-07, |
| "loss": 0.59770269, |
| "memory(GiB)": 67.62, |
| "step": 7110, |
| "train_speed(iter/s)": 0.138349 |
| }, |
| { |
| "acc": 0.82008648, |
| "epoch": 4.893397524071527, |
| "grad_norm": 2.431645631790161, |
| "learning_rate": 1.342300412339805e-07, |
| "loss": 0.60884895, |
| "memory(GiB)": 67.62, |
| "step": 7115, |
| "train_speed(iter/s)": 0.138261 |
| }, |
| { |
| "acc": 0.83563404, |
| "epoch": 4.896836313617607, |
| "grad_norm": 2.210167646408081, |
| "learning_rate": 1.2634753149959394e-07, |
| "loss": 0.55552473, |
| "memory(GiB)": 67.62, |
| "step": 7120, |
| "train_speed(iter/s)": 0.138175 |
| }, |
| { |
| "acc": 0.83866234, |
| "epoch": 4.900275103163686, |
| "grad_norm": 2.1584184169769287, |
| "learning_rate": 1.1872307014153448e-07, |
| "loss": 0.5373682, |
| "memory(GiB)": 67.62, |
| "step": 7125, |
| "train_speed(iter/s)": 0.138058 |
| }, |
| { |
| "acc": 0.83324118, |
| "epoch": 4.903713892709766, |
| "grad_norm": 2.51465106010437, |
| "learning_rate": 1.1135669660512879e-07, |
| "loss": 0.54701567, |
| "memory(GiB)": 67.62, |
| "step": 7130, |
| "train_speed(iter/s)": 0.137984 |
| }, |
| { |
| "acc": 0.84189644, |
| "epoch": 4.9071526822558456, |
| "grad_norm": 2.2430858612060547, |
| "learning_rate": 1.0424844900048863e-07, |
| "loss": 0.52747626, |
| "memory(GiB)": 67.62, |
| "step": 7135, |
| "train_speed(iter/s)": 0.137875 |
| }, |
| { |
| "acc": 0.82826939, |
| "epoch": 4.910591471801926, |
| "grad_norm": 2.468977451324463, |
| "learning_rate": 9.739836410229431e-08, |
| "loss": 0.56382651, |
| "memory(GiB)": 67.62, |
| "step": 7140, |
| "train_speed(iter/s)": 0.137804 |
| }, |
| { |
| "acc": 0.82850714, |
| "epoch": 4.914030261348006, |
| "grad_norm": 2.1959378719329834, |
| "learning_rate": 9.080647734961705e-08, |
| "loss": 0.5642982, |
| "memory(GiB)": 67.62, |
| "step": 7145, |
| "train_speed(iter/s)": 0.137706 |
| }, |
| { |
| "acc": 0.80990505, |
| "epoch": 4.917469050894085, |
| "grad_norm": 2.1937224864959717, |
| "learning_rate": 8.447282284574144e-08, |
| "loss": 0.64270401, |
| "memory(GiB)": 67.62, |
| "step": 7150, |
| "train_speed(iter/s)": 0.137623 |
| }, |
| { |
| "acc": 0.83124857, |
| "epoch": 4.920907840440165, |
| "grad_norm": 2.274343967437744, |
| "learning_rate": 7.839743335798222e-08, |
| "loss": 0.58021183, |
| "memory(GiB)": 67.62, |
| "step": 7155, |
| "train_speed(iter/s)": 0.137544 |
| }, |
| { |
| "acc": 0.83879738, |
| "epoch": 4.924346629986244, |
| "grad_norm": 2.0642943382263184, |
| "learning_rate": 7.258034031750108e-08, |
| "loss": 0.55038834, |
| "memory(GiB)": 67.62, |
| "step": 7160, |
| "train_speed(iter/s)": 0.137462 |
| }, |
| { |
| "acc": 0.82772274, |
| "epoch": 4.9277854195323245, |
| "grad_norm": 2.236903429031372, |
| "learning_rate": 6.702157381916804e-08, |
| "loss": 0.57812862, |
| "memory(GiB)": 67.62, |
| "step": 7165, |
| "train_speed(iter/s)": 0.137368 |
| }, |
| { |
| "acc": 0.81524467, |
| "epoch": 4.931224209078405, |
| "grad_norm": 2.3664135932922363, |
| "learning_rate": 6.172116262139473e-08, |
| "loss": 0.59173594, |
| "memory(GiB)": 67.62, |
| "step": 7170, |
| "train_speed(iter/s)": 0.137273 |
| }, |
| { |
| "acc": 0.84409065, |
| "epoch": 4.934662998624484, |
| "grad_norm": 1.8912343978881836, |
| "learning_rate": 5.66791341459791e-08, |
| "loss": 0.51706591, |
| "memory(GiB)": 67.62, |
| "step": 7175, |
| "train_speed(iter/s)": 0.13718 |
| }, |
| { |
| "acc": 0.82883434, |
| "epoch": 4.938101788170564, |
| "grad_norm": 2.3647637367248535, |
| "learning_rate": 5.189551447797223e-08, |
| "loss": 0.57346845, |
| "memory(GiB)": 67.62, |
| "step": 7180, |
| "train_speed(iter/s)": 0.137112 |
| }, |
| { |
| "acc": 0.82722406, |
| "epoch": 4.941540577716644, |
| "grad_norm": 2.390969753265381, |
| "learning_rate": 4.7370328365550553e-08, |
| "loss": 0.58734665, |
| "memory(GiB)": 67.62, |
| "step": 7185, |
| "train_speed(iter/s)": 0.137029 |
| }, |
| { |
| "acc": 0.83363981, |
| "epoch": 4.944979367262723, |
| "grad_norm": 2.3286654949188232, |
| "learning_rate": 4.3103599219855e-08, |
| "loss": 0.55134306, |
| "memory(GiB)": 67.62, |
| "step": 7190, |
| "train_speed(iter/s)": 0.13694 |
| }, |
| { |
| "acc": 0.8398654, |
| "epoch": 4.9484181568088035, |
| "grad_norm": 1.95890474319458, |
| "learning_rate": 3.909534911492433e-08, |
| "loss": 0.52122355, |
| "memory(GiB)": 67.62, |
| "step": 7195, |
| "train_speed(iter/s)": 0.136882 |
| }, |
| { |
| "acc": 0.82601204, |
| "epoch": 4.951856946354883, |
| "grad_norm": 2.7101948261260986, |
| "learning_rate": 3.534559878752308e-08, |
| "loss": 0.58264699, |
| "memory(GiB)": 67.62, |
| "step": 7200, |
| "train_speed(iter/s)": 0.13679 |
| }, |
| { |
| "epoch": 4.951856946354883, |
| "eval_acc": 0.7817575471273677, |
| "eval_loss": 0.7930530309677124, |
| "eval_runtime": 1110.3461, |
| "eval_samples_per_second": 3.857, |
| "eval_steps_per_second": 0.069, |
| "step": 7200 |
| }, |
| { |
| "acc": 0.83350286, |
| "epoch": 4.955295735900963, |
| "grad_norm": 2.281674861907959, |
| "learning_rate": 3.185436763708053e-08, |
| "loss": 0.55087848, |
| "memory(GiB)": 67.62, |
| "step": 7205, |
| "train_speed(iter/s)": 0.13388 |
| }, |
| { |
| "acc": 0.83256226, |
| "epoch": 4.958734525447043, |
| "grad_norm": 2.406829357147217, |
| "learning_rate": 2.862167372556297e-08, |
| "loss": 0.55789819, |
| "memory(GiB)": 67.62, |
| "step": 7210, |
| "train_speed(iter/s)": 0.133821 |
| }, |
| { |
| "acc": 0.81454487, |
| "epoch": 4.962173314993122, |
| "grad_norm": 2.2203316688537598, |
| "learning_rate": 2.564753377737945e-08, |
| "loss": 0.60484362, |
| "memory(GiB)": 67.62, |
| "step": 7215, |
| "train_speed(iter/s)": 0.133748 |
| }, |
| { |
| "acc": 0.82130527, |
| "epoch": 4.965612104539202, |
| "grad_norm": 2.130246162414551, |
| "learning_rate": 2.2931963179320628e-08, |
| "loss": 0.59843221, |
| "memory(GiB)": 67.62, |
| "step": 7220, |
| "train_speed(iter/s)": 0.133637 |
| }, |
| { |
| "acc": 0.83102131, |
| "epoch": 4.9690508940852816, |
| "grad_norm": 2.521017551422119, |
| "learning_rate": 2.04749759804478e-08, |
| "loss": 0.55911312, |
| "memory(GiB)": 67.62, |
| "step": 7225, |
| "train_speed(iter/s)": 0.133538 |
| }, |
| { |
| "acc": 0.81951447, |
| "epoch": 4.972489683631362, |
| "grad_norm": 2.495345115661621, |
| "learning_rate": 1.8276584892048502e-08, |
| "loss": 0.59946508, |
| "memory(GiB)": 67.62, |
| "step": 7230, |
| "train_speed(iter/s)": 0.133454 |
| }, |
| { |
| "acc": 0.83500395, |
| "epoch": 4.975928473177442, |
| "grad_norm": 2.169851541519165, |
| "learning_rate": 1.6336801287547673e-08, |
| "loss": 0.55714474, |
| "memory(GiB)": 67.62, |
| "step": 7235, |
| "train_speed(iter/s)": 0.133371 |
| }, |
| { |
| "acc": 0.83105001, |
| "epoch": 4.979367262723521, |
| "grad_norm": 1.9003541469573975, |
| "learning_rate": 1.4655635202457724e-08, |
| "loss": 0.56020293, |
| "memory(GiB)": 67.62, |
| "step": 7240, |
| "train_speed(iter/s)": 0.133301 |
| }, |
| { |
| "acc": 0.82036457, |
| "epoch": 4.982806052269601, |
| "grad_norm": 2.2826859951019287, |
| "learning_rate": 1.3233095334339681e-08, |
| "loss": 0.5854476, |
| "memory(GiB)": 67.62, |
| "step": 7245, |
| "train_speed(iter/s)": 0.133207 |
| }, |
| { |
| "acc": 0.82185326, |
| "epoch": 4.986244841815681, |
| "grad_norm": 2.5508041381835938, |
| "learning_rate": 1.2069189042725465e-08, |
| "loss": 0.58682165, |
| "memory(GiB)": 67.62, |
| "step": 7250, |
| "train_speed(iter/s)": 0.133133 |
| }, |
| { |
| "acc": 0.83299255, |
| "epoch": 4.9896836313617605, |
| "grad_norm": 2.2958316802978516, |
| "learning_rate": 1.1163922349123454e-08, |
| "loss": 0.54637289, |
| "memory(GiB)": 67.62, |
| "step": 7255, |
| "train_speed(iter/s)": 0.133052 |
| }, |
| { |
| "acc": 0.81462727, |
| "epoch": 4.993122420907841, |
| "grad_norm": 2.2949371337890625, |
| "learning_rate": 1.051729993694077e-08, |
| "loss": 0.60125666, |
| "memory(GiB)": 67.62, |
| "step": 7260, |
| "train_speed(iter/s)": 0.132965 |
| }, |
| { |
| "acc": 0.83669167, |
| "epoch": 4.99656121045392, |
| "grad_norm": 2.335374593734741, |
| "learning_rate": 1.0129325151499931e-08, |
| "loss": 0.51913919, |
| "memory(GiB)": 67.62, |
| "step": 7265, |
| "train_speed(iter/s)": 0.132889 |
| }, |
| { |
| "acc": 0.83688688, |
| "epoch": 5.0, |
| "grad_norm": 2.2776167392730713, |
| "learning_rate": 1e-08, |
| "loss": 0.54480848, |
| "memory(GiB)": 67.62, |
| "step": 7270, |
| "train_speed(iter/s)": 0.132804 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_acc": 0.781712556800288, |
| "eval_loss": 0.793134868144989, |
| "eval_runtime": 1106.3573, |
| "eval_samples_per_second": 3.871, |
| "eval_steps_per_second": 0.07, |
| "step": 7270 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 7270, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.939618530083786e+19, |
| "train_batch_size": 14, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|