SoccerChat-qwen2-vl-7b / trainer_state.json
SushantGautam's picture
Upload folder using huggingface_hub
03d70f4 verified
{
"best_metric": 0.78049123,
"best_model_checkpoint": "/global/D1/homes/sushant/SoccerNetExperiments/Soccer-Video-ChatGPT/November_xvars/swift/output/qwen2-vl-7b-instruct/v7-20241118-100959/checkpoint-5800",
"epoch": 5.0,
"eval_steps": 100,
"global_step": 7270,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"acc": 0.51318568,
"epoch": 0.000687757909215956,
"grad_norm": 0.8106947541236877,
"learning_rate": 0.0,
"loss": 2.20640945,
"memory(GiB)": 68.96,
"step": 1,
"train_speed(iter/s)": 0.018567
},
{
"acc": 0.522241,
"epoch": 0.0034387895460797797,
"grad_norm": 0.7490503191947937,
"learning_rate": 2.7291774109314122e-05,
"loss": 2.21162605,
"memory(GiB)": 68.96,
"step": 5,
"train_speed(iter/s)": 0.040393
},
{
"acc": 0.54119682,
"epoch": 0.0068775790921595595,
"grad_norm": 0.7276351451873779,
"learning_rate": 3.904570144643008e-05,
"loss": 2.13246613,
"memory(GiB)": 73.29,
"step": 10,
"train_speed(iter/s)": 0.052109
},
{
"acc": 0.5478312,
"epoch": 0.01031636863823934,
"grad_norm": 0.763149082660675,
"learning_rate": 4.5921308174844174e-05,
"loss": 2.00201836,
"memory(GiB)": 73.29,
"step": 15,
"train_speed(iter/s)": 0.056038
},
{
"acc": 0.56882777,
"epoch": 0.013755158184319119,
"grad_norm": 0.7898057103157043,
"learning_rate": 5.0799628783546016e-05,
"loss": 1.84855347,
"memory(GiB)": 73.29,
"step": 20,
"train_speed(iter/s)": 0.058389
},
{
"acc": 0.59600019,
"epoch": 0.0171939477303989,
"grad_norm": 0.9877901673316956,
"learning_rate": 5.4583548218628245e-05,
"loss": 1.73883362,
"memory(GiB)": 73.29,
"step": 25,
"train_speed(iter/s)": 0.060811
},
{
"acc": 0.60646133,
"epoch": 0.02063273727647868,
"grad_norm": 0.9179441332817078,
"learning_rate": 5.7675235511960126e-05,
"loss": 1.64034233,
"memory(GiB)": 73.29,
"step": 30,
"train_speed(iter/s)": 0.06211
},
{
"acc": 0.61271744,
"epoch": 0.024071526822558458,
"grad_norm": 0.9307955503463745,
"learning_rate": 6.028921987267401e-05,
"loss": 1.5906249,
"memory(GiB)": 73.29,
"step": 35,
"train_speed(iter/s)": 0.06288
},
{
"acc": 0.63797045,
"epoch": 0.027510316368638238,
"grad_norm": 1.0717326402664185,
"learning_rate": 6.255355612066197e-05,
"loss": 1.44913902,
"memory(GiB)": 73.29,
"step": 40,
"train_speed(iter/s)": 0.064
},
{
"acc": 0.64407902,
"epoch": 0.030949105914718018,
"grad_norm": 1.0732834339141846,
"learning_rate": 6.455084224037423e-05,
"loss": 1.44504213,
"memory(GiB)": 73.29,
"step": 45,
"train_speed(iter/s)": 0.064348
},
{
"acc": 0.64924326,
"epoch": 0.0343878954607978,
"grad_norm": 1.1162458658218384,
"learning_rate": 6.633747555574418e-05,
"loss": 1.36141453,
"memory(GiB)": 73.29,
"step": 50,
"train_speed(iter/s)": 0.064587
},
{
"acc": 0.6600091,
"epoch": 0.03782668500687758,
"grad_norm": 1.0633102655410767,
"learning_rate": 6.795368198249832e-05,
"loss": 1.3460659,
"memory(GiB)": 73.29,
"step": 55,
"train_speed(iter/s)": 0.064754
},
{
"acc": 0.65980716,
"epoch": 0.04126547455295736,
"grad_norm": 1.0164440870285034,
"learning_rate": 6.942916284907606e-05,
"loss": 1.33123722,
"memory(GiB)": 73.29,
"step": 60,
"train_speed(iter/s)": 0.065266
},
{
"acc": 0.68235178,
"epoch": 0.04470426409903714,
"grad_norm": 1.185117483139038,
"learning_rate": 7.078647367172232e-05,
"loss": 1.24153843,
"memory(GiB)": 73.29,
"step": 65,
"train_speed(iter/s)": 0.065105
},
{
"acc": 0.67718811,
"epoch": 0.048143053645116916,
"grad_norm": 1.1237530708312988,
"learning_rate": 7.204314720978996e-05,
"loss": 1.28077126,
"memory(GiB)": 73.29,
"step": 70,
"train_speed(iter/s)": 0.065778
},
{
"acc": 0.68681493,
"epoch": 0.0515818431911967,
"grad_norm": 1.2692396640777588,
"learning_rate": 7.321308228415829e-05,
"loss": 1.25220881,
"memory(GiB)": 73.29,
"step": 75,
"train_speed(iter/s)": 0.066254
},
{
"acc": 0.67568145,
"epoch": 0.055020632737276476,
"grad_norm": 1.118291974067688,
"learning_rate": 7.43074834577779e-05,
"loss": 1.2524622,
"memory(GiB)": 73.29,
"step": 80,
"train_speed(iter/s)": 0.066119
},
{
"acc": 0.67670813,
"epoch": 0.05845942228335626,
"grad_norm": 1.1989713907241821,
"learning_rate": 7.533551533853211e-05,
"loss": 1.24576015,
"memory(GiB)": 73.29,
"step": 85,
"train_speed(iter/s)": 0.066293
},
{
"acc": 0.69235625,
"epoch": 0.061898211829436035,
"grad_norm": 1.2158828973770142,
"learning_rate": 7.630476957749017e-05,
"loss": 1.20176125,
"memory(GiB)": 73.29,
"step": 90,
"train_speed(iter/s)": 0.066483
},
{
"acc": 0.68914762,
"epoch": 0.06533700137551582,
"grad_norm": 1.1737231016159058,
"learning_rate": 7.722160543566566e-05,
"loss": 1.21274147,
"memory(GiB)": 73.29,
"step": 95,
"train_speed(iter/s)": 0.066668
},
{
"acc": 0.69834042,
"epoch": 0.0687757909215956,
"grad_norm": 1.351590871810913,
"learning_rate": 7.809140289286016e-05,
"loss": 1.1592926,
"memory(GiB)": 73.29,
"step": 100,
"train_speed(iter/s)": 0.067074
},
{
"epoch": 0.0687757909215956,
"eval_acc": 0.6962927970486346,
"eval_loss": 1.1645218133926392,
"eval_runtime": 1212.4549,
"eval_samples_per_second": 3.533,
"eval_steps_per_second": 0.064,
"step": 100
},
{
"acc": 0.68415451,
"epoch": 0.07221458046767538,
"grad_norm": 1.3831959962844849,
"learning_rate": 7.891875393820406e-05,
"loss": 1.2015852,
"memory(GiB)": 73.29,
"step": 105,
"train_speed(iter/s)": 0.037875
},
{
"acc": 0.68617525,
"epoch": 0.07565337001375516,
"grad_norm": 1.1734046936035156,
"learning_rate": 7.970760931961428e-05,
"loss": 1.20146303,
"memory(GiB)": 67.64,
"step": 110,
"train_speed(iter/s)": 0.038604
},
{
"acc": 0.71534705,
"epoch": 0.07909215955983494,
"grad_norm": 1.1629948616027832,
"learning_rate": 8.046139264575035e-05,
"loss": 1.0837039,
"memory(GiB)": 67.64,
"step": 115,
"train_speed(iter/s)": 0.039388
},
{
"acc": 0.69408731,
"epoch": 0.08253094910591471,
"grad_norm": 1.3171385526657104,
"learning_rate": 8.118309018619202e-05,
"loss": 1.18307505,
"memory(GiB)": 67.64,
"step": 120,
"train_speed(iter/s)": 0.040077
},
{
"acc": 0.70501647,
"epoch": 0.0859697386519945,
"grad_norm": 1.3044822216033936,
"learning_rate": 8.187532232794237e-05,
"loss": 1.13398886,
"memory(GiB)": 67.64,
"step": 125,
"train_speed(iter/s)": 0.040793
},
{
"acc": 0.70435572,
"epoch": 0.08940852819807428,
"grad_norm": 1.329248070716858,
"learning_rate": 8.254040100883828e-05,
"loss": 1.12086363,
"memory(GiB)": 67.64,
"step": 130,
"train_speed(iter/s)": 0.041495
},
{
"acc": 0.69323554,
"epoch": 0.09284731774415406,
"grad_norm": 1.2392340898513794,
"learning_rate": 8.318037630590428e-05,
"loss": 1.17682867,
"memory(GiB)": 67.64,
"step": 135,
"train_speed(iter/s)": 0.042188
},
{
"acc": 0.68262854,
"epoch": 0.09628610729023383,
"grad_norm": 1.50913667678833,
"learning_rate": 8.379707454690589e-05,
"loss": 1.20728226,
"memory(GiB)": 67.64,
"step": 140,
"train_speed(iter/s)": 0.042814
},
{
"acc": 0.71855187,
"epoch": 0.09972489683631362,
"grad_norm": 1.3727279901504517,
"learning_rate": 8.439212973113382e-05,
"loss": 1.06742191,
"memory(GiB)": 67.64,
"step": 145,
"train_speed(iter/s)": 0.043424
},
{
"acc": 0.70871205,
"epoch": 0.1031636863823934,
"grad_norm": 1.3805909156799316,
"learning_rate": 8.496700962127424e-05,
"loss": 1.11401825,
"memory(GiB)": 67.64,
"step": 150,
"train_speed(iter/s)": 0.044042
},
{
"acc": 0.73342023,
"epoch": 0.10660247592847318,
"grad_norm": 1.2183469533920288,
"learning_rate": 8.552303755540939e-05,
"loss": 1.00700331,
"memory(GiB)": 67.64,
"step": 155,
"train_speed(iter/s)": 0.044601
},
{
"acc": 0.70797634,
"epoch": 0.11004126547455295,
"grad_norm": 1.4554626941680908,
"learning_rate": 8.606141079489386e-05,
"loss": 1.09950924,
"memory(GiB)": 67.64,
"step": 160,
"train_speed(iter/s)": 0.045018
},
{
"acc": 0.69420943,
"epoch": 0.11348005502063274,
"grad_norm": 1.3245141506195068,
"learning_rate": 8.658321604802837e-05,
"loss": 1.15192003,
"memory(GiB)": 67.64,
"step": 165,
"train_speed(iter/s)": 0.045554
},
{
"acc": 0.7062036,
"epoch": 0.11691884456671252,
"grad_norm": 1.271952748298645,
"learning_rate": 8.708944267564807e-05,
"loss": 1.08263731,
"memory(GiB)": 67.64,
"step": 170,
"train_speed(iter/s)": 0.045943
},
{
"acc": 0.69984941,
"epoch": 0.1203576341127923,
"grad_norm": 1.4456363916397095,
"learning_rate": 8.758099398198813e-05,
"loss": 1.12532272,
"memory(GiB)": 67.64,
"step": 175,
"train_speed(iter/s)": 0.046356
},
{
"acc": 0.71651649,
"epoch": 0.12379642365887207,
"grad_norm": 1.44161057472229,
"learning_rate": 8.805869691460613e-05,
"loss": 1.07470217,
"memory(GiB)": 67.64,
"step": 180,
"train_speed(iter/s)": 0.04681
},
{
"acc": 0.71227612,
"epoch": 0.12723521320495185,
"grad_norm": 1.3441652059555054,
"learning_rate": 8.852331043501091e-05,
"loss": 1.09072762,
"memory(GiB)": 67.64,
"step": 185,
"train_speed(iter/s)": 0.047226
},
{
"acc": 0.72131248,
"epoch": 0.13067400275103164,
"grad_norm": 1.6152911186218262,
"learning_rate": 8.897553277278162e-05,
"loss": 1.05218563,
"memory(GiB)": 67.64,
"step": 190,
"train_speed(iter/s)": 0.047678
},
{
"acc": 0.71243434,
"epoch": 0.13411279229711143,
"grad_norm": 1.45099937915802,
"learning_rate": 8.94160077372524e-05,
"loss": 1.08823862,
"memory(GiB)": 67.64,
"step": 195,
"train_speed(iter/s)": 0.048064
},
{
"acc": 0.71744361,
"epoch": 0.1375515818431912,
"grad_norm": 1.4195399284362793,
"learning_rate": 8.984533022997609e-05,
"loss": 1.05846539,
"memory(GiB)": 67.64,
"step": 200,
"train_speed(iter/s)": 0.048352
},
{
"epoch": 0.1375515818431912,
"eval_acc": 0.7119831736176722,
"eval_loss": 1.0829237699508667,
"eval_runtime": 1085.6657,
"eval_samples_per_second": 3.945,
"eval_steps_per_second": 0.071,
"step": 200
},
{
"acc": 0.69938354,
"epoch": 0.14099037138927098,
"grad_norm": 1.3801318407058716,
"learning_rate": 9.026405107641496e-05,
"loss": 1.1244791,
"memory(GiB)": 67.64,
"step": 205,
"train_speed(iter/s)": 0.038724
},
{
"acc": 0.70936947,
"epoch": 0.14442916093535077,
"grad_norm": 1.3959752321243286,
"learning_rate": 9.067268127532e-05,
"loss": 1.09259109,
"memory(GiB)": 67.64,
"step": 210,
"train_speed(iter/s)": 0.039167
},
{
"acc": 0.70823245,
"epoch": 0.14786795048143053,
"grad_norm": 1.4662190675735474,
"learning_rate": 9.107169574803587e-05,
"loss": 1.11423931,
"memory(GiB)": 67.64,
"step": 215,
"train_speed(iter/s)": 0.039589
},
{
"acc": 0.71795692,
"epoch": 0.15130674002751032,
"grad_norm": 1.2458115816116333,
"learning_rate": 9.146153665673023e-05,
"loss": 1.08119087,
"memory(GiB)": 67.64,
"step": 220,
"train_speed(iter/s)": 0.040035
},
{
"acc": 0.70050411,
"epoch": 0.15474552957359008,
"grad_norm": 1.3731013536453247,
"learning_rate": 9.184261634968835e-05,
"loss": 1.12374535,
"memory(GiB)": 67.64,
"step": 225,
"train_speed(iter/s)": 0.040423
},
{
"acc": 0.7144835,
"epoch": 0.15818431911966988,
"grad_norm": 1.4678212404251099,
"learning_rate": 9.221531998286629e-05,
"loss": 1.05655756,
"memory(GiB)": 67.64,
"step": 230,
"train_speed(iter/s)": 0.04076
},
{
"acc": 0.70189781,
"epoch": 0.16162310866574967,
"grad_norm": 1.3910584449768066,
"learning_rate": 9.258000785948739e-05,
"loss": 1.12144871,
"memory(GiB)": 67.64,
"step": 235,
"train_speed(iter/s)": 0.04114
},
{
"acc": 0.73604274,
"epoch": 0.16506189821182943,
"grad_norm": 1.3721731901168823,
"learning_rate": 9.293701752330797e-05,
"loss": 0.98678083,
"memory(GiB)": 67.64,
"step": 240,
"train_speed(iter/s)": 0.041584
},
{
"acc": 0.71869593,
"epoch": 0.16850068775790922,
"grad_norm": 1.6655057668685913,
"learning_rate": 9.32866656360339e-05,
"loss": 1.07134695,
"memory(GiB)": 67.64,
"step": 245,
"train_speed(iter/s)": 0.042014
},
{
"acc": 0.71307015,
"epoch": 0.171939477303989,
"grad_norm": 1.3052812814712524,
"learning_rate": 9.36292496650583e-05,
"loss": 1.06437588,
"memory(GiB)": 67.64,
"step": 250,
"train_speed(iter/s)": 0.042373
},
{
"acc": 0.71391711,
"epoch": 0.17537826685006877,
"grad_norm": 1.5577940940856934,
"learning_rate": 9.396504940406217e-05,
"loss": 1.07451763,
"memory(GiB)": 67.64,
"step": 255,
"train_speed(iter/s)": 0.042705
},
{
"acc": 0.73166742,
"epoch": 0.17881705639614856,
"grad_norm": 1.33721923828125,
"learning_rate": 9.429432834595424e-05,
"loss": 0.99717045,
"memory(GiB)": 67.64,
"step": 260,
"train_speed(iter/s)": 0.043055
},
{
"acc": 0.71127567,
"epoch": 0.18225584594222832,
"grad_norm": 1.549870252609253,
"learning_rate": 9.461733492503013e-05,
"loss": 1.10144587,
"memory(GiB)": 67.64,
"step": 265,
"train_speed(iter/s)": 0.043386
},
{
"acc": 0.72763004,
"epoch": 0.1856946354883081,
"grad_norm": 1.4047890901565552,
"learning_rate": 9.493430364302024e-05,
"loss": 1.01531572,
"memory(GiB)": 67.64,
"step": 270,
"train_speed(iter/s)": 0.043699
},
{
"acc": 0.72718954,
"epoch": 0.1891334250343879,
"grad_norm": 1.3470264673233032,
"learning_rate": 9.524545609181246e-05,
"loss": 1.01689529,
"memory(GiB)": 67.64,
"step": 275,
"train_speed(iter/s)": 0.043985
},
{
"acc": 0.72714009,
"epoch": 0.19257221458046767,
"grad_norm": 1.3996589183807373,
"learning_rate": 9.555100188402185e-05,
"loss": 1.01372051,
"memory(GiB)": 67.64,
"step": 280,
"train_speed(iter/s)": 0.044244
},
{
"acc": 0.71828256,
"epoch": 0.19601100412654746,
"grad_norm": 1.5369681119918823,
"learning_rate": 9.585113950119573e-05,
"loss": 1.06217566,
"memory(GiB)": 67.64,
"step": 285,
"train_speed(iter/s)": 0.044532
},
{
"acc": 0.72461739,
"epoch": 0.19944979367262725,
"grad_norm": 1.3398535251617432,
"learning_rate": 9.614605706824978e-05,
"loss": 1.03975096,
"memory(GiB)": 67.64,
"step": 290,
"train_speed(iter/s)": 0.044835
},
{
"acc": 0.71793423,
"epoch": 0.202888583218707,
"grad_norm": 1.4092602729797363,
"learning_rate": 9.64359330617034e-05,
"loss": 1.05028229,
"memory(GiB)": 67.64,
"step": 295,
"train_speed(iter/s)": 0.045175
},
{
"acc": 0.72302713,
"epoch": 0.2063273727647868,
"grad_norm": 1.2952080965042114,
"learning_rate": 9.67209369583902e-05,
"loss": 1.01765738,
"memory(GiB)": 67.64,
"step": 300,
"train_speed(iter/s)": 0.045401
},
{
"epoch": 0.2063273727647868,
"eval_acc": 0.7200083232105098,
"eval_loss": 1.0458483695983887,
"eval_runtime": 1091.4981,
"eval_samples_per_second": 3.924,
"eval_steps_per_second": 0.071,
"step": 300
},
{
"acc": 0.709624,
"epoch": 0.2097661623108666,
"grad_norm": 1.5634573698043823,
"learning_rate": 9.700122983054879e-05,
"loss": 1.07294426,
"memory(GiB)": 67.64,
"step": 305,
"train_speed(iter/s)": 0.039263
},
{
"acc": 0.70980182,
"epoch": 0.21320495185694635,
"grad_norm": 1.372841477394104,
"learning_rate": 9.727696489252533e-05,
"loss": 1.10122662,
"memory(GiB)": 67.64,
"step": 310,
"train_speed(iter/s)": 0.039598
},
{
"acc": 0.73576632,
"epoch": 0.21664374140302614,
"grad_norm": 1.539969801902771,
"learning_rate": 9.754828800373411e-05,
"loss": 0.98867779,
"memory(GiB)": 67.64,
"step": 315,
"train_speed(iter/s)": 0.039872
},
{
"acc": 0.7245533,
"epoch": 0.2200825309491059,
"grad_norm": 1.2448300123214722,
"learning_rate": 9.781533813200982e-05,
"loss": 1.01700201,
"memory(GiB)": 67.64,
"step": 320,
"train_speed(iter/s)": 0.040166
},
{
"acc": 0.72238054,
"epoch": 0.2235213204951857,
"grad_norm": 1.2697371244430542,
"learning_rate": 9.807824778103646e-05,
"loss": 1.0503273,
"memory(GiB)": 67.64,
"step": 325,
"train_speed(iter/s)": 0.040431
},
{
"acc": 0.7078352,
"epoch": 0.22696011004126548,
"grad_norm": 1.3228161334991455,
"learning_rate": 9.833714338514432e-05,
"loss": 1.10422878,
"memory(GiB)": 67.64,
"step": 330,
"train_speed(iter/s)": 0.040712
},
{
"acc": 0.71083031,
"epoch": 0.23039889958734525,
"grad_norm": 1.2554104328155518,
"learning_rate": 9.859214567441929e-05,
"loss": 1.06728878,
"memory(GiB)": 67.64,
"step": 335,
"train_speed(iter/s)": 0.040973
},
{
"acc": 0.73246231,
"epoch": 0.23383768913342504,
"grad_norm": 1.601881742477417,
"learning_rate": 9.884337001276401e-05,
"loss": 0.99594593,
"memory(GiB)": 67.64,
"step": 340,
"train_speed(iter/s)": 0.041255
},
{
"acc": 0.72822175,
"epoch": 0.23727647867950483,
"grad_norm": 1.374062418937683,
"learning_rate": 9.90909267112804e-05,
"loss": 0.9949461,
"memory(GiB)": 67.64,
"step": 345,
"train_speed(iter/s)": 0.041529
},
{
"acc": 0.72813654,
"epoch": 0.2407152682255846,
"grad_norm": 1.4039307832717896,
"learning_rate": 9.933492131910406e-05,
"loss": 1.00009727,
"memory(GiB)": 67.64,
"step": 350,
"train_speed(iter/s)": 0.041803
},
{
"acc": 0.71051707,
"epoch": 0.24415405777166438,
"grad_norm": 1.4029077291488647,
"learning_rate": 9.957545489361027e-05,
"loss": 1.05340385,
"memory(GiB)": 67.64,
"step": 355,
"train_speed(iter/s)": 0.042061
},
{
"acc": 0.72410893,
"epoch": 0.24759284731774414,
"grad_norm": 1.379601001739502,
"learning_rate": 9.981262425172208e-05,
"loss": 1.03275814,
"memory(GiB)": 67.64,
"step": 360,
"train_speed(iter/s)": 0.042262
},
{
"acc": 0.72450876,
"epoch": 0.25103163686382396,
"grad_norm": 1.2809425592422485,
"learning_rate": 9.999999482699181e-05,
"loss": 1.02212152,
"memory(GiB)": 67.64,
"step": 365,
"train_speed(iter/s)": 0.042465
},
{
"acc": 0.72661881,
"epoch": 0.2544704264099037,
"grad_norm": 1.5250205993652344,
"learning_rate": 9.999981377181717e-05,
"loss": 1.03086433,
"memory(GiB)": 67.64,
"step": 370,
"train_speed(iter/s)": 0.042736
},
{
"acc": 0.72812705,
"epoch": 0.2579092159559835,
"grad_norm": 1.2832344770431519,
"learning_rate": 9.999937406730297e-05,
"loss": 1.00952168,
"memory(GiB)": 67.64,
"step": 375,
"train_speed(iter/s)": 0.042979
},
{
"acc": 0.69843874,
"epoch": 0.2613480055020633,
"grad_norm": 1.3689916133880615,
"learning_rate": 9.999867571572407e-05,
"loss": 1.1430685,
"memory(GiB)": 67.64,
"step": 380,
"train_speed(iter/s)": 0.043186
},
{
"acc": 0.71121368,
"epoch": 0.26478679504814306,
"grad_norm": 1.5115655660629272,
"learning_rate": 9.999771872069336e-05,
"loss": 1.06673965,
"memory(GiB)": 67.64,
"step": 385,
"train_speed(iter/s)": 0.043399
},
{
"acc": 0.71244879,
"epoch": 0.26822558459422285,
"grad_norm": 1.2644624710083008,
"learning_rate": 9.999650308716193e-05,
"loss": 1.0759717,
"memory(GiB)": 67.64,
"step": 390,
"train_speed(iter/s)": 0.043596
},
{
"acc": 0.71011033,
"epoch": 0.2716643741403026,
"grad_norm": 1.425584077835083,
"learning_rate": 9.999502882141882e-05,
"loss": 1.08612566,
"memory(GiB)": 67.64,
"step": 395,
"train_speed(iter/s)": 0.043813
},
{
"acc": 0.71973572,
"epoch": 0.2751031636863824,
"grad_norm": 1.281044840812683,
"learning_rate": 9.999329593109124e-05,
"loss": 1.04273968,
"memory(GiB)": 67.64,
"step": 400,
"train_speed(iter/s)": 0.04406
},
{
"epoch": 0.2751031636863824,
"eval_acc": 0.723596301795114,
"eval_loss": 1.0237661600112915,
"eval_runtime": 1145.744,
"eval_samples_per_second": 3.738,
"eval_steps_per_second": 0.067,
"step": 400
},
{
"acc": 0.72366686,
"epoch": 0.27854195323246217,
"grad_norm": 1.4894949197769165,
"learning_rate": 9.999130442514431e-05,
"loss": 1.02950411,
"memory(GiB)": 67.64,
"step": 405,
"train_speed(iter/s)": 0.03932
},
{
"acc": 0.74041648,
"epoch": 0.28198074277854196,
"grad_norm": 1.2302844524383545,
"learning_rate": 9.998905431388113e-05,
"loss": 0.95937977,
"memory(GiB)": 67.64,
"step": 410,
"train_speed(iter/s)": 0.039555
},
{
"acc": 0.72292333,
"epoch": 0.28541953232462175,
"grad_norm": 1.1821825504302979,
"learning_rate": 9.998654560894271e-05,
"loss": 1.02365704,
"memory(GiB)": 67.64,
"step": 415,
"train_speed(iter/s)": 0.039766
},
{
"acc": 0.70973835,
"epoch": 0.28885832187070154,
"grad_norm": 1.2947014570236206,
"learning_rate": 9.998377832330788e-05,
"loss": 1.07417269,
"memory(GiB)": 67.64,
"step": 420,
"train_speed(iter/s)": 0.039992
},
{
"acc": 0.73527951,
"epoch": 0.2922971114167813,
"grad_norm": 1.2616949081420898,
"learning_rate": 9.99807524712933e-05,
"loss": 0.98149738,
"memory(GiB)": 67.64,
"step": 425,
"train_speed(iter/s)": 0.040206
},
{
"acc": 0.71251645,
"epoch": 0.29573590096286106,
"grad_norm": 1.2349984645843506,
"learning_rate": 9.997746806855323e-05,
"loss": 1.07718506,
"memory(GiB)": 67.64,
"step": 430,
"train_speed(iter/s)": 0.040404
},
{
"acc": 0.72872591,
"epoch": 0.29917469050894085,
"grad_norm": 1.128265619277954,
"learning_rate": 9.997392513207963e-05,
"loss": 1.00703831,
"memory(GiB)": 67.64,
"step": 435,
"train_speed(iter/s)": 0.040581
},
{
"acc": 0.72117209,
"epoch": 0.30261348005502064,
"grad_norm": 1.249985933303833,
"learning_rate": 9.997012368020198e-05,
"loss": 1.01667709,
"memory(GiB)": 67.64,
"step": 440,
"train_speed(iter/s)": 0.040799
},
{
"acc": 0.72518797,
"epoch": 0.30605226960110044,
"grad_norm": 1.3999882936477661,
"learning_rate": 9.996606373258716e-05,
"loss": 1.04834728,
"memory(GiB)": 67.64,
"step": 445,
"train_speed(iter/s)": 0.041
},
{
"acc": 0.72560539,
"epoch": 0.30949105914718017,
"grad_norm": 1.3446978330612183,
"learning_rate": 9.99617453102394e-05,
"loss": 1.01653395,
"memory(GiB)": 67.64,
"step": 450,
"train_speed(iter/s)": 0.041207
},
{
"acc": 0.72350621,
"epoch": 0.31292984869325996,
"grad_norm": 1.2894266843795776,
"learning_rate": 9.99571684355002e-05,
"loss": 0.99579372,
"memory(GiB)": 67.64,
"step": 455,
"train_speed(iter/s)": 0.041368
},
{
"acc": 0.70948811,
"epoch": 0.31636863823933975,
"grad_norm": 1.4214539527893066,
"learning_rate": 9.995233313204806e-05,
"loss": 1.09332161,
"memory(GiB)": 67.64,
"step": 460,
"train_speed(iter/s)": 0.041539
},
{
"acc": 0.74661293,
"epoch": 0.31980742778541954,
"grad_norm": 1.2697914838790894,
"learning_rate": 9.994723942489859e-05,
"loss": 0.93414135,
"memory(GiB)": 67.64,
"step": 465,
"train_speed(iter/s)": 0.041724
},
{
"acc": 0.72276139,
"epoch": 0.32324621733149933,
"grad_norm": 1.2612886428833008,
"learning_rate": 9.99418873404042e-05,
"loss": 1.04514399,
"memory(GiB)": 67.64,
"step": 470,
"train_speed(iter/s)": 0.041901
},
{
"acc": 0.72859631,
"epoch": 0.32668500687757906,
"grad_norm": 1.2637856006622314,
"learning_rate": 9.993627690625399e-05,
"loss": 0.99566994,
"memory(GiB)": 67.64,
"step": 475,
"train_speed(iter/s)": 0.042059
},
{
"acc": 0.72311392,
"epoch": 0.33012379642365886,
"grad_norm": 1.2103707790374756,
"learning_rate": 9.993040815147369e-05,
"loss": 1.02551346,
"memory(GiB)": 67.64,
"step": 480,
"train_speed(iter/s)": 0.042237
},
{
"acc": 0.7304266,
"epoch": 0.33356258596973865,
"grad_norm": 1.4478263854980469,
"learning_rate": 9.992428110642546e-05,
"loss": 1.00502892,
"memory(GiB)": 67.64,
"step": 485,
"train_speed(iter/s)": 0.042429
},
{
"acc": 0.72812204,
"epoch": 0.33700137551581844,
"grad_norm": 1.28928542137146,
"learning_rate": 9.991789580280768e-05,
"loss": 0.99270744,
"memory(GiB)": 67.64,
"step": 490,
"train_speed(iter/s)": 0.042611
},
{
"acc": 0.73110504,
"epoch": 0.3404401650618982,
"grad_norm": 1.277113914489746,
"learning_rate": 9.991125227365489e-05,
"loss": 0.9932848,
"memory(GiB)": 67.71,
"step": 495,
"train_speed(iter/s)": 0.042803
},
{
"acc": 0.73536983,
"epoch": 0.343878954607978,
"grad_norm": 1.4031190872192383,
"learning_rate": 9.990435055333755e-05,
"loss": 1.00407228,
"memory(GiB)": 67.71,
"step": 500,
"train_speed(iter/s)": 0.042997
},
{
"epoch": 0.343878954607978,
"eval_acc": 0.7273923606424618,
"eval_loss": 1.006140112876892,
"eval_runtime": 1123.2925,
"eval_samples_per_second": 3.813,
"eval_steps_per_second": 0.069,
"step": 500
},
{
"acc": 0.7310411,
"epoch": 0.34731774415405775,
"grad_norm": 1.1264581680297852,
"learning_rate": 9.989719067756184e-05,
"loss": 0.97913218,
"memory(GiB)": 67.71,
"step": 505,
"train_speed(iter/s)": 0.039389
},
{
"acc": 0.72247181,
"epoch": 0.35075653370013754,
"grad_norm": 1.2322190999984741,
"learning_rate": 9.988977268336956e-05,
"loss": 1.04118223,
"memory(GiB)": 67.71,
"step": 510,
"train_speed(iter/s)": 0.039571
},
{
"acc": 0.7294539,
"epoch": 0.35419532324621733,
"grad_norm": 1.1988883018493652,
"learning_rate": 9.988209660913789e-05,
"loss": 0.96120787,
"memory(GiB)": 67.71,
"step": 515,
"train_speed(iter/s)": 0.039729
},
{
"acc": 0.72807951,
"epoch": 0.3576341127922971,
"grad_norm": 1.4514073133468628,
"learning_rate": 9.987416249457917e-05,
"loss": 1.00832357,
"memory(GiB)": 67.71,
"step": 520,
"train_speed(iter/s)": 0.039869
},
{
"acc": 0.72818184,
"epoch": 0.3610729023383769,
"grad_norm": 1.2781667709350586,
"learning_rate": 9.986597038074072e-05,
"loss": 1.00557394,
"memory(GiB)": 67.71,
"step": 525,
"train_speed(iter/s)": 0.040019
},
{
"acc": 0.7372427,
"epoch": 0.36451169188445665,
"grad_norm": 1.196447491645813,
"learning_rate": 9.985752031000465e-05,
"loss": 0.97588711,
"memory(GiB)": 67.71,
"step": 530,
"train_speed(iter/s)": 0.040179
},
{
"acc": 0.73485746,
"epoch": 0.36795048143053644,
"grad_norm": 1.2713799476623535,
"learning_rate": 9.984881232608758e-05,
"loss": 0.99121141,
"memory(GiB)": 67.71,
"step": 535,
"train_speed(iter/s)": 0.040356
},
{
"acc": 0.7316514,
"epoch": 0.3713892709766162,
"grad_norm": 1.388735055923462,
"learning_rate": 9.983984647404047e-05,
"loss": 0.97529774,
"memory(GiB)": 67.71,
"step": 540,
"train_speed(iter/s)": 0.040533
},
{
"acc": 0.73824301,
"epoch": 0.374828060522696,
"grad_norm": 1.263832926750183,
"learning_rate": 9.983062280024837e-05,
"loss": 0.95761375,
"memory(GiB)": 67.71,
"step": 545,
"train_speed(iter/s)": 0.040707
},
{
"acc": 0.72791233,
"epoch": 0.3782668500687758,
"grad_norm": 1.3154568672180176,
"learning_rate": 9.982114135243019e-05,
"loss": 1.00505419,
"memory(GiB)": 67.71,
"step": 550,
"train_speed(iter/s)": 0.040862
},
{
"acc": 0.73077579,
"epoch": 0.3817056396148556,
"grad_norm": 1.2996647357940674,
"learning_rate": 9.981140217963838e-05,
"loss": 0.98154631,
"memory(GiB)": 67.71,
"step": 555,
"train_speed(iter/s)": 0.041008
},
{
"acc": 0.7352643,
"epoch": 0.38514442916093533,
"grad_norm": 1.3090369701385498,
"learning_rate": 9.980140533225882e-05,
"loss": 0.9830574,
"memory(GiB)": 67.71,
"step": 560,
"train_speed(iter/s)": 0.041146
},
{
"acc": 0.7195425,
"epoch": 0.3885832187070151,
"grad_norm": 1.655612587928772,
"learning_rate": 9.979115086201042e-05,
"loss": 1.05448446,
"memory(GiB)": 67.71,
"step": 565,
"train_speed(iter/s)": 0.041304
},
{
"acc": 0.73759327,
"epoch": 0.3920220082530949,
"grad_norm": 1.183268427848816,
"learning_rate": 9.978063882194492e-05,
"loss": 0.96683788,
"memory(GiB)": 67.71,
"step": 570,
"train_speed(iter/s)": 0.041468
},
{
"acc": 0.73216171,
"epoch": 0.3954607977991747,
"grad_norm": 1.2590916156768799,
"learning_rate": 9.976986926644662e-05,
"loss": 0.97658138,
"memory(GiB)": 67.71,
"step": 575,
"train_speed(iter/s)": 0.04163
},
{
"acc": 0.72127271,
"epoch": 0.3988995873452545,
"grad_norm": 1.1548501253128052,
"learning_rate": 9.975884225123204e-05,
"loss": 1.00985394,
"memory(GiB)": 67.71,
"step": 580,
"train_speed(iter/s)": 0.041797
},
{
"acc": 0.74563594,
"epoch": 0.4023383768913342,
"grad_norm": 1.0580244064331055,
"learning_rate": 9.974755783334972e-05,
"loss": 0.94991455,
"memory(GiB)": 67.71,
"step": 585,
"train_speed(iter/s)": 0.041937
},
{
"acc": 0.72397938,
"epoch": 0.405777166437414,
"grad_norm": 1.2799969911575317,
"learning_rate": 9.973601607117985e-05,
"loss": 1.04541121,
"memory(GiB)": 67.71,
"step": 590,
"train_speed(iter/s)": 0.042103
},
{
"acc": 0.75536423,
"epoch": 0.4092159559834938,
"grad_norm": 1.2122467756271362,
"learning_rate": 9.972421702443402e-05,
"loss": 0.91661882,
"memory(GiB)": 67.71,
"step": 595,
"train_speed(iter/s)": 0.042263
},
{
"acc": 0.72923999,
"epoch": 0.4126547455295736,
"grad_norm": 1.3098151683807373,
"learning_rate": 9.971216075415486e-05,
"loss": 0.99268637,
"memory(GiB)": 67.71,
"step": 600,
"train_speed(iter/s)": 0.042394
},
{
"epoch": 0.4126547455295736,
"eval_acc": 0.7299005713771539,
"eval_loss": 0.9898082613945007,
"eval_runtime": 1136.3836,
"eval_samples_per_second": 3.769,
"eval_steps_per_second": 0.068,
"step": 600
},
{
"acc": 0.73311081,
"epoch": 0.4160935350756534,
"grad_norm": 1.149190902709961,
"learning_rate": 9.969984732271578e-05,
"loss": 0.98028679,
"memory(GiB)": 67.71,
"step": 605,
"train_speed(iter/s)": 0.039392
},
{
"acc": 0.7316927,
"epoch": 0.4195323246217332,
"grad_norm": 1.3081296682357788,
"learning_rate": 9.96872767938206e-05,
"loss": 0.98179483,
"memory(GiB)": 67.71,
"step": 610,
"train_speed(iter/s)": 0.039559
},
{
"acc": 0.73893361,
"epoch": 0.4229711141678129,
"grad_norm": 1.1731023788452148,
"learning_rate": 9.967444923250323e-05,
"loss": 0.94215651,
"memory(GiB)": 67.71,
"step": 615,
"train_speed(iter/s)": 0.039695
},
{
"acc": 0.72336564,
"epoch": 0.4264099037138927,
"grad_norm": 1.2004274129867554,
"learning_rate": 9.966136470512739e-05,
"loss": 1.01167727,
"memory(GiB)": 67.71,
"step": 620,
"train_speed(iter/s)": 0.03985
},
{
"acc": 0.73260341,
"epoch": 0.4298486932599725,
"grad_norm": 1.1863032579421997,
"learning_rate": 9.964802327938616e-05,
"loss": 0.98780212,
"memory(GiB)": 67.71,
"step": 625,
"train_speed(iter/s)": 0.039998
},
{
"acc": 0.72430835,
"epoch": 0.4332874828060523,
"grad_norm": 1.2297348976135254,
"learning_rate": 9.963442502430173e-05,
"loss": 1.02258396,
"memory(GiB)": 67.71,
"step": 630,
"train_speed(iter/s)": 0.0401
},
{
"acc": 0.73400669,
"epoch": 0.43672627235213207,
"grad_norm": 1.1201564073562622,
"learning_rate": 9.962057001022499e-05,
"loss": 0.95277481,
"memory(GiB)": 67.71,
"step": 635,
"train_speed(iter/s)": 0.040238
},
{
"acc": 0.72435627,
"epoch": 0.4401650618982118,
"grad_norm": 1.2594115734100342,
"learning_rate": 9.96064583088352e-05,
"loss": 1.01793871,
"memory(GiB)": 67.71,
"step": 640,
"train_speed(iter/s)": 0.040389
},
{
"acc": 0.74932237,
"epoch": 0.4436038514442916,
"grad_norm": 1.0871134996414185,
"learning_rate": 9.959208999313953e-05,
"loss": 0.92056198,
"memory(GiB)": 67.71,
"step": 645,
"train_speed(iter/s)": 0.040522
},
{
"acc": 0.74172649,
"epoch": 0.4470426409903714,
"grad_norm": 1.0481441020965576,
"learning_rate": 9.957746513747285e-05,
"loss": 0.94307327,
"memory(GiB)": 67.71,
"step": 650,
"train_speed(iter/s)": 0.040673
},
{
"acc": 0.73418083,
"epoch": 0.4504814305364512,
"grad_norm": 1.2039026021957397,
"learning_rate": 9.956258381749717e-05,
"loss": 0.96942959,
"memory(GiB)": 67.71,
"step": 655,
"train_speed(iter/s)": 0.04079
},
{
"acc": 0.73663011,
"epoch": 0.45392022008253097,
"grad_norm": 1.2746825218200684,
"learning_rate": 9.954744611020134e-05,
"loss": 0.96783085,
"memory(GiB)": 67.71,
"step": 660,
"train_speed(iter/s)": 0.040931
},
{
"acc": 0.75085382,
"epoch": 0.4573590096286107,
"grad_norm": 1.1864688396453857,
"learning_rate": 9.953205209390065e-05,
"loss": 0.93258324,
"memory(GiB)": 67.71,
"step": 665,
"train_speed(iter/s)": 0.041065
},
{
"acc": 0.74181981,
"epoch": 0.4607977991746905,
"grad_norm": 1.2284380197525024,
"learning_rate": 9.95164018482364e-05,
"loss": 0.94610729,
"memory(GiB)": 67.71,
"step": 670,
"train_speed(iter/s)": 0.041186
},
{
"acc": 0.73316283,
"epoch": 0.4642365887207703,
"grad_norm": 1.0974282026290894,
"learning_rate": 9.950049545417551e-05,
"loss": 0.97180891,
"memory(GiB)": 67.71,
"step": 675,
"train_speed(iter/s)": 0.041284
},
{
"acc": 0.73497968,
"epoch": 0.4676753782668501,
"grad_norm": 1.1195545196533203,
"learning_rate": 9.948433299401008e-05,
"loss": 0.96802521,
"memory(GiB)": 67.71,
"step": 680,
"train_speed(iter/s)": 0.041406
},
{
"acc": 0.71404638,
"epoch": 0.47111416781292986,
"grad_norm": 1.2557018995285034,
"learning_rate": 9.946791455135697e-05,
"loss": 1.04876156,
"memory(GiB)": 67.71,
"step": 685,
"train_speed(iter/s)": 0.041511
},
{
"acc": 0.73286834,
"epoch": 0.47455295735900965,
"grad_norm": 1.2220708131790161,
"learning_rate": 9.945124021115738e-05,
"loss": 0.96964302,
"memory(GiB)": 67.71,
"step": 690,
"train_speed(iter/s)": 0.041631
},
{
"acc": 0.73684483,
"epoch": 0.4779917469050894,
"grad_norm": 1.2621607780456543,
"learning_rate": 9.94343100596764e-05,
"loss": 0.95697803,
"memory(GiB)": 67.71,
"step": 695,
"train_speed(iter/s)": 0.041775
},
{
"acc": 0.73987064,
"epoch": 0.4814305364511692,
"grad_norm": 1.1854294538497925,
"learning_rate": 9.941712418450258e-05,
"loss": 0.94488659,
"memory(GiB)": 67.71,
"step": 700,
"train_speed(iter/s)": 0.041901
},
{
"epoch": 0.4814305364511692,
"eval_acc": 0.7335672830341476,
"eval_loss": 0.9757564663887024,
"eval_runtime": 1129.274,
"eval_samples_per_second": 3.793,
"eval_steps_per_second": 0.068,
"step": 700
},
{
"acc": 0.73576145,
"epoch": 0.48486932599724897,
"grad_norm": 1.1550548076629639,
"learning_rate": 9.939968267454743e-05,
"loss": 0.95160465,
"memory(GiB)": 67.71,
"step": 705,
"train_speed(iter/s)": 0.03937
},
{
"acc": 0.71119275,
"epoch": 0.48830811554332876,
"grad_norm": 1.2182416915893555,
"learning_rate": 9.938198562004501e-05,
"loss": 1.04482851,
"memory(GiB)": 67.71,
"step": 710,
"train_speed(iter/s)": 0.039477
},
{
"acc": 0.74570274,
"epoch": 0.49174690508940855,
"grad_norm": 1.1353340148925781,
"learning_rate": 9.936403311255144e-05,
"loss": 0.92555218,
"memory(GiB)": 67.71,
"step": 715,
"train_speed(iter/s)": 0.039603
},
{
"acc": 0.74782338,
"epoch": 0.4951856946354883,
"grad_norm": 1.2046043872833252,
"learning_rate": 9.934582524494446e-05,
"loss": 0.92999516,
"memory(GiB)": 67.71,
"step": 720,
"train_speed(iter/s)": 0.039731
},
{
"acc": 0.73299646,
"epoch": 0.4986244841815681,
"grad_norm": 1.102347731590271,
"learning_rate": 9.932736211142291e-05,
"loss": 0.97149315,
"memory(GiB)": 67.71,
"step": 725,
"train_speed(iter/s)": 0.03984
},
{
"acc": 0.72648382,
"epoch": 0.5020632737276479,
"grad_norm": 1.0632636547088623,
"learning_rate": 9.930864380750617e-05,
"loss": 1.01790123,
"memory(GiB)": 67.71,
"step": 730,
"train_speed(iter/s)": 0.039945
},
{
"acc": 0.71636868,
"epoch": 0.5055020632737276,
"grad_norm": 1.1830312013626099,
"learning_rate": 9.928967043003391e-05,
"loss": 1.01803741,
"memory(GiB)": 67.71,
"step": 735,
"train_speed(iter/s)": 0.040055
},
{
"acc": 0.73447638,
"epoch": 0.5089408528198074,
"grad_norm": 1.1544054746627808,
"learning_rate": 9.92704420771653e-05,
"loss": 0.97713757,
"memory(GiB)": 67.71,
"step": 740,
"train_speed(iter/s)": 0.040173
},
{
"acc": 0.73799992,
"epoch": 0.5123796423658872,
"grad_norm": 1.0744158029556274,
"learning_rate": 9.925095884837867e-05,
"loss": 0.95858746,
"memory(GiB)": 67.71,
"step": 745,
"train_speed(iter/s)": 0.040296
},
{
"acc": 0.74002094,
"epoch": 0.515818431911967,
"grad_norm": 1.086005687713623,
"learning_rate": 9.923122084447098e-05,
"loss": 0.95759525,
"memory(GiB)": 67.71,
"step": 750,
"train_speed(iter/s)": 0.040432
},
{
"acc": 0.73197713,
"epoch": 0.5192572214580468,
"grad_norm": 1.177945852279663,
"learning_rate": 9.921122816755725e-05,
"loss": 0.98773813,
"memory(GiB)": 67.71,
"step": 755,
"train_speed(iter/s)": 0.040536
},
{
"acc": 0.71955528,
"epoch": 0.5226960110041265,
"grad_norm": 1.1270967721939087,
"learning_rate": 9.919098092107003e-05,
"loss": 1.0065423,
"memory(GiB)": 67.71,
"step": 760,
"train_speed(iter/s)": 0.040641
},
{
"acc": 0.72435188,
"epoch": 0.5261348005502063,
"grad_norm": 1.1566613912582397,
"learning_rate": 9.917047920975897e-05,
"loss": 1.00753899,
"memory(GiB)": 67.71,
"step": 765,
"train_speed(iter/s)": 0.040761
},
{
"acc": 0.72682076,
"epoch": 0.5295735900962861,
"grad_norm": 1.0998412370681763,
"learning_rate": 9.914972313969015e-05,
"loss": 0.99639912,
"memory(GiB)": 67.71,
"step": 770,
"train_speed(iter/s)": 0.040857
},
{
"acc": 0.73786283,
"epoch": 0.5330123796423659,
"grad_norm": 1.0717042684555054,
"learning_rate": 9.912871281824555e-05,
"loss": 0.95036526,
"memory(GiB)": 67.71,
"step": 775,
"train_speed(iter/s)": 0.040955
},
{
"acc": 0.72474022,
"epoch": 0.5364511691884457,
"grad_norm": 1.1307621002197266,
"learning_rate": 9.910744835412258e-05,
"loss": 1.00282173,
"memory(GiB)": 67.71,
"step": 780,
"train_speed(iter/s)": 0.041067
},
{
"acc": 0.73896732,
"epoch": 0.5398899587345255,
"grad_norm": 1.0760217905044556,
"learning_rate": 9.908592985733346e-05,
"loss": 0.95014591,
"memory(GiB)": 67.71,
"step": 785,
"train_speed(iter/s)": 0.041189
},
{
"acc": 0.73375082,
"epoch": 0.5433287482806052,
"grad_norm": 1.1228985786437988,
"learning_rate": 9.90641574392046e-05,
"loss": 0.97449379,
"memory(GiB)": 67.71,
"step": 790,
"train_speed(iter/s)": 0.041296
},
{
"acc": 0.73906136,
"epoch": 0.546767537826685,
"grad_norm": 1.0855998992919922,
"learning_rate": 9.904213121237616e-05,
"loss": 0.9437438,
"memory(GiB)": 67.71,
"step": 795,
"train_speed(iter/s)": 0.041409
},
{
"acc": 0.7277792,
"epoch": 0.5502063273727648,
"grad_norm": 1.24734365940094,
"learning_rate": 9.90198512908013e-05,
"loss": 1.01125345,
"memory(GiB)": 67.71,
"step": 800,
"train_speed(iter/s)": 0.041532
},
{
"epoch": 0.5502063273727648,
"eval_acc": 0.736024879650875,
"eval_loss": 0.9637655019760132,
"eval_runtime": 1126.4376,
"eval_samples_per_second": 3.802,
"eval_steps_per_second": 0.068,
"step": 800
},
{
"acc": 0.75724821,
"epoch": 0.5536451169188445,
"grad_norm": 1.1258316040039062,
"learning_rate": 9.899731778974572e-05,
"loss": 0.87265921,
"memory(GiB)": 67.71,
"step": 805,
"train_speed(iter/s)": 0.039349
},
{
"acc": 0.74204683,
"epoch": 0.5570839064649243,
"grad_norm": 0.9689936637878418,
"learning_rate": 9.897453082578703e-05,
"loss": 0.91779423,
"memory(GiB)": 67.71,
"step": 810,
"train_speed(iter/s)": 0.039466
},
{
"acc": 0.73968034,
"epoch": 0.5605226960110041,
"grad_norm": 1.1123220920562744,
"learning_rate": 9.895149051681413e-05,
"loss": 0.97357388,
"memory(GiB)": 67.71,
"step": 815,
"train_speed(iter/s)": 0.039574
},
{
"acc": 0.73935227,
"epoch": 0.5639614855570839,
"grad_norm": 1.0451692342758179,
"learning_rate": 9.892819698202658e-05,
"loss": 0.93994102,
"memory(GiB)": 67.71,
"step": 820,
"train_speed(iter/s)": 0.039675
},
{
"acc": 0.73578658,
"epoch": 0.5674002751031637,
"grad_norm": 1.0823888778686523,
"learning_rate": 9.890465034193403e-05,
"loss": 0.92713509,
"memory(GiB)": 67.71,
"step": 825,
"train_speed(iter/s)": 0.039784
},
{
"acc": 0.7370616,
"epoch": 0.5708390646492435,
"grad_norm": 1.1076163053512573,
"learning_rate": 9.888085071835557e-05,
"loss": 0.96277084,
"memory(GiB)": 67.71,
"step": 830,
"train_speed(iter/s)": 0.03987
},
{
"acc": 0.74359312,
"epoch": 0.5742778541953233,
"grad_norm": 0.9995237588882446,
"learning_rate": 9.885679823441913e-05,
"loss": 0.92473927,
"memory(GiB)": 67.71,
"step": 835,
"train_speed(iter/s)": 0.039986
},
{
"acc": 0.73567324,
"epoch": 0.5777166437414031,
"grad_norm": 1.1980810165405273,
"learning_rate": 9.883249301456078e-05,
"loss": 0.97589169,
"memory(GiB)": 67.71,
"step": 840,
"train_speed(iter/s)": 0.040091
},
{
"acc": 0.72378907,
"epoch": 0.5811554332874828,
"grad_norm": 1.059746503829956,
"learning_rate": 9.880793518452414e-05,
"loss": 1.01202221,
"memory(GiB)": 67.71,
"step": 845,
"train_speed(iter/s)": 0.040196
},
{
"acc": 0.72781639,
"epoch": 0.5845942228335625,
"grad_norm": 1.1578445434570312,
"learning_rate": 9.878312487135973e-05,
"loss": 0.98674173,
"memory(GiB)": 67.71,
"step": 850,
"train_speed(iter/s)": 0.040293
},
{
"acc": 0.7325696,
"epoch": 0.5880330123796423,
"grad_norm": 1.1622587442398071,
"learning_rate": 9.87580622034243e-05,
"loss": 0.96467819,
"memory(GiB)": 67.71,
"step": 855,
"train_speed(iter/s)": 0.040397
},
{
"acc": 0.74238405,
"epoch": 0.5914718019257221,
"grad_norm": 1.221163034439087,
"learning_rate": 9.873274731038013e-05,
"loss": 0.94902515,
"memory(GiB)": 67.71,
"step": 860,
"train_speed(iter/s)": 0.040497
},
{
"acc": 0.73676643,
"epoch": 0.5949105914718019,
"grad_norm": 1.0908128023147583,
"learning_rate": 9.87071803231944e-05,
"loss": 0.94923353,
"memory(GiB)": 67.71,
"step": 865,
"train_speed(iter/s)": 0.040613
},
{
"acc": 0.7285512,
"epoch": 0.5983493810178817,
"grad_norm": 0.9778567552566528,
"learning_rate": 9.868136137413854e-05,
"loss": 0.99065866,
"memory(GiB)": 67.71,
"step": 870,
"train_speed(iter/s)": 0.040705
},
{
"acc": 0.75390539,
"epoch": 0.6017881705639615,
"grad_norm": 1.1204711198806763,
"learning_rate": 9.865529059678749e-05,
"loss": 0.89114456,
"memory(GiB)": 67.71,
"step": 875,
"train_speed(iter/s)": 0.040815
},
{
"acc": 0.72871351,
"epoch": 0.6052269601100413,
"grad_norm": 1.1295973062515259,
"learning_rate": 9.8628968126019e-05,
"loss": 0.97484636,
"memory(GiB)": 67.71,
"step": 880,
"train_speed(iter/s)": 0.04091
},
{
"acc": 0.75279789,
"epoch": 0.6086657496561211,
"grad_norm": 1.276840090751648,
"learning_rate": 9.8602394098013e-05,
"loss": 0.9101244,
"memory(GiB)": 67.71,
"step": 885,
"train_speed(iter/s)": 0.041017
},
{
"acc": 0.72960396,
"epoch": 0.6121045392022009,
"grad_norm": 1.1485203504562378,
"learning_rate": 9.857556865025087e-05,
"loss": 0.9954258,
"memory(GiB)": 67.71,
"step": 890,
"train_speed(iter/s)": 0.041116
},
{
"acc": 0.73271265,
"epoch": 0.6155433287482807,
"grad_norm": 1.2299952507019043,
"learning_rate": 9.854849192151468e-05,
"loss": 0.97523527,
"memory(GiB)": 67.71,
"step": 895,
"train_speed(iter/s)": 0.041212
},
{
"acc": 0.72924538,
"epoch": 0.6189821182943603,
"grad_norm": 1.1494402885437012,
"learning_rate": 9.852116405188648e-05,
"loss": 0.98907299,
"memory(GiB)": 67.71,
"step": 900,
"train_speed(iter/s)": 0.041323
},
{
"epoch": 0.6189821182943603,
"eval_acc": 0.7370371620101678,
"eval_loss": 0.9578044414520264,
"eval_runtime": 1104.9055,
"eval_samples_per_second": 3.876,
"eval_steps_per_second": 0.07,
"step": 900
},
{
"acc": 0.72602391,
"epoch": 0.6224209078404401,
"grad_norm": 1.0728832483291626,
"learning_rate": 9.849358518274771e-05,
"loss": 1.01037588,
"memory(GiB)": 67.71,
"step": 905,
"train_speed(iter/s)": 0.039428
},
{
"acc": 0.7520565,
"epoch": 0.6258596973865199,
"grad_norm": 1.0786807537078857,
"learning_rate": 9.846575545677823e-05,
"loss": 0.92040062,
"memory(GiB)": 67.71,
"step": 910,
"train_speed(iter/s)": 0.039534
},
{
"acc": 0.74691858,
"epoch": 0.6292984869325997,
"grad_norm": 1.0437581539154053,
"learning_rate": 9.843767501795583e-05,
"loss": 0.9074029,
"memory(GiB)": 67.71,
"step": 915,
"train_speed(iter/s)": 0.039631
},
{
"acc": 0.73221941,
"epoch": 0.6327372764786795,
"grad_norm": 1.1795591115951538,
"learning_rate": 9.840934401155528e-05,
"loss": 0.988484,
"memory(GiB)": 67.71,
"step": 920,
"train_speed(iter/s)": 0.039722
},
{
"acc": 0.72777405,
"epoch": 0.6361760660247593,
"grad_norm": 1.1894828081130981,
"learning_rate": 9.838076258414776e-05,
"loss": 1.01051292,
"memory(GiB)": 67.71,
"step": 925,
"train_speed(iter/s)": 0.039818
},
{
"acc": 0.75026011,
"epoch": 0.6396148555708391,
"grad_norm": 0.9834104180335999,
"learning_rate": 9.835193088359988e-05,
"loss": 0.90967407,
"memory(GiB)": 67.71,
"step": 930,
"train_speed(iter/s)": 0.0399
},
{
"acc": 0.74026661,
"epoch": 0.6430536451169189,
"grad_norm": 1.2417614459991455,
"learning_rate": 9.832284905907318e-05,
"loss": 0.92580471,
"memory(GiB)": 67.71,
"step": 935,
"train_speed(iter/s)": 0.03999
},
{
"acc": 0.73846035,
"epoch": 0.6464924346629987,
"grad_norm": 1.1710271835327148,
"learning_rate": 9.829351726102313e-05,
"loss": 0.95107613,
"memory(GiB)": 67.71,
"step": 940,
"train_speed(iter/s)": 0.040081
},
{
"acc": 0.7366385,
"epoch": 0.6499312242090785,
"grad_norm": 1.0618470907211304,
"learning_rate": 9.826393564119847e-05,
"loss": 0.94500179,
"memory(GiB)": 67.71,
"step": 945,
"train_speed(iter/s)": 0.040159
},
{
"acc": 0.74606085,
"epoch": 0.6533700137551581,
"grad_norm": 1.0151257514953613,
"learning_rate": 9.823410435264042e-05,
"loss": 0.90975704,
"memory(GiB)": 67.71,
"step": 950,
"train_speed(iter/s)": 0.040245
},
{
"acc": 0.73123455,
"epoch": 0.6568088033012379,
"grad_norm": 1.1929761171340942,
"learning_rate": 9.820402354968183e-05,
"loss": 0.95826616,
"memory(GiB)": 67.71,
"step": 955,
"train_speed(iter/s)": 0.040333
},
{
"acc": 0.73816185,
"epoch": 0.6602475928473177,
"grad_norm": 1.240237832069397,
"learning_rate": 9.817369338794646e-05,
"loss": 0.94996367,
"memory(GiB)": 67.71,
"step": 960,
"train_speed(iter/s)": 0.040436
},
{
"acc": 0.74816332,
"epoch": 0.6636863823933975,
"grad_norm": 0.9286736845970154,
"learning_rate": 9.81431140243481e-05,
"loss": 0.90342827,
"memory(GiB)": 67.71,
"step": 965,
"train_speed(iter/s)": 0.040521
},
{
"acc": 0.74362345,
"epoch": 0.6671251719394773,
"grad_norm": 1.1020361185073853,
"learning_rate": 9.811228561708979e-05,
"loss": 0.92705402,
"memory(GiB)": 67.71,
"step": 970,
"train_speed(iter/s)": 0.040625
},
{
"acc": 0.73400373,
"epoch": 0.6705639614855571,
"grad_norm": 1.0580672025680542,
"learning_rate": 9.808120832566306e-05,
"loss": 0.98702965,
"memory(GiB)": 67.71,
"step": 975,
"train_speed(iter/s)": 0.040726
},
{
"acc": 0.71981792,
"epoch": 0.6740027510316369,
"grad_norm": 1.054178237915039,
"learning_rate": 9.804988231084695e-05,
"loss": 1.02396307,
"memory(GiB)": 67.71,
"step": 980,
"train_speed(iter/s)": 0.040822
},
{
"acc": 0.7412406,
"epoch": 0.6774415405777167,
"grad_norm": 1.064276933670044,
"learning_rate": 9.801830773470738e-05,
"loss": 0.92902575,
"memory(GiB)": 67.71,
"step": 985,
"train_speed(iter/s)": 0.040901
},
{
"acc": 0.73765955,
"epoch": 0.6808803301237965,
"grad_norm": 1.02224862575531,
"learning_rate": 9.798648476059612e-05,
"loss": 0.96069899,
"memory(GiB)": 67.71,
"step": 990,
"train_speed(iter/s)": 0.040989
},
{
"acc": 0.72184877,
"epoch": 0.6843191196698762,
"grad_norm": 1.10880446434021,
"learning_rate": 9.795441355315009e-05,
"loss": 1.00857792,
"memory(GiB)": 67.71,
"step": 995,
"train_speed(iter/s)": 0.041069
},
{
"acc": 0.75687084,
"epoch": 0.687757909215956,
"grad_norm": 1.0748587846755981,
"learning_rate": 9.792209427829044e-05,
"loss": 0.89921093,
"memory(GiB)": 67.71,
"step": 1000,
"train_speed(iter/s)": 0.041172
},
{
"epoch": 0.687757909215956,
"eval_acc": 0.739815314707338,
"eval_loss": 0.9472519159317017,
"eval_runtime": 1138.6289,
"eval_samples_per_second": 3.762,
"eval_steps_per_second": 0.068,
"step": 1000
},
{
"acc": 0.74216719,
"epoch": 0.6911966987620357,
"grad_norm": 0.9975650906562805,
"learning_rate": 9.788952710322168e-05,
"loss": 0.92038422,
"memory(GiB)": 67.71,
"step": 1005,
"train_speed(iter/s)": 0.039418
},
{
"acc": 0.73918667,
"epoch": 0.6946354883081155,
"grad_norm": 1.167277455329895,
"learning_rate": 9.785671219643086e-05,
"loss": 0.95244484,
"memory(GiB)": 67.71,
"step": 1010,
"train_speed(iter/s)": 0.039509
},
{
"acc": 0.74078741,
"epoch": 0.6980742778541953,
"grad_norm": 1.1248480081558228,
"learning_rate": 9.782364972768667e-05,
"loss": 0.95239239,
"memory(GiB)": 67.71,
"step": 1015,
"train_speed(iter/s)": 0.039611
},
{
"acc": 0.73078346,
"epoch": 0.7015130674002751,
"grad_norm": 1.1177655458450317,
"learning_rate": 9.779033986803856e-05,
"loss": 0.97850962,
"memory(GiB)": 67.71,
"step": 1020,
"train_speed(iter/s)": 0.039689
},
{
"acc": 0.72922001,
"epoch": 0.7049518569463549,
"grad_norm": 1.025723934173584,
"learning_rate": 9.775678278981587e-05,
"loss": 0.97461071,
"memory(GiB)": 67.71,
"step": 1025,
"train_speed(iter/s)": 0.039762
},
{
"acc": 0.74167843,
"epoch": 0.7083906464924347,
"grad_norm": 1.0677716732025146,
"learning_rate": 9.772297866662694e-05,
"loss": 0.93668747,
"memory(GiB)": 67.71,
"step": 1030,
"train_speed(iter/s)": 0.039851
},
{
"acc": 0.73580718,
"epoch": 0.7118294360385145,
"grad_norm": 1.071346402168274,
"learning_rate": 9.768892767335818e-05,
"loss": 0.94455872,
"memory(GiB)": 67.71,
"step": 1035,
"train_speed(iter/s)": 0.03994
},
{
"acc": 0.73037386,
"epoch": 0.7152682255845942,
"grad_norm": 1.1164538860321045,
"learning_rate": 9.76546299861732e-05,
"loss": 0.96749563,
"memory(GiB)": 67.71,
"step": 1040,
"train_speed(iter/s)": 0.040026
},
{
"acc": 0.73348866,
"epoch": 0.718707015130674,
"grad_norm": 1.016825556755066,
"learning_rate": 9.76200857825119e-05,
"loss": 0.97527409,
"memory(GiB)": 67.71,
"step": 1045,
"train_speed(iter/s)": 0.040097
},
{
"acc": 0.74810896,
"epoch": 0.7221458046767538,
"grad_norm": 1.0394419431686401,
"learning_rate": 9.758529524108952e-05,
"loss": 0.91727133,
"memory(GiB)": 67.71,
"step": 1050,
"train_speed(iter/s)": 0.040188
},
{
"acc": 0.7377789,
"epoch": 0.7255845942228336,
"grad_norm": 1.2021335363388062,
"learning_rate": 9.755025854189574e-05,
"loss": 0.96904411,
"memory(GiB)": 67.71,
"step": 1055,
"train_speed(iter/s)": 0.040268
},
{
"acc": 0.7272027,
"epoch": 0.7290233837689133,
"grad_norm": 1.012821912765503,
"learning_rate": 9.751497586619374e-05,
"loss": 0.9858429,
"memory(GiB)": 67.71,
"step": 1060,
"train_speed(iter/s)": 0.040336
},
{
"acc": 0.73458595,
"epoch": 0.7324621733149931,
"grad_norm": 1.1023552417755127,
"learning_rate": 9.747944739651928e-05,
"loss": 0.95475712,
"memory(GiB)": 67.71,
"step": 1065,
"train_speed(iter/s)": 0.04042
},
{
"acc": 0.74384351,
"epoch": 0.7359009628610729,
"grad_norm": 1.1670334339141846,
"learning_rate": 9.744367331667972e-05,
"loss": 0.90986481,
"memory(GiB)": 67.71,
"step": 1070,
"train_speed(iter/s)": 0.040504
},
{
"acc": 0.74164953,
"epoch": 0.7393397524071527,
"grad_norm": 1.1003512144088745,
"learning_rate": 9.740765381175308e-05,
"loss": 0.95252619,
"memory(GiB)": 67.71,
"step": 1075,
"train_speed(iter/s)": 0.040585
},
{
"acc": 0.72832394,
"epoch": 0.7427785419532325,
"grad_norm": 1.141493320465088,
"learning_rate": 9.737138906808716e-05,
"loss": 0.9896246,
"memory(GiB)": 67.71,
"step": 1080,
"train_speed(iter/s)": 0.040669
},
{
"acc": 0.75672712,
"epoch": 0.7462173314993122,
"grad_norm": 1.0548261404037476,
"learning_rate": 9.733487927329842e-05,
"loss": 0.89013748,
"memory(GiB)": 67.71,
"step": 1085,
"train_speed(iter/s)": 0.040758
},
{
"acc": 0.74028645,
"epoch": 0.749656121045392,
"grad_norm": 1.058765172958374,
"learning_rate": 9.729812461627116e-05,
"loss": 0.9446207,
"memory(GiB)": 67.71,
"step": 1090,
"train_speed(iter/s)": 0.040824
},
{
"acc": 0.74290891,
"epoch": 0.7530949105914718,
"grad_norm": 1.0750882625579834,
"learning_rate": 9.726112528715645e-05,
"loss": 0.93429804,
"memory(GiB)": 67.71,
"step": 1095,
"train_speed(iter/s)": 0.040904
},
{
"acc": 0.74171824,
"epoch": 0.7565337001375516,
"grad_norm": 1.1232870817184448,
"learning_rate": 9.722388147737117e-05,
"loss": 0.9356823,
"memory(GiB)": 67.71,
"step": 1100,
"train_speed(iter/s)": 0.040982
},
{
"epoch": 0.7565337001375516,
"eval_acc": 0.7418005128897287,
"eval_loss": 0.9385226964950562,
"eval_runtime": 1120.0428,
"eval_samples_per_second": 3.824,
"eval_steps_per_second": 0.069,
"step": 1100
},
{
"acc": 0.73683257,
"epoch": 0.7599724896836314,
"grad_norm": 1.0508232116699219,
"learning_rate": 9.718639337959709e-05,
"loss": 0.95805416,
"memory(GiB)": 67.71,
"step": 1105,
"train_speed(iter/s)": 0.03942
},
{
"acc": 0.74061327,
"epoch": 0.7634112792297112,
"grad_norm": 1.0770542621612549,
"learning_rate": 9.714866118777971e-05,
"loss": 0.92782459,
"memory(GiB)": 67.71,
"step": 1110,
"train_speed(iter/s)": 0.039509
},
{
"acc": 0.72901726,
"epoch": 0.7668500687757909,
"grad_norm": 1.104008674621582,
"learning_rate": 9.711068509712744e-05,
"loss": 0.99297533,
"memory(GiB)": 67.71,
"step": 1115,
"train_speed(iter/s)": 0.039593
},
{
"acc": 0.73747034,
"epoch": 0.7702888583218707,
"grad_norm": 1.1213022470474243,
"learning_rate": 9.707246530411045e-05,
"loss": 0.96422043,
"memory(GiB)": 67.71,
"step": 1120,
"train_speed(iter/s)": 0.039669
},
{
"acc": 0.75132704,
"epoch": 0.7737276478679505,
"grad_norm": 0.9887475967407227,
"learning_rate": 9.703400200645976e-05,
"loss": 0.90485935,
"memory(GiB)": 67.71,
"step": 1125,
"train_speed(iter/s)": 0.039747
},
{
"acc": 0.74963489,
"epoch": 0.7771664374140302,
"grad_norm": 1.105952501296997,
"learning_rate": 9.69952954031661e-05,
"loss": 0.89224911,
"memory(GiB)": 67.71,
"step": 1130,
"train_speed(iter/s)": 0.039821
},
{
"acc": 0.73749495,
"epoch": 0.78060522696011,
"grad_norm": 1.140572428703308,
"learning_rate": 9.695634569447904e-05,
"loss": 0.9487175,
"memory(GiB)": 67.71,
"step": 1135,
"train_speed(iter/s)": 0.039888
},
{
"acc": 0.7312088,
"epoch": 0.7840440165061898,
"grad_norm": 1.1275548934936523,
"learning_rate": 9.691715308190576e-05,
"loss": 0.96534138,
"memory(GiB)": 67.71,
"step": 1140,
"train_speed(iter/s)": 0.039959
},
{
"acc": 0.73048372,
"epoch": 0.7874828060522696,
"grad_norm": 1.0315409898757935,
"learning_rate": 9.68777177682102e-05,
"loss": 0.97743053,
"memory(GiB)": 67.71,
"step": 1145,
"train_speed(iter/s)": 0.040036
},
{
"acc": 0.72779579,
"epoch": 0.7909215955983494,
"grad_norm": 0.9368631839752197,
"learning_rate": 9.683803995741186e-05,
"loss": 0.98139448,
"memory(GiB)": 67.71,
"step": 1150,
"train_speed(iter/s)": 0.040117
},
{
"acc": 0.74332333,
"epoch": 0.7943603851444292,
"grad_norm": 1.1103096008300781,
"learning_rate": 9.679811985478483e-05,
"loss": 0.9456337,
"memory(GiB)": 67.71,
"step": 1155,
"train_speed(iter/s)": 0.040199
},
{
"acc": 0.73605175,
"epoch": 0.797799174690509,
"grad_norm": 0.9985005259513855,
"learning_rate": 9.675795766685669e-05,
"loss": 0.94118538,
"memory(GiB)": 67.71,
"step": 1160,
"train_speed(iter/s)": 0.040265
},
{
"acc": 0.7368608,
"epoch": 0.8012379642365888,
"grad_norm": 1.086758017539978,
"learning_rate": 9.671755360140746e-05,
"loss": 0.94844141,
"memory(GiB)": 67.71,
"step": 1165,
"train_speed(iter/s)": 0.040343
},
{
"acc": 0.74641371,
"epoch": 0.8046767537826685,
"grad_norm": 0.9669944643974304,
"learning_rate": 9.667690786746852e-05,
"loss": 0.91691303,
"memory(GiB)": 67.71,
"step": 1170,
"train_speed(iter/s)": 0.040416
},
{
"acc": 0.74651995,
"epoch": 0.8081155433287482,
"grad_norm": 1.0597587823867798,
"learning_rate": 9.663602067532151e-05,
"loss": 0.91813259,
"memory(GiB)": 67.71,
"step": 1175,
"train_speed(iter/s)": 0.040502
},
{
"acc": 0.73420897,
"epoch": 0.811554332874828,
"grad_norm": 1.1257351636886597,
"learning_rate": 9.659489223649731e-05,
"loss": 0.97081699,
"memory(GiB)": 67.71,
"step": 1180,
"train_speed(iter/s)": 0.040572
},
{
"acc": 0.76278071,
"epoch": 0.8149931224209078,
"grad_norm": 1.0399693250656128,
"learning_rate": 9.655352276377484e-05,
"loss": 0.85249825,
"memory(GiB)": 67.71,
"step": 1185,
"train_speed(iter/s)": 0.040662
},
{
"acc": 0.74080434,
"epoch": 0.8184319119669876,
"grad_norm": 1.104978322982788,
"learning_rate": 9.651191247118003e-05,
"loss": 0.93528318,
"memory(GiB)": 67.71,
"step": 1190,
"train_speed(iter/s)": 0.040735
},
{
"acc": 0.7570159,
"epoch": 0.8218707015130674,
"grad_norm": 1.1633975505828857,
"learning_rate": 9.647006157398471e-05,
"loss": 0.8937582,
"memory(GiB)": 67.71,
"step": 1195,
"train_speed(iter/s)": 0.040819
},
{
"acc": 0.75301266,
"epoch": 0.8253094910591472,
"grad_norm": 1.0719282627105713,
"learning_rate": 9.642797028870549e-05,
"loss": 0.87665348,
"memory(GiB)": 67.71,
"step": 1200,
"train_speed(iter/s)": 0.040893
},
{
"epoch": 0.8253094910591472,
"eval_acc": 0.7415305709272506,
"eval_loss": 0.9332711100578308,
"eval_runtime": 1102.5228,
"eval_samples_per_second": 3.885,
"eval_steps_per_second": 0.07,
"step": 1200
},
{
"acc": 0.73832102,
"epoch": 0.828748280605227,
"grad_norm": 1.1222566366195679,
"learning_rate": 9.63856388331026e-05,
"loss": 0.92794905,
"memory(GiB)": 67.71,
"step": 1205,
"train_speed(iter/s)": 0.039487
},
{
"acc": 0.74549799,
"epoch": 0.8321870701513068,
"grad_norm": 1.0469160079956055,
"learning_rate": 9.634306742617881e-05,
"loss": 0.91989012,
"memory(GiB)": 67.71,
"step": 1210,
"train_speed(iter/s)": 0.039561
},
{
"acc": 0.73400946,
"epoch": 0.8356258596973866,
"grad_norm": 1.1092973947525024,
"learning_rate": 9.630025628817833e-05,
"loss": 0.96797295,
"memory(GiB)": 67.71,
"step": 1215,
"train_speed(iter/s)": 0.039633
},
{
"acc": 0.74465179,
"epoch": 0.8390646492434664,
"grad_norm": 1.0476914644241333,
"learning_rate": 9.625720564058553e-05,
"loss": 0.9328536,
"memory(GiB)": 67.71,
"step": 1220,
"train_speed(iter/s)": 0.039708
},
{
"acc": 0.7472661,
"epoch": 0.842503438789546,
"grad_norm": 1.002954363822937,
"learning_rate": 9.6213915706124e-05,
"loss": 0.90329132,
"memory(GiB)": 67.71,
"step": 1225,
"train_speed(iter/s)": 0.039779
},
{
"acc": 0.73477154,
"epoch": 0.8459422283356258,
"grad_norm": 1.1124727725982666,
"learning_rate": 9.617038670875518e-05,
"loss": 0.97662973,
"memory(GiB)": 67.71,
"step": 1230,
"train_speed(iter/s)": 0.039851
},
{
"acc": 0.7584034,
"epoch": 0.8493810178817056,
"grad_norm": 0.9963657855987549,
"learning_rate": 9.612661887367738e-05,
"loss": 0.87994137,
"memory(GiB)": 67.71,
"step": 1235,
"train_speed(iter/s)": 0.03993
},
{
"acc": 0.74326572,
"epoch": 0.8528198074277854,
"grad_norm": 1.0891412496566772,
"learning_rate": 9.608261242732453e-05,
"loss": 0.92397137,
"memory(GiB)": 67.71,
"step": 1240,
"train_speed(iter/s)": 0.039998
},
{
"acc": 0.74097652,
"epoch": 0.8562585969738652,
"grad_norm": 0.9601296186447144,
"learning_rate": 9.603836759736501e-05,
"loss": 0.92762499,
"memory(GiB)": 67.71,
"step": 1245,
"train_speed(iter/s)": 0.040067
},
{
"acc": 0.74741158,
"epoch": 0.859697386519945,
"grad_norm": 1.0737489461898804,
"learning_rate": 9.599388461270046e-05,
"loss": 0.89353437,
"memory(GiB)": 67.71,
"step": 1250,
"train_speed(iter/s)": 0.040137
},
{
"acc": 0.7489872,
"epoch": 0.8631361760660248,
"grad_norm": 0.8971010446548462,
"learning_rate": 9.594916370346464e-05,
"loss": 0.9029624,
"memory(GiB)": 67.71,
"step": 1255,
"train_speed(iter/s)": 0.040204
},
{
"acc": 0.76023664,
"epoch": 0.8665749656121046,
"grad_norm": 1.1241250038146973,
"learning_rate": 9.590420510102226e-05,
"loss": 0.87794628,
"memory(GiB)": 67.71,
"step": 1260,
"train_speed(iter/s)": 0.040279
},
{
"acc": 0.73114996,
"epoch": 0.8700137551581844,
"grad_norm": 1.2173177003860474,
"learning_rate": 9.585900903796766e-05,
"loss": 0.97089109,
"memory(GiB)": 67.71,
"step": 1265,
"train_speed(iter/s)": 0.040357
},
{
"acc": 0.74563522,
"epoch": 0.8734525447042641,
"grad_norm": 1.1517419815063477,
"learning_rate": 9.581357574812375e-05,
"loss": 0.89781647,
"memory(GiB)": 67.71,
"step": 1270,
"train_speed(iter/s)": 0.040428
},
{
"acc": 0.77028093,
"epoch": 0.8768913342503438,
"grad_norm": 1.0377655029296875,
"learning_rate": 9.576790546654071e-05,
"loss": 0.85278912,
"memory(GiB)": 67.71,
"step": 1275,
"train_speed(iter/s)": 0.040497
},
{
"acc": 0.73771534,
"epoch": 0.8803301237964236,
"grad_norm": 1.3595341444015503,
"learning_rate": 9.572199842949484e-05,
"loss": 0.94212608,
"memory(GiB)": 67.71,
"step": 1280,
"train_speed(iter/s)": 0.04057
},
{
"acc": 0.73818164,
"epoch": 0.8837689133425034,
"grad_norm": 1.0683890581130981,
"learning_rate": 9.567585487448723e-05,
"loss": 0.94818478,
"memory(GiB)": 67.71,
"step": 1285,
"train_speed(iter/s)": 0.040642
},
{
"acc": 0.73784003,
"epoch": 0.8872077028885832,
"grad_norm": 1.0474903583526611,
"learning_rate": 9.562947504024267e-05,
"loss": 0.93362265,
"memory(GiB)": 67.71,
"step": 1290,
"train_speed(iter/s)": 0.040707
},
{
"acc": 0.73629189,
"epoch": 0.890646492434663,
"grad_norm": 0.9906838536262512,
"learning_rate": 9.558285916670833e-05,
"loss": 0.96513948,
"memory(GiB)": 67.71,
"step": 1295,
"train_speed(iter/s)": 0.04077
},
{
"acc": 0.74767346,
"epoch": 0.8940852819807428,
"grad_norm": 0.937610924243927,
"learning_rate": 9.553600749505249e-05,
"loss": 0.91039581,
"memory(GiB)": 67.71,
"step": 1300,
"train_speed(iter/s)": 0.040841
},
{
"epoch": 0.8940852819807428,
"eval_acc": 0.7442806046699959,
"eval_loss": 0.9251495003700256,
"eval_runtime": 1163.3237,
"eval_samples_per_second": 3.682,
"eval_steps_per_second": 0.066,
"step": 1300
},
{
"acc": 0.75481806,
"epoch": 0.8975240715268226,
"grad_norm": 1.093235969543457,
"learning_rate": 9.548892026766336e-05,
"loss": 0.89359856,
"memory(GiB)": 67.71,
"step": 1305,
"train_speed(iter/s)": 0.039469
},
{
"acc": 0.7461483,
"epoch": 0.9009628610729024,
"grad_norm": 1.1600829362869263,
"learning_rate": 9.544159772814784e-05,
"loss": 0.92318993,
"memory(GiB)": 67.71,
"step": 1310,
"train_speed(iter/s)": 0.039531
},
{
"acc": 0.74466972,
"epoch": 0.9044016506189821,
"grad_norm": 1.0172392129898071,
"learning_rate": 9.539404012133022e-05,
"loss": 0.92656469,
"memory(GiB)": 67.71,
"step": 1315,
"train_speed(iter/s)": 0.039601
},
{
"acc": 0.74887996,
"epoch": 0.9078404401650619,
"grad_norm": 1.0804096460342407,
"learning_rate": 9.534624769325086e-05,
"loss": 0.9098074,
"memory(GiB)": 67.71,
"step": 1320,
"train_speed(iter/s)": 0.039666
},
{
"acc": 0.74121346,
"epoch": 0.9112792297111417,
"grad_norm": 0.9664003849029541,
"learning_rate": 9.529822069116499e-05,
"loss": 0.9316514,
"memory(GiB)": 67.71,
"step": 1325,
"train_speed(iter/s)": 0.039723
},
{
"acc": 0.75105286,
"epoch": 0.9147180192572214,
"grad_norm": 0.9869258999824524,
"learning_rate": 9.524995936354147e-05,
"loss": 0.88554888,
"memory(GiB)": 67.71,
"step": 1330,
"train_speed(iter/s)": 0.039787
},
{
"acc": 0.73669834,
"epoch": 0.9181568088033012,
"grad_norm": 1.0221189260482788,
"learning_rate": 9.520146396006138e-05,
"loss": 0.96149244,
"memory(GiB)": 67.71,
"step": 1335,
"train_speed(iter/s)": 0.03984
},
{
"acc": 0.73676386,
"epoch": 0.921595598349381,
"grad_norm": 1.1528207063674927,
"learning_rate": 9.515273473161683e-05,
"loss": 0.96016941,
"memory(GiB)": 67.71,
"step": 1340,
"train_speed(iter/s)": 0.039908
},
{
"acc": 0.74311166,
"epoch": 0.9250343878954608,
"grad_norm": 1.161664366722107,
"learning_rate": 9.510377193030963e-05,
"loss": 0.93340931,
"memory(GiB)": 67.71,
"step": 1345,
"train_speed(iter/s)": 0.039969
},
{
"acc": 0.7412838,
"epoch": 0.9284731774415406,
"grad_norm": 1.1524734497070312,
"learning_rate": 9.505457580944998e-05,
"loss": 0.94830551,
"memory(GiB)": 67.71,
"step": 1350,
"train_speed(iter/s)": 0.040033
},
{
"acc": 0.7353076,
"epoch": 0.9319119669876204,
"grad_norm": 0.991431474685669,
"learning_rate": 9.500514662355515e-05,
"loss": 0.94869709,
"memory(GiB)": 67.71,
"step": 1355,
"train_speed(iter/s)": 0.040099
},
{
"acc": 0.74592419,
"epoch": 0.9353507565337001,
"grad_norm": 0.9828781485557556,
"learning_rate": 9.495548462834822e-05,
"loss": 0.91297379,
"memory(GiB)": 67.71,
"step": 1360,
"train_speed(iter/s)": 0.040166
},
{
"acc": 0.72816801,
"epoch": 0.9387895460797799,
"grad_norm": 0.9917466640472412,
"learning_rate": 9.490559008075665e-05,
"loss": 0.97318478,
"memory(GiB)": 67.71,
"step": 1365,
"train_speed(iter/s)": 0.040224
},
{
"acc": 0.75423832,
"epoch": 0.9422283356258597,
"grad_norm": 1.021081566810608,
"learning_rate": 9.485546323891107e-05,
"loss": 0.88315115,
"memory(GiB)": 67.71,
"step": 1370,
"train_speed(iter/s)": 0.040286
},
{
"acc": 0.74885693,
"epoch": 0.9456671251719395,
"grad_norm": 1.1856554746627808,
"learning_rate": 9.480510436214387e-05,
"loss": 0.91739559,
"memory(GiB)": 67.71,
"step": 1375,
"train_speed(iter/s)": 0.040345
},
{
"acc": 0.7300355,
"epoch": 0.9491059147180193,
"grad_norm": 0.9983332753181458,
"learning_rate": 9.475451371098787e-05,
"loss": 0.96374044,
"memory(GiB)": 67.71,
"step": 1380,
"train_speed(iter/s)": 0.040398
},
{
"acc": 0.76302462,
"epoch": 0.952544704264099,
"grad_norm": 1.0011341571807861,
"learning_rate": 9.470369154717498e-05,
"loss": 0.86735439,
"memory(GiB)": 67.71,
"step": 1385,
"train_speed(iter/s)": 0.040463
},
{
"acc": 0.74582882,
"epoch": 0.9559834938101788,
"grad_norm": 1.051133394241333,
"learning_rate": 9.465263813363488e-05,
"loss": 0.90945959,
"memory(GiB)": 67.71,
"step": 1390,
"train_speed(iter/s)": 0.040529
},
{
"acc": 0.74545488,
"epoch": 0.9594222833562586,
"grad_norm": 1.0635967254638672,
"learning_rate": 9.460135373449359e-05,
"loss": 0.92096958,
"memory(GiB)": 67.71,
"step": 1395,
"train_speed(iter/s)": 0.0406
},
{
"acc": 0.7286293,
"epoch": 0.9628610729023384,
"grad_norm": 1.146252155303955,
"learning_rate": 9.454983861507213e-05,
"loss": 0.98950424,
"memory(GiB)": 67.71,
"step": 1400,
"train_speed(iter/s)": 0.040657
},
{
"epoch": 0.9628610729023384,
"eval_acc": 0.7448486075493769,
"eval_loss": 0.9200888872146606,
"eval_runtime": 1140.3609,
"eval_samples_per_second": 3.756,
"eval_steps_per_second": 0.068,
"step": 1400
},
{
"acc": 0.73903141,
"epoch": 0.9662998624484181,
"grad_norm": 1.1544698476791382,
"learning_rate": 9.44980930418852e-05,
"loss": 0.94653835,
"memory(GiB)": 67.71,
"step": 1405,
"train_speed(iter/s)": 0.039418
},
{
"acc": 0.73803802,
"epoch": 0.9697386519944979,
"grad_norm": 1.1627522706985474,
"learning_rate": 9.444611728263972e-05,
"loss": 0.92657709,
"memory(GiB)": 67.71,
"step": 1410,
"train_speed(iter/s)": 0.039486
},
{
"acc": 0.74716005,
"epoch": 0.9731774415405777,
"grad_norm": 1.0238428115844727,
"learning_rate": 9.439391160623352e-05,
"loss": 0.91622248,
"memory(GiB)": 67.71,
"step": 1415,
"train_speed(iter/s)": 0.039543
},
{
"acc": 0.72584734,
"epoch": 0.9766162310866575,
"grad_norm": 1.0079649686813354,
"learning_rate": 9.434147628275387e-05,
"loss": 0.99349623,
"memory(GiB)": 67.71,
"step": 1420,
"train_speed(iter/s)": 0.039598
},
{
"acc": 0.74595861,
"epoch": 0.9800550206327373,
"grad_norm": 0.93181973695755,
"learning_rate": 9.428881158347614e-05,
"loss": 0.90428505,
"memory(GiB)": 67.71,
"step": 1425,
"train_speed(iter/s)": 0.039658
},
{
"acc": 0.74024305,
"epoch": 0.9834938101788171,
"grad_norm": 1.0997734069824219,
"learning_rate": 9.42359177808624e-05,
"loss": 0.92796974,
"memory(GiB)": 67.71,
"step": 1430,
"train_speed(iter/s)": 0.039723
},
{
"acc": 0.75488276,
"epoch": 0.9869325997248969,
"grad_norm": 1.075714111328125,
"learning_rate": 9.418279514855995e-05,
"loss": 0.88083801,
"memory(GiB)": 67.71,
"step": 1435,
"train_speed(iter/s)": 0.039792
},
{
"acc": 0.75168705,
"epoch": 0.9903713892709766,
"grad_norm": 1.039860486984253,
"learning_rate": 9.412944396139998e-05,
"loss": 0.89997187,
"memory(GiB)": 67.71,
"step": 1440,
"train_speed(iter/s)": 0.039851
},
{
"acc": 0.74686685,
"epoch": 0.9938101788170564,
"grad_norm": 0.9597694277763367,
"learning_rate": 9.407586449539616e-05,
"loss": 0.90008278,
"memory(GiB)": 67.71,
"step": 1445,
"train_speed(iter/s)": 0.039911
},
{
"acc": 0.74714336,
"epoch": 0.9972489683631361,
"grad_norm": 1.0538160800933838,
"learning_rate": 9.402205702774304e-05,
"loss": 0.89391537,
"memory(GiB)": 67.71,
"step": 1450,
"train_speed(iter/s)": 0.039977
},
{
"acc": 0.74310694,
"epoch": 1.000687757909216,
"grad_norm": 1.170095443725586,
"learning_rate": 9.396802183681483e-05,
"loss": 0.9227499,
"memory(GiB)": 67.71,
"step": 1455,
"train_speed(iter/s)": 0.039992
},
{
"acc": 0.75078964,
"epoch": 1.0041265474552958,
"grad_norm": 0.9855571985244751,
"learning_rate": 9.391375920216388e-05,
"loss": 0.86350327,
"memory(GiB)": 67.71,
"step": 1460,
"train_speed(iter/s)": 0.040045
},
{
"acc": 0.76349792,
"epoch": 1.0075653370013755,
"grad_norm": 1.0560338497161865,
"learning_rate": 9.38592694045192e-05,
"loss": 0.8352499,
"memory(GiB)": 67.71,
"step": 1465,
"train_speed(iter/s)": 0.040102
},
{
"acc": 0.75685053,
"epoch": 1.0110041265474552,
"grad_norm": 1.2401360273361206,
"learning_rate": 9.380455272578501e-05,
"loss": 0.86346865,
"memory(GiB)": 67.71,
"step": 1470,
"train_speed(iter/s)": 0.040151
},
{
"acc": 0.74811668,
"epoch": 1.014442916093535,
"grad_norm": 1.0885376930236816,
"learning_rate": 9.374960944903933e-05,
"loss": 0.90040436,
"memory(GiB)": 67.71,
"step": 1475,
"train_speed(iter/s)": 0.04021
},
{
"acc": 0.76131544,
"epoch": 1.0178817056396148,
"grad_norm": 1.10344660282135,
"learning_rate": 9.36944398585325e-05,
"loss": 0.84312658,
"memory(GiB)": 67.71,
"step": 1480,
"train_speed(iter/s)": 0.040266
},
{
"acc": 0.76296844,
"epoch": 1.0213204951856947,
"grad_norm": 0.9839646816253662,
"learning_rate": 9.36390442396857e-05,
"loss": 0.83021584,
"memory(GiB)": 67.71,
"step": 1485,
"train_speed(iter/s)": 0.040327
},
{
"acc": 0.75224285,
"epoch": 1.0247592847317744,
"grad_norm": 1.2059285640716553,
"learning_rate": 9.358342287908944e-05,
"loss": 0.90571365,
"memory(GiB)": 67.71,
"step": 1490,
"train_speed(iter/s)": 0.040386
},
{
"acc": 0.75849729,
"epoch": 1.0281980742778543,
"grad_norm": 1.134822130203247,
"learning_rate": 9.352757606450213e-05,
"loss": 0.86388903,
"memory(GiB)": 67.71,
"step": 1495,
"train_speed(iter/s)": 0.040446
},
{
"acc": 0.757271,
"epoch": 1.031636863823934,
"grad_norm": 1.153064489364624,
"learning_rate": 9.34715040848486e-05,
"loss": 0.86167965,
"memory(GiB)": 67.71,
"step": 1500,
"train_speed(iter/s)": 0.040505
},
{
"epoch": 1.031636863823934,
"eval_acc": 0.7464288927880506,
"eval_loss": 0.9147906303405762,
"eval_runtime": 1155.4137,
"eval_samples_per_second": 3.707,
"eval_steps_per_second": 0.067,
"step": 1500
},
{
"acc": 0.74669247,
"epoch": 1.0350756533700138,
"grad_norm": 1.0673768520355225,
"learning_rate": 9.341520723021853e-05,
"loss": 0.8943819,
"memory(GiB)": 67.71,
"step": 1505,
"train_speed(iter/s)": 0.03933
},
{
"acc": 0.76012006,
"epoch": 1.0385144429160935,
"grad_norm": 1.1268237829208374,
"learning_rate": 9.3358685791865e-05,
"loss": 0.82661228,
"memory(GiB)": 67.71,
"step": 1510,
"train_speed(iter/s)": 0.039388
},
{
"acc": 0.75681725,
"epoch": 1.0419532324621734,
"grad_norm": 0.9775263667106628,
"learning_rate": 9.330194006220301e-05,
"loss": 0.85321465,
"memory(GiB)": 67.71,
"step": 1515,
"train_speed(iter/s)": 0.039438
},
{
"acc": 0.75653033,
"epoch": 1.045392022008253,
"grad_norm": 1.0308629274368286,
"learning_rate": 9.324497033480792e-05,
"loss": 0.85134258,
"memory(GiB)": 67.71,
"step": 1520,
"train_speed(iter/s)": 0.039499
},
{
"acc": 0.75386848,
"epoch": 1.0488308115543328,
"grad_norm": 1.1134449243545532,
"learning_rate": 9.318777690441397e-05,
"loss": 0.87145538,
"memory(GiB)": 67.71,
"step": 1525,
"train_speed(iter/s)": 0.039557
},
{
"acc": 0.75666504,
"epoch": 1.0522696011004127,
"grad_norm": 1.0742757320404053,
"learning_rate": 9.31303600669127e-05,
"loss": 0.86683607,
"memory(GiB)": 67.71,
"step": 1530,
"train_speed(iter/s)": 0.03961
},
{
"acc": 0.75246024,
"epoch": 1.0557083906464924,
"grad_norm": 1.33464515209198,
"learning_rate": 9.30727201193514e-05,
"loss": 0.85711727,
"memory(GiB)": 67.71,
"step": 1535,
"train_speed(iter/s)": 0.039666
},
{
"acc": 0.7529563,
"epoch": 1.0591471801925723,
"grad_norm": 1.165124535560608,
"learning_rate": 9.301485735993179e-05,
"loss": 0.86484051,
"memory(GiB)": 67.71,
"step": 1540,
"train_speed(iter/s)": 0.039727
},
{
"acc": 0.76439376,
"epoch": 1.062585969738652,
"grad_norm": 0.9591624736785889,
"learning_rate": 9.295677208800816e-05,
"loss": 0.82861805,
"memory(GiB)": 67.71,
"step": 1545,
"train_speed(iter/s)": 0.039774
},
{
"acc": 0.75873203,
"epoch": 1.0660247592847318,
"grad_norm": 1.1774530410766602,
"learning_rate": 9.289846460408602e-05,
"loss": 0.85537472,
"memory(GiB)": 67.71,
"step": 1550,
"train_speed(iter/s)": 0.039836
},
{
"acc": 0.75072222,
"epoch": 1.0694635488308115,
"grad_norm": 1.0823148488998413,
"learning_rate": 9.283993520982051e-05,
"loss": 0.87792244,
"memory(GiB)": 67.71,
"step": 1555,
"train_speed(iter/s)": 0.039888
},
{
"acc": 0.755092,
"epoch": 1.0729023383768914,
"grad_norm": 1.084096908569336,
"learning_rate": 9.278118420801481e-05,
"loss": 0.8602149,
"memory(GiB)": 67.71,
"step": 1560,
"train_speed(iter/s)": 0.03995
},
{
"acc": 0.76771908,
"epoch": 1.076341127922971,
"grad_norm": 1.157706379890442,
"learning_rate": 9.272221190261863e-05,
"loss": 0.83946896,
"memory(GiB)": 67.71,
"step": 1565,
"train_speed(iter/s)": 0.040014
},
{
"acc": 0.75533552,
"epoch": 1.0797799174690508,
"grad_norm": 1.1175612211227417,
"learning_rate": 9.266301859872657e-05,
"loss": 0.86314983,
"memory(GiB)": 67.71,
"step": 1570,
"train_speed(iter/s)": 0.040069
},
{
"acc": 0.74409065,
"epoch": 1.0832187070151307,
"grad_norm": 1.0851186513900757,
"learning_rate": 9.260360460257653e-05,
"loss": 0.91000662,
"memory(GiB)": 67.71,
"step": 1575,
"train_speed(iter/s)": 0.040117
},
{
"acc": 0.76165962,
"epoch": 1.0866574965612104,
"grad_norm": 1.208783507347107,
"learning_rate": 9.254397022154828e-05,
"loss": 0.86310711,
"memory(GiB)": 67.71,
"step": 1580,
"train_speed(iter/s)": 0.040175
},
{
"acc": 0.75226078,
"epoch": 1.0900962861072903,
"grad_norm": 1.2301445007324219,
"learning_rate": 9.248411576416162e-05,
"loss": 0.87202549,
"memory(GiB)": 67.71,
"step": 1585,
"train_speed(iter/s)": 0.04023
},
{
"acc": 0.74834862,
"epoch": 1.09353507565337,
"grad_norm": 1.0916322469711304,
"learning_rate": 9.242404154007502e-05,
"loss": 0.89558239,
"memory(GiB)": 67.71,
"step": 1590,
"train_speed(iter/s)": 0.040284
},
{
"acc": 0.75830355,
"epoch": 1.0969738651994498,
"grad_norm": 1.077378749847412,
"learning_rate": 9.236374786008389e-05,
"loss": 0.85708294,
"memory(GiB)": 67.71,
"step": 1595,
"train_speed(iter/s)": 0.04034
},
{
"acc": 0.76014338,
"epoch": 1.1004126547455295,
"grad_norm": 1.2474371194839478,
"learning_rate": 9.230323503611897e-05,
"loss": 0.85164671,
"memory(GiB)": 67.71,
"step": 1600,
"train_speed(iter/s)": 0.040404
},
{
"epoch": 1.1004126547455295,
"eval_acc": 0.7467494488684933,
"eval_loss": 0.9101867079734802,
"eval_runtime": 1124.3275,
"eval_samples_per_second": 3.809,
"eval_steps_per_second": 0.068,
"step": 1600
},
{
"acc": 0.75647812,
"epoch": 1.1038514442916094,
"grad_norm": 1.179998517036438,
"learning_rate": 9.224250338124481e-05,
"loss": 0.86214447,
"memory(GiB)": 67.71,
"step": 1605,
"train_speed(iter/s)": 0.039344
},
{
"acc": 0.76110773,
"epoch": 1.107290233837689,
"grad_norm": 1.1358368396759033,
"learning_rate": 9.2181553209658e-05,
"loss": 0.82860346,
"memory(GiB)": 67.71,
"step": 1610,
"train_speed(iter/s)": 0.039399
},
{
"acc": 0.7529737,
"epoch": 1.110729023383769,
"grad_norm": 1.1643849611282349,
"learning_rate": 9.212038483668572e-05,
"loss": 0.88976746,
"memory(GiB)": 67.71,
"step": 1615,
"train_speed(iter/s)": 0.039457
},
{
"acc": 0.75971909,
"epoch": 1.1141678129298487,
"grad_norm": 1.1216496229171753,
"learning_rate": 9.205899857878396e-05,
"loss": 0.85760059,
"memory(GiB)": 67.71,
"step": 1620,
"train_speed(iter/s)": 0.039517
},
{
"acc": 0.76570654,
"epoch": 1.1176066024759286,
"grad_norm": 1.2371224164962769,
"learning_rate": 9.199739475353596e-05,
"loss": 0.82434063,
"memory(GiB)": 67.71,
"step": 1625,
"train_speed(iter/s)": 0.039574
},
{
"acc": 0.75779676,
"epoch": 1.1210453920220083,
"grad_norm": 1.1810933351516724,
"learning_rate": 9.193557367965056e-05,
"loss": 0.85758648,
"memory(GiB)": 67.71,
"step": 1630,
"train_speed(iter/s)": 0.03963
},
{
"acc": 0.76221857,
"epoch": 1.124484181568088,
"grad_norm": 1.1857250928878784,
"learning_rate": 9.187353567696055e-05,
"loss": 0.84511681,
"memory(GiB)": 67.71,
"step": 1635,
"train_speed(iter/s)": 0.039679
},
{
"acc": 0.74748664,
"epoch": 1.1279229711141678,
"grad_norm": 1.026563286781311,
"learning_rate": 9.181128106642096e-05,
"loss": 0.9065136,
"memory(GiB)": 67.71,
"step": 1640,
"train_speed(iter/s)": 0.039732
},
{
"acc": 0.75156937,
"epoch": 1.1313617606602475,
"grad_norm": 1.0305781364440918,
"learning_rate": 9.174881017010746e-05,
"loss": 0.86748962,
"memory(GiB)": 67.71,
"step": 1645,
"train_speed(iter/s)": 0.039783
},
{
"acc": 0.75970831,
"epoch": 1.1348005502063274,
"grad_norm": 1.2061082124710083,
"learning_rate": 9.168612331121477e-05,
"loss": 0.84413948,
"memory(GiB)": 67.71,
"step": 1650,
"train_speed(iter/s)": 0.039833
},
{
"acc": 0.75250425,
"epoch": 1.138239339752407,
"grad_norm": 1.2730051279067993,
"learning_rate": 9.162322081405473e-05,
"loss": 0.86202583,
"memory(GiB)": 67.71,
"step": 1655,
"train_speed(iter/s)": 0.039887
},
{
"acc": 0.7535017,
"epoch": 1.141678129298487,
"grad_norm": 1.0208563804626465,
"learning_rate": 9.156010300405495e-05,
"loss": 0.86017208,
"memory(GiB)": 67.71,
"step": 1660,
"train_speed(iter/s)": 0.03994
},
{
"acc": 0.7593123,
"epoch": 1.1451169188445667,
"grad_norm": 1.2210179567337036,
"learning_rate": 9.149677020775686e-05,
"loss": 0.8386488,
"memory(GiB)": 67.71,
"step": 1665,
"train_speed(iter/s)": 0.039998
},
{
"acc": 0.76598496,
"epoch": 1.1485557083906466,
"grad_norm": 1.1266486644744873,
"learning_rate": 9.143322275281419e-05,
"loss": 0.84045124,
"memory(GiB)": 67.71,
"step": 1670,
"train_speed(iter/s)": 0.040053
},
{
"acc": 0.7449192,
"epoch": 1.1519944979367263,
"grad_norm": 1.2747905254364014,
"learning_rate": 9.136946096799117e-05,
"loss": 0.89558125,
"memory(GiB)": 67.71,
"step": 1675,
"train_speed(iter/s)": 0.040103
},
{
"acc": 0.77260947,
"epoch": 1.155433287482806,
"grad_norm": 1.1446512937545776,
"learning_rate": 9.13054851831609e-05,
"loss": 0.79779301,
"memory(GiB)": 67.71,
"step": 1680,
"train_speed(iter/s)": 0.040158
},
{
"acc": 0.73968267,
"epoch": 1.1588720770288858,
"grad_norm": 1.0520663261413574,
"learning_rate": 9.124129572930356e-05,
"loss": 0.91217728,
"memory(GiB)": 67.71,
"step": 1685,
"train_speed(iter/s)": 0.040209
},
{
"acc": 0.76177702,
"epoch": 1.1623108665749655,
"grad_norm": 1.0818169116973877,
"learning_rate": 9.117689293850484e-05,
"loss": 0.84482117,
"memory(GiB)": 67.71,
"step": 1690,
"train_speed(iter/s)": 0.040254
},
{
"acc": 0.75831223,
"epoch": 1.1657496561210454,
"grad_norm": 1.1914788484573364,
"learning_rate": 9.111227714395406e-05,
"loss": 0.85761623,
"memory(GiB)": 67.71,
"step": 1695,
"train_speed(iter/s)": 0.040302
},
{
"acc": 0.75756545,
"epoch": 1.169188445667125,
"grad_norm": 1.1335783004760742,
"learning_rate": 9.104744867994258e-05,
"loss": 0.85422668,
"memory(GiB)": 67.71,
"step": 1700,
"train_speed(iter/s)": 0.040347
},
{
"epoch": 1.169188445667125,
"eval_acc": 0.7492295406487605,
"eval_loss": 0.9045791625976562,
"eval_runtime": 1125.5138,
"eval_samples_per_second": 3.805,
"eval_steps_per_second": 0.068,
"step": 1700
},
{
"acc": 0.75732212,
"epoch": 1.172627235213205,
"grad_norm": 0.9720064997673035,
"learning_rate": 9.098240788186192e-05,
"loss": 0.85368481,
"memory(GiB)": 67.71,
"step": 1705,
"train_speed(iter/s)": 0.03935
},
{
"acc": 0.76147232,
"epoch": 1.1760660247592847,
"grad_norm": 1.2705514430999756,
"learning_rate": 9.091715508620222e-05,
"loss": 0.85527439,
"memory(GiB)": 67.71,
"step": 1710,
"train_speed(iter/s)": 0.039404
},
{
"acc": 0.74866266,
"epoch": 1.1795048143053646,
"grad_norm": 1.1010618209838867,
"learning_rate": 9.085169063055032e-05,
"loss": 0.8962719,
"memory(GiB)": 67.71,
"step": 1715,
"train_speed(iter/s)": 0.039457
},
{
"acc": 0.76777854,
"epoch": 1.1829436038514443,
"grad_norm": 1.0222831964492798,
"learning_rate": 9.078601485358813e-05,
"loss": 0.81568956,
"memory(GiB)": 67.71,
"step": 1720,
"train_speed(iter/s)": 0.03951
},
{
"acc": 0.74900856,
"epoch": 1.1863823933975242,
"grad_norm": 1.1607588529586792,
"learning_rate": 9.072012809509081e-05,
"loss": 0.88696823,
"memory(GiB)": 67.71,
"step": 1725,
"train_speed(iter/s)": 0.039555
},
{
"acc": 0.75043535,
"epoch": 1.1898211829436038,
"grad_norm": 1.1782574653625488,
"learning_rate": 9.065403069592505e-05,
"loss": 0.86962795,
"memory(GiB)": 67.71,
"step": 1730,
"train_speed(iter/s)": 0.039599
},
{
"acc": 0.74629622,
"epoch": 1.1932599724896837,
"grad_norm": 1.1644479036331177,
"learning_rate": 9.058772299804731e-05,
"loss": 0.88353643,
"memory(GiB)": 67.71,
"step": 1735,
"train_speed(iter/s)": 0.039653
},
{
"acc": 0.76667023,
"epoch": 1.1966987620357634,
"grad_norm": 1.176121711730957,
"learning_rate": 9.052120534450196e-05,
"loss": 0.82560787,
"memory(GiB)": 67.71,
"step": 1740,
"train_speed(iter/s)": 0.039705
},
{
"acc": 0.7706706,
"epoch": 1.200137551581843,
"grad_norm": 1.2071737051010132,
"learning_rate": 9.045447807941972e-05,
"loss": 0.82129135,
"memory(GiB)": 67.71,
"step": 1745,
"train_speed(iter/s)": 0.039758
},
{
"acc": 0.76220055,
"epoch": 1.203576341127923,
"grad_norm": 1.161576509475708,
"learning_rate": 9.038754154801559e-05,
"loss": 0.84442816,
"memory(GiB)": 67.71,
"step": 1750,
"train_speed(iter/s)": 0.039809
},
{
"acc": 0.76516528,
"epoch": 1.2070151306740027,
"grad_norm": 1.0194506645202637,
"learning_rate": 9.032039609658732e-05,
"loss": 0.82462807,
"memory(GiB)": 67.71,
"step": 1755,
"train_speed(iter/s)": 0.03985
},
{
"acc": 0.76512585,
"epoch": 1.2104539202200826,
"grad_norm": 1.123105764389038,
"learning_rate": 9.025304207251346e-05,
"loss": 0.84622154,
"memory(GiB)": 67.71,
"step": 1760,
"train_speed(iter/s)": 0.039905
},
{
"acc": 0.75925913,
"epoch": 1.2138927097661623,
"grad_norm": 1.0418940782546997,
"learning_rate": 9.018547982425164e-05,
"loss": 0.84370403,
"memory(GiB)": 67.71,
"step": 1765,
"train_speed(iter/s)": 0.03995
},
{
"acc": 0.76256437,
"epoch": 1.2173314993122422,
"grad_norm": 1.133818507194519,
"learning_rate": 9.011770970133671e-05,
"loss": 0.84478779,
"memory(GiB)": 67.71,
"step": 1770,
"train_speed(iter/s)": 0.040002
},
{
"acc": 0.75265675,
"epoch": 1.2207702888583218,
"grad_norm": 1.3675616979599,
"learning_rate": 9.0049732054379e-05,
"loss": 0.86621552,
"memory(GiB)": 67.71,
"step": 1775,
"train_speed(iter/s)": 0.040062
},
{
"acc": 0.75733051,
"epoch": 1.2242090784044017,
"grad_norm": 1.2875425815582275,
"learning_rate": 8.998154723506249e-05,
"loss": 0.88228512,
"memory(GiB)": 67.71,
"step": 1780,
"train_speed(iter/s)": 0.040108
},
{
"acc": 0.74635658,
"epoch": 1.2276478679504814,
"grad_norm": 1.2586891651153564,
"learning_rate": 8.991315559614288e-05,
"loss": 0.90037432,
"memory(GiB)": 67.71,
"step": 1785,
"train_speed(iter/s)": 0.040152
},
{
"acc": 0.7586679,
"epoch": 1.231086657496561,
"grad_norm": 1.1891663074493408,
"learning_rate": 8.984455749144597e-05,
"loss": 0.84769564,
"memory(GiB)": 67.71,
"step": 1790,
"train_speed(iter/s)": 0.0402
},
{
"acc": 0.74606829,
"epoch": 1.234525447042641,
"grad_norm": 1.154038667678833,
"learning_rate": 8.977575327586563e-05,
"loss": 0.88660145,
"memory(GiB)": 67.71,
"step": 1795,
"train_speed(iter/s)": 0.040243
},
{
"acc": 0.77012577,
"epoch": 1.2379642365887207,
"grad_norm": 1.2006701231002808,
"learning_rate": 8.97067433053621e-05,
"loss": 0.8128231,
"memory(GiB)": 67.71,
"step": 1800,
"train_speed(iter/s)": 0.040293
},
{
"epoch": 1.2379642365887207,
"eval_acc": 0.7502980609169029,
"eval_loss": 0.9007091522216797,
"eval_runtime": 1174.5762,
"eval_samples_per_second": 3.646,
"eval_steps_per_second": 0.066,
"step": 1800
},
{
"acc": 0.74588566,
"epoch": 1.2414030261348006,
"grad_norm": 1.2025572061538696,
"learning_rate": 8.963752793696004e-05,
"loss": 0.89730377,
"memory(GiB)": 67.71,
"step": 1805,
"train_speed(iter/s)": 0.039311
},
{
"acc": 0.76906261,
"epoch": 1.2448418156808803,
"grad_norm": 1.0686986446380615,
"learning_rate": 8.956810752874682e-05,
"loss": 0.81423302,
"memory(GiB)": 67.71,
"step": 1810,
"train_speed(iter/s)": 0.039359
},
{
"acc": 0.77615113,
"epoch": 1.2482806052269602,
"grad_norm": 1.2386928796768188,
"learning_rate": 8.949848243987054e-05,
"loss": 0.79887466,
"memory(GiB)": 67.71,
"step": 1815,
"train_speed(iter/s)": 0.039407
},
{
"acc": 0.75191274,
"epoch": 1.2517193947730398,
"grad_norm": 1.180568814277649,
"learning_rate": 8.94286530305382e-05,
"loss": 0.85600204,
"memory(GiB)": 67.71,
"step": 1820,
"train_speed(iter/s)": 0.039452
},
{
"acc": 0.76613312,
"epoch": 1.2551581843191197,
"grad_norm": 1.1538622379302979,
"learning_rate": 8.935861966201393e-05,
"loss": 0.82688131,
"memory(GiB)": 67.71,
"step": 1825,
"train_speed(iter/s)": 0.039499
},
{
"acc": 0.77081518,
"epoch": 1.2585969738651994,
"grad_norm": 1.0973575115203857,
"learning_rate": 8.928838269661694e-05,
"loss": 0.80709963,
"memory(GiB)": 67.71,
"step": 1830,
"train_speed(iter/s)": 0.039543
},
{
"acc": 0.74893703,
"epoch": 1.262035763411279,
"grad_norm": 1.1516822576522827,
"learning_rate": 8.921794249771987e-05,
"loss": 0.87887421,
"memory(GiB)": 67.71,
"step": 1835,
"train_speed(iter/s)": 0.039584
},
{
"acc": 0.74806905,
"epoch": 1.265474552957359,
"grad_norm": 1.1790939569473267,
"learning_rate": 8.914729942974674e-05,
"loss": 0.88099899,
"memory(GiB)": 67.71,
"step": 1840,
"train_speed(iter/s)": 0.03963
},
{
"acc": 0.77447009,
"epoch": 1.268913342503439,
"grad_norm": 1.009238600730896,
"learning_rate": 8.907645385817104e-05,
"loss": 0.7905911,
"memory(GiB)": 67.71,
"step": 1845,
"train_speed(iter/s)": 0.039686
},
{
"acc": 0.75110741,
"epoch": 1.2723521320495186,
"grad_norm": 1.2757585048675537,
"learning_rate": 8.900540614951409e-05,
"loss": 0.87034512,
"memory(GiB)": 67.71,
"step": 1850,
"train_speed(iter/s)": 0.03973
},
{
"acc": 0.74727058,
"epoch": 1.2757909215955983,
"grad_norm": 1.0743454694747925,
"learning_rate": 8.893415667134281e-05,
"loss": 0.88521938,
"memory(GiB)": 67.71,
"step": 1855,
"train_speed(iter/s)": 0.039772
},
{
"acc": 0.76257467,
"epoch": 1.2792297111416782,
"grad_norm": 1.0623903274536133,
"learning_rate": 8.886270579226807e-05,
"loss": 0.84139423,
"memory(GiB)": 67.71,
"step": 1860,
"train_speed(iter/s)": 0.03982
},
{
"acc": 0.76310492,
"epoch": 1.2826685006877578,
"grad_norm": 1.0730196237564087,
"learning_rate": 8.879105388194267e-05,
"loss": 0.84801579,
"memory(GiB)": 67.71,
"step": 1865,
"train_speed(iter/s)": 0.039868
},
{
"acc": 0.76296768,
"epoch": 1.2861072902338377,
"grad_norm": 1.0681921243667603,
"learning_rate": 8.871920131105943e-05,
"loss": 0.82966671,
"memory(GiB)": 67.71,
"step": 1870,
"train_speed(iter/s)": 0.039919
},
{
"acc": 0.7662539,
"epoch": 1.2895460797799174,
"grad_norm": 1.1676512956619263,
"learning_rate": 8.864714845134931e-05,
"loss": 0.82158031,
"memory(GiB)": 67.71,
"step": 1875,
"train_speed(iter/s)": 0.039968
},
{
"acc": 0.76386523,
"epoch": 1.2929848693259973,
"grad_norm": 1.2241677045822144,
"learning_rate": 8.857489567557949e-05,
"loss": 0.8327158,
"memory(GiB)": 67.71,
"step": 1880,
"train_speed(iter/s)": 0.040021
},
{
"acc": 0.77355728,
"epoch": 1.296423658872077,
"grad_norm": 1.0751720666885376,
"learning_rate": 8.850244335755136e-05,
"loss": 0.803335,
"memory(GiB)": 67.71,
"step": 1885,
"train_speed(iter/s)": 0.040073
},
{
"acc": 0.76320724,
"epoch": 1.299862448418157,
"grad_norm": 1.292360544204712,
"learning_rate": 8.84297918720987e-05,
"loss": 0.85545721,
"memory(GiB)": 67.71,
"step": 1890,
"train_speed(iter/s)": 0.040122
},
{
"acc": 0.76533775,
"epoch": 1.3033012379642366,
"grad_norm": 1.27505624294281,
"learning_rate": 8.835694159508568e-05,
"loss": 0.83456764,
"memory(GiB)": 67.71,
"step": 1895,
"train_speed(iter/s)": 0.040172
},
{
"acc": 0.77199011,
"epoch": 1.3067400275103163,
"grad_norm": 1.0612465143203735,
"learning_rate": 8.82838929034049e-05,
"loss": 0.81219292,
"memory(GiB)": 67.71,
"step": 1900,
"train_speed(iter/s)": 0.040222
},
{
"epoch": 1.3067400275103163,
"eval_acc": 0.7521145453727449,
"eval_loss": 0.8923233151435852,
"eval_runtime": 1131.335,
"eval_samples_per_second": 3.786,
"eval_steps_per_second": 0.068,
"step": 1900
},
{
"acc": 0.75053563,
"epoch": 1.3101788170563962,
"grad_norm": 1.2158348560333252,
"learning_rate": 8.821064617497549e-05,
"loss": 0.87764034,
"memory(GiB)": 67.71,
"step": 1905,
"train_speed(iter/s)": 0.039328
},
{
"acc": 0.76767535,
"epoch": 1.313617606602476,
"grad_norm": 1.0964173078536987,
"learning_rate": 8.81372017887411e-05,
"loss": 0.83279819,
"memory(GiB)": 67.71,
"step": 1910,
"train_speed(iter/s)": 0.039378
},
{
"acc": 0.75541239,
"epoch": 1.3170563961485557,
"grad_norm": 1.2945960760116577,
"learning_rate": 8.806356012466799e-05,
"loss": 0.8567975,
"memory(GiB)": 67.71,
"step": 1915,
"train_speed(iter/s)": 0.03942
},
{
"acc": 0.75376849,
"epoch": 1.3204951856946354,
"grad_norm": 1.2059944868087769,
"learning_rate": 8.798972156374303e-05,
"loss": 0.86053438,
"memory(GiB)": 67.71,
"step": 1920,
"train_speed(iter/s)": 0.039461
},
{
"acc": 0.75244598,
"epoch": 1.3239339752407153,
"grad_norm": 1.2470142841339111,
"learning_rate": 8.791568648797175e-05,
"loss": 0.84860821,
"memory(GiB)": 67.71,
"step": 1925,
"train_speed(iter/s)": 0.039509
},
{
"acc": 0.76857953,
"epoch": 1.327372764786795,
"grad_norm": 1.074821949005127,
"learning_rate": 8.784145528037633e-05,
"loss": 0.81543255,
"memory(GiB)": 67.71,
"step": 1930,
"train_speed(iter/s)": 0.039548
},
{
"acc": 0.75690975,
"epoch": 1.330811554332875,
"grad_norm": 1.2594019174575806,
"learning_rate": 8.776702832499369e-05,
"loss": 0.85649605,
"memory(GiB)": 67.71,
"step": 1935,
"train_speed(iter/s)": 0.039588
},
{
"acc": 0.75640688,
"epoch": 1.3342503438789546,
"grad_norm": 1.2854877710342407,
"learning_rate": 8.769240600687341e-05,
"loss": 0.85886908,
"memory(GiB)": 67.71,
"step": 1940,
"train_speed(iter/s)": 0.03963
},
{
"acc": 0.76101456,
"epoch": 1.3376891334250343,
"grad_norm": 1.2323275804519653,
"learning_rate": 8.761758871207578e-05,
"loss": 0.85549269,
"memory(GiB)": 67.71,
"step": 1945,
"train_speed(iter/s)": 0.039677
},
{
"acc": 0.75675645,
"epoch": 1.3411279229711142,
"grad_norm": 1.3912837505340576,
"learning_rate": 8.754257682766987e-05,
"loss": 0.86173325,
"memory(GiB)": 67.71,
"step": 1950,
"train_speed(iter/s)": 0.039723
},
{
"acc": 0.75836124,
"epoch": 1.344566712517194,
"grad_norm": 1.325785517692566,
"learning_rate": 8.746737074173139e-05,
"loss": 0.85381556,
"memory(GiB)": 67.71,
"step": 1955,
"train_speed(iter/s)": 0.039767
},
{
"acc": 0.75378246,
"epoch": 1.3480055020632737,
"grad_norm": 1.3383103609085083,
"learning_rate": 8.739197084334078e-05,
"loss": 0.85643635,
"memory(GiB)": 67.71,
"step": 1960,
"train_speed(iter/s)": 0.039815
},
{
"acc": 0.74990363,
"epoch": 1.3514442916093534,
"grad_norm": 1.0907026529312134,
"learning_rate": 8.731637752258122e-05,
"loss": 0.8558506,
"memory(GiB)": 67.71,
"step": 1965,
"train_speed(iter/s)": 0.039861
},
{
"acc": 0.75551319,
"epoch": 1.3548830811554333,
"grad_norm": 1.1416265964508057,
"learning_rate": 8.724059117053647e-05,
"loss": 0.86469622,
"memory(GiB)": 67.71,
"step": 1970,
"train_speed(iter/s)": 0.039902
},
{
"acc": 0.7511488,
"epoch": 1.358321870701513,
"grad_norm": 1.1393564939498901,
"learning_rate": 8.716461217928903e-05,
"loss": 0.85416451,
"memory(GiB)": 67.71,
"step": 1975,
"train_speed(iter/s)": 0.039948
},
{
"acc": 0.76492167,
"epoch": 1.361760660247593,
"grad_norm": 1.0601388216018677,
"learning_rate": 8.708844094191798e-05,
"loss": 0.82022047,
"memory(GiB)": 67.71,
"step": 1980,
"train_speed(iter/s)": 0.039988
},
{
"acc": 0.75548849,
"epoch": 1.3651994497936726,
"grad_norm": 1.1647326946258545,
"learning_rate": 8.701207785249703e-05,
"loss": 0.8785594,
"memory(GiB)": 67.71,
"step": 1985,
"train_speed(iter/s)": 0.040031
},
{
"acc": 0.76257005,
"epoch": 1.3686382393397525,
"grad_norm": 1.208771824836731,
"learning_rate": 8.693552330609235e-05,
"loss": 0.82169209,
"memory(GiB)": 67.71,
"step": 1990,
"train_speed(iter/s)": 0.040075
},
{
"acc": 0.7662899,
"epoch": 1.3720770288858322,
"grad_norm": 1.0375357866287231,
"learning_rate": 8.685877769876074e-05,
"loss": 0.82175579,
"memory(GiB)": 67.71,
"step": 1995,
"train_speed(iter/s)": 0.040117
},
{
"acc": 0.75507236,
"epoch": 1.375515818431912,
"grad_norm": 1.070656180381775,
"learning_rate": 8.678184142754736e-05,
"loss": 0.84867239,
"memory(GiB)": 67.71,
"step": 2000,
"train_speed(iter/s)": 0.040168
},
{
"epoch": 1.375515818431912,
"eval_acc": 0.7514059477212399,
"eval_loss": 0.890434980392456,
"eval_runtime": 1140.0174,
"eval_samples_per_second": 3.757,
"eval_steps_per_second": 0.068,
"step": 2000
},
{
"acc": 0.75042534,
"epoch": 1.3789546079779917,
"grad_norm": 1.181110143661499,
"learning_rate": 8.670471489048382e-05,
"loss": 0.90365086,
"memory(GiB)": 67.71,
"step": 2005,
"train_speed(iter/s)": 0.039309
},
{
"acc": 0.75193415,
"epoch": 1.3823933975240714,
"grad_norm": 1.0542738437652588,
"learning_rate": 8.662739848658605e-05,
"loss": 0.86276369,
"memory(GiB)": 67.71,
"step": 2010,
"train_speed(iter/s)": 0.039351
},
{
"acc": 0.76266041,
"epoch": 1.3858321870701513,
"grad_norm": 1.1464662551879883,
"learning_rate": 8.654989261585231e-05,
"loss": 0.83303232,
"memory(GiB)": 67.71,
"step": 2015,
"train_speed(iter/s)": 0.039396
},
{
"acc": 0.7555974,
"epoch": 1.3892709766162312,
"grad_norm": 1.137511134147644,
"learning_rate": 8.6472197679261e-05,
"loss": 0.87258329,
"memory(GiB)": 67.71,
"step": 2020,
"train_speed(iter/s)": 0.039441
},
{
"acc": 0.75746002,
"epoch": 1.392709766162311,
"grad_norm": 1.1067372560501099,
"learning_rate": 8.639431407876873e-05,
"loss": 0.8575942,
"memory(GiB)": 67.71,
"step": 2025,
"train_speed(iter/s)": 0.039486
},
{
"acc": 0.74920359,
"epoch": 1.3961485557083906,
"grad_norm": 1.1339222192764282,
"learning_rate": 8.631624221730809e-05,
"loss": 0.89333057,
"memory(GiB)": 67.71,
"step": 2030,
"train_speed(iter/s)": 0.039533
},
{
"acc": 0.75785513,
"epoch": 1.3995873452544705,
"grad_norm": 1.193408489227295,
"learning_rate": 8.623798249878573e-05,
"loss": 0.85004654,
"memory(GiB)": 67.71,
"step": 2035,
"train_speed(iter/s)": 0.039579
},
{
"acc": 0.76322355,
"epoch": 1.4030261348005502,
"grad_norm": 1.4497336149215698,
"learning_rate": 8.615953532808008e-05,
"loss": 0.85098343,
"memory(GiB)": 67.71,
"step": 2040,
"train_speed(iter/s)": 0.039624
},
{
"acc": 0.75989523,
"epoch": 1.40646492434663,
"grad_norm": 1.424786925315857,
"learning_rate": 8.608090111103948e-05,
"loss": 0.86450672,
"memory(GiB)": 67.71,
"step": 2045,
"train_speed(iter/s)": 0.039665
},
{
"acc": 0.75607204,
"epoch": 1.4099037138927097,
"grad_norm": 1.1818575859069824,
"learning_rate": 8.600208025447983e-05,
"loss": 0.859338,
"memory(GiB)": 67.71,
"step": 2050,
"train_speed(iter/s)": 0.039707
},
{
"acc": 0.74912252,
"epoch": 1.4133425034387894,
"grad_norm": 1.0915964841842651,
"learning_rate": 8.592307316618272e-05,
"loss": 0.88583393,
"memory(GiB)": 67.71,
"step": 2055,
"train_speed(iter/s)": 0.039753
},
{
"acc": 0.7610446,
"epoch": 1.4167812929848693,
"grad_norm": 1.3371332883834839,
"learning_rate": 8.584388025489314e-05,
"loss": 0.83794365,
"memory(GiB)": 67.71,
"step": 2060,
"train_speed(iter/s)": 0.0398
},
{
"acc": 0.76503677,
"epoch": 1.4202200825309492,
"grad_norm": 1.0140537023544312,
"learning_rate": 8.57645019303175e-05,
"loss": 0.81193466,
"memory(GiB)": 67.71,
"step": 2065,
"train_speed(iter/s)": 0.039843
},
{
"acc": 0.75801926,
"epoch": 1.423658872077029,
"grad_norm": 1.2762821912765503,
"learning_rate": 8.568493860312142e-05,
"loss": 0.85571671,
"memory(GiB)": 67.71,
"step": 2070,
"train_speed(iter/s)": 0.039884
},
{
"acc": 0.75672455,
"epoch": 1.4270976616231086,
"grad_norm": 1.1539915800094604,
"learning_rate": 8.56051906849276e-05,
"loss": 0.86462698,
"memory(GiB)": 67.71,
"step": 2075,
"train_speed(iter/s)": 0.039921
},
{
"acc": 0.75165954,
"epoch": 1.4305364511691885,
"grad_norm": 1.306449055671692,
"learning_rate": 8.55252585883138e-05,
"loss": 0.88216114,
"memory(GiB)": 67.71,
"step": 2080,
"train_speed(iter/s)": 0.039965
},
{
"acc": 0.76647811,
"epoch": 1.4339752407152682,
"grad_norm": 1.1315670013427734,
"learning_rate": 8.544514272681056e-05,
"loss": 0.83219862,
"memory(GiB)": 67.71,
"step": 2085,
"train_speed(iter/s)": 0.040006
},
{
"acc": 0.75151563,
"epoch": 1.437414030261348,
"grad_norm": 1.2553141117095947,
"learning_rate": 8.536484351489918e-05,
"loss": 0.88327541,
"memory(GiB)": 67.71,
"step": 2090,
"train_speed(iter/s)": 0.040054
},
{
"acc": 0.75124393,
"epoch": 1.4408528198074277,
"grad_norm": 1.0970312356948853,
"learning_rate": 8.528436136800955e-05,
"loss": 0.86639719,
"memory(GiB)": 67.71,
"step": 2095,
"train_speed(iter/s)": 0.040094
},
{
"acc": 0.75917168,
"epoch": 1.4442916093535076,
"grad_norm": 1.1937634944915771,
"learning_rate": 8.520369670251787e-05,
"loss": 0.85560112,
"memory(GiB)": 67.71,
"step": 2100,
"train_speed(iter/s)": 0.040143
},
{
"epoch": 1.4442916093535076,
"eval_acc": 0.7529749853781437,
"eval_loss": 0.8845105767250061,
"eval_runtime": 1065.8538,
"eval_samples_per_second": 4.018,
"eval_steps_per_second": 0.072,
"step": 2100
},
{
"acc": 0.75502768,
"epoch": 1.4477303988995873,
"grad_norm": 1.2217875719070435,
"learning_rate": 8.512284993574473e-05,
"loss": 0.86460505,
"memory(GiB)": 67.71,
"step": 2105,
"train_speed(iter/s)": 0.039384
},
{
"acc": 0.7641192,
"epoch": 1.4511691884456672,
"grad_norm": 1.1658051013946533,
"learning_rate": 8.504182148595275e-05,
"loss": 0.82581739,
"memory(GiB)": 67.71,
"step": 2110,
"train_speed(iter/s)": 0.039424
},
{
"acc": 0.75860772,
"epoch": 1.454607977991747,
"grad_norm": 1.4592278003692627,
"learning_rate": 8.496061177234452e-05,
"loss": 0.84182692,
"memory(GiB)": 67.71,
"step": 2115,
"train_speed(iter/s)": 0.039466
},
{
"acc": 0.76232295,
"epoch": 1.4580467675378266,
"grad_norm": 1.2424806356430054,
"learning_rate": 8.487922121506039e-05,
"loss": 0.84641819,
"memory(GiB)": 67.71,
"step": 2120,
"train_speed(iter/s)": 0.039509
},
{
"acc": 0.75302744,
"epoch": 1.4614855570839065,
"grad_norm": 1.1985810995101929,
"learning_rate": 8.479765023517631e-05,
"loss": 0.87050896,
"memory(GiB)": 67.71,
"step": 2125,
"train_speed(iter/s)": 0.039552
},
{
"acc": 0.76178207,
"epoch": 1.4649243466299862,
"grad_norm": 1.108946442604065,
"learning_rate": 8.471589925470166e-05,
"loss": 0.82996387,
"memory(GiB)": 67.71,
"step": 2130,
"train_speed(iter/s)": 0.039594
},
{
"acc": 0.75262017,
"epoch": 1.468363136176066,
"grad_norm": 1.266554832458496,
"learning_rate": 8.463396869657704e-05,
"loss": 0.85832672,
"memory(GiB)": 67.71,
"step": 2135,
"train_speed(iter/s)": 0.039634
},
{
"acc": 0.76968784,
"epoch": 1.4718019257221457,
"grad_norm": 1.2651324272155762,
"learning_rate": 8.455185898467213e-05,
"loss": 0.80993366,
"memory(GiB)": 67.71,
"step": 2140,
"train_speed(iter/s)": 0.039677
},
{
"acc": 0.76287999,
"epoch": 1.4752407152682256,
"grad_norm": 1.4108299016952515,
"learning_rate": 8.446957054378344e-05,
"loss": 0.82752171,
"memory(GiB)": 67.71,
"step": 2145,
"train_speed(iter/s)": 0.039717
},
{
"acc": 0.77508984,
"epoch": 1.4786795048143053,
"grad_norm": 1.1667840480804443,
"learning_rate": 8.438710379963214e-05,
"loss": 0.78502192,
"memory(GiB)": 67.71,
"step": 2150,
"train_speed(iter/s)": 0.039761
},
{
"acc": 0.74883337,
"epoch": 1.4821182943603852,
"grad_norm": 1.1578980684280396,
"learning_rate": 8.430445917886186e-05,
"loss": 0.88730097,
"memory(GiB)": 67.71,
"step": 2155,
"train_speed(iter/s)": 0.039804
},
{
"acc": 0.77094564,
"epoch": 1.485557083906465,
"grad_norm": 1.039753794670105,
"learning_rate": 8.422163710903649e-05,
"loss": 0.80611877,
"memory(GiB)": 67.71,
"step": 2160,
"train_speed(iter/s)": 0.039843
},
{
"acc": 0.76888881,
"epoch": 1.4889958734525446,
"grad_norm": 1.5004595518112183,
"learning_rate": 8.413863801863794e-05,
"loss": 0.80163708,
"memory(GiB)": 67.71,
"step": 2165,
"train_speed(iter/s)": 0.039887
},
{
"acc": 0.76752806,
"epoch": 1.4924346629986245,
"grad_norm": 1.2288601398468018,
"learning_rate": 8.405546233706395e-05,
"loss": 0.82048512,
"memory(GiB)": 67.71,
"step": 2170,
"train_speed(iter/s)": 0.039936
},
{
"acc": 0.76018772,
"epoch": 1.4958734525447044,
"grad_norm": 1.1614660024642944,
"learning_rate": 8.397211049462586e-05,
"loss": 0.84854307,
"memory(GiB)": 67.71,
"step": 2175,
"train_speed(iter/s)": 0.039975
},
{
"acc": 0.77043438,
"epoch": 1.499312242090784,
"grad_norm": 1.3372976779937744,
"learning_rate": 8.388858292254637e-05,
"loss": 0.79604712,
"memory(GiB)": 67.71,
"step": 2180,
"train_speed(iter/s)": 0.040022
},
{
"acc": 0.76631165,
"epoch": 1.5027510316368637,
"grad_norm": 1.1987308263778687,
"learning_rate": 8.380488005295732e-05,
"loss": 0.83228321,
"memory(GiB)": 67.71,
"step": 2185,
"train_speed(iter/s)": 0.040065
},
{
"acc": 0.75986252,
"epoch": 1.5061898211829436,
"grad_norm": 1.0442498922348022,
"learning_rate": 8.37210023188975e-05,
"loss": 0.84417458,
"memory(GiB)": 67.71,
"step": 2190,
"train_speed(iter/s)": 0.040102
},
{
"acc": 0.75435362,
"epoch": 1.5096286107290235,
"grad_norm": 1.0899875164031982,
"learning_rate": 8.363695015431028e-05,
"loss": 0.8657095,
"memory(GiB)": 67.71,
"step": 2195,
"train_speed(iter/s)": 0.04015
},
{
"acc": 0.76132326,
"epoch": 1.5130674002751032,
"grad_norm": 1.076157569885254,
"learning_rate": 8.355272399404156e-05,
"loss": 0.83814745,
"memory(GiB)": 67.71,
"step": 2200,
"train_speed(iter/s)": 0.040192
},
{
"epoch": 1.5130674002751032,
"eval_acc": 0.7542065955819499,
"eval_loss": 0.877788245677948,
"eval_runtime": 1157.3904,
"eval_samples_per_second": 3.701,
"eval_steps_per_second": 0.067,
"step": 2200
},
{
"acc": 0.75972261,
"epoch": 1.516506189821183,
"grad_norm": 1.387868046760559,
"learning_rate": 8.346832427383732e-05,
"loss": 0.84049091,
"memory(GiB)": 67.71,
"step": 2205,
"train_speed(iter/s)": 0.039401
},
{
"acc": 0.74931083,
"epoch": 1.5199449793672626,
"grad_norm": 1.2687524557113647,
"learning_rate": 8.338375143034148e-05,
"loss": 0.87477436,
"memory(GiB)": 67.71,
"step": 2210,
"train_speed(iter/s)": 0.039447
},
{
"acc": 0.76771116,
"epoch": 1.5233837689133425,
"grad_norm": 1.1818050146102905,
"learning_rate": 8.329900590109365e-05,
"loss": 0.81554508,
"memory(GiB)": 67.71,
"step": 2215,
"train_speed(iter/s)": 0.039489
},
{
"acc": 0.76883683,
"epoch": 1.5268225584594224,
"grad_norm": 1.1263651847839355,
"learning_rate": 8.321408812452678e-05,
"loss": 0.82251701,
"memory(GiB)": 67.71,
"step": 2220,
"train_speed(iter/s)": 0.03953
},
{
"acc": 0.76298013,
"epoch": 1.530261348005502,
"grad_norm": 1.1538478136062622,
"learning_rate": 8.312899853996501e-05,
"loss": 0.81565828,
"memory(GiB)": 67.71,
"step": 2225,
"train_speed(iter/s)": 0.039564
},
{
"acc": 0.75918069,
"epoch": 1.5337001375515817,
"grad_norm": 1.2974464893341064,
"learning_rate": 8.304373758762128e-05,
"loss": 0.8574604,
"memory(GiB)": 67.71,
"step": 2230,
"train_speed(iter/s)": 0.039603
},
{
"acc": 0.76306868,
"epoch": 1.5371389270976616,
"grad_norm": 1.2755868434906006,
"learning_rate": 8.295830570859512e-05,
"loss": 0.83660641,
"memory(GiB)": 67.71,
"step": 2235,
"train_speed(iter/s)": 0.039636
},
{
"acc": 0.75555844,
"epoch": 1.5405777166437415,
"grad_norm": 1.196268081665039,
"learning_rate": 8.287270334487034e-05,
"loss": 0.84184723,
"memory(GiB)": 67.71,
"step": 2240,
"train_speed(iter/s)": 0.039674
},
{
"acc": 0.77357531,
"epoch": 1.5440165061898212,
"grad_norm": 1.2208247184753418,
"learning_rate": 8.278693093931282e-05,
"loss": 0.79285612,
"memory(GiB)": 67.71,
"step": 2245,
"train_speed(iter/s)": 0.039716
},
{
"acc": 0.75058088,
"epoch": 1.547455295735901,
"grad_norm": 1.1691052913665771,
"learning_rate": 8.270098893566807e-05,
"loss": 0.864328,
"memory(GiB)": 67.73,
"step": 2250,
"train_speed(iter/s)": 0.03975
},
{
"acc": 0.74813089,
"epoch": 1.5508940852819806,
"grad_norm": 1.300010323524475,
"learning_rate": 8.261487777855909e-05,
"loss": 0.89021215,
"memory(GiB)": 67.73,
"step": 2255,
"train_speed(iter/s)": 0.039788
},
{
"acc": 0.75823145,
"epoch": 1.5543328748280605,
"grad_norm": 1.080557107925415,
"learning_rate": 8.252859791348392e-05,
"loss": 0.86599722,
"memory(GiB)": 67.73,
"step": 2260,
"train_speed(iter/s)": 0.039824
},
{
"acc": 0.75691137,
"epoch": 1.5577716643741404,
"grad_norm": 1.098506212234497,
"learning_rate": 8.244214978681348e-05,
"loss": 0.87960701,
"memory(GiB)": 67.73,
"step": 2265,
"train_speed(iter/s)": 0.039862
},
{
"acc": 0.76327119,
"epoch": 1.56121045392022,
"grad_norm": 1.3256527185440063,
"learning_rate": 8.23555338457892e-05,
"loss": 0.8320919,
"memory(GiB)": 67.73,
"step": 2270,
"train_speed(iter/s)": 0.039901
},
{
"acc": 0.75916958,
"epoch": 1.5646492434662997,
"grad_norm": 1.2192107439041138,
"learning_rate": 8.226875053852066e-05,
"loss": 0.84912138,
"memory(GiB)": 67.73,
"step": 2275,
"train_speed(iter/s)": 0.039941
},
{
"acc": 0.75922327,
"epoch": 1.5680880330123796,
"grad_norm": 1.3527653217315674,
"learning_rate": 8.218180031398334e-05,
"loss": 0.84136915,
"memory(GiB)": 67.73,
"step": 2280,
"train_speed(iter/s)": 0.039986
},
{
"acc": 0.76518865,
"epoch": 1.5715268225584595,
"grad_norm": 1.1937755346298218,
"learning_rate": 8.209468362201627e-05,
"loss": 0.82890606,
"memory(GiB)": 67.73,
"step": 2285,
"train_speed(iter/s)": 0.040027
},
{
"acc": 0.76164193,
"epoch": 1.5749656121045392,
"grad_norm": 1.1419281959533691,
"learning_rate": 8.200740091331969e-05,
"loss": 0.8369875,
"memory(GiB)": 67.73,
"step": 2290,
"train_speed(iter/s)": 0.040071
},
{
"acc": 0.7657156,
"epoch": 1.578404401650619,
"grad_norm": 1.2638212442398071,
"learning_rate": 8.19199526394527e-05,
"loss": 0.82229643,
"memory(GiB)": 67.73,
"step": 2295,
"train_speed(iter/s)": 0.040112
},
{
"acc": 0.76849699,
"epoch": 1.5818431911966988,
"grad_norm": 1.1830896139144897,
"learning_rate": 8.183233925283104e-05,
"loss": 0.79942322,
"memory(GiB)": 67.73,
"step": 2300,
"train_speed(iter/s)": 0.040155
},
{
"epoch": 1.5818431911966988,
"eval_acc": 0.7557418904935439,
"eval_loss": 0.8737921714782715,
"eval_runtime": 1090.3389,
"eval_samples_per_second": 3.928,
"eval_steps_per_second": 0.071,
"step": 2300
},
{
"acc": 0.76840105,
"epoch": 1.5852819807427787,
"grad_norm": 1.1682363748550415,
"learning_rate": 8.17445612067246e-05,
"loss": 0.82419491,
"memory(GiB)": 67.73,
"step": 2305,
"train_speed(iter/s)": 0.039444
},
{
"acc": 0.76440401,
"epoch": 1.5887207702888584,
"grad_norm": 1.2088557481765747,
"learning_rate": 8.165661895525515e-05,
"loss": 0.8236021,
"memory(GiB)": 67.73,
"step": 2310,
"train_speed(iter/s)": 0.039487
},
{
"acc": 0.76112623,
"epoch": 1.592159559834938,
"grad_norm": 1.2835819721221924,
"learning_rate": 8.156851295339401e-05,
"loss": 0.84509296,
"memory(GiB)": 67.73,
"step": 2315,
"train_speed(iter/s)": 0.039527
},
{
"acc": 0.75933437,
"epoch": 1.5955983493810177,
"grad_norm": 1.1950072050094604,
"learning_rate": 8.148024365695961e-05,
"loss": 0.83572178,
"memory(GiB)": 67.73,
"step": 2320,
"train_speed(iter/s)": 0.039562
},
{
"acc": 0.74721594,
"epoch": 1.5990371389270976,
"grad_norm": 1.1385269165039062,
"learning_rate": 8.139181152261524e-05,
"loss": 0.87340145,
"memory(GiB)": 67.73,
"step": 2325,
"train_speed(iter/s)": 0.039597
},
{
"acc": 0.76489792,
"epoch": 1.6024759284731775,
"grad_norm": 1.3601405620574951,
"learning_rate": 8.130321700786662e-05,
"loss": 0.81867256,
"memory(GiB)": 67.73,
"step": 2330,
"train_speed(iter/s)": 0.039642
},
{
"acc": 0.7683671,
"epoch": 1.6059147180192572,
"grad_norm": 1.2533677816390991,
"learning_rate": 8.121446057105955e-05,
"loss": 0.81394958,
"memory(GiB)": 67.73,
"step": 2335,
"train_speed(iter/s)": 0.039682
},
{
"acc": 0.75637655,
"epoch": 1.609353507565337,
"grad_norm": 1.196452260017395,
"learning_rate": 8.112554267137753e-05,
"loss": 0.84699097,
"memory(GiB)": 67.73,
"step": 2340,
"train_speed(iter/s)": 0.03972
},
{
"acc": 0.77178955,
"epoch": 1.6127922971114168,
"grad_norm": 1.0918421745300293,
"learning_rate": 8.103646376883937e-05,
"loss": 0.79872456,
"memory(GiB)": 67.73,
"step": 2345,
"train_speed(iter/s)": 0.039764
},
{
"acc": 0.75204129,
"epoch": 1.6162310866574967,
"grad_norm": 1.2889692783355713,
"learning_rate": 8.094722432429691e-05,
"loss": 0.88343906,
"memory(GiB)": 67.73,
"step": 2350,
"train_speed(iter/s)": 0.039803
},
{
"acc": 0.75908298,
"epoch": 1.6196698762035764,
"grad_norm": 1.1028622388839722,
"learning_rate": 8.085782479943245e-05,
"loss": 0.8362504,
"memory(GiB)": 67.73,
"step": 2355,
"train_speed(iter/s)": 0.039844
},
{
"acc": 0.76125684,
"epoch": 1.623108665749656,
"grad_norm": 1.3756259679794312,
"learning_rate": 8.076826565675657e-05,
"loss": 0.8257452,
"memory(GiB)": 67.73,
"step": 2360,
"train_speed(iter/s)": 0.039885
},
{
"acc": 0.76714849,
"epoch": 1.6265474552957357,
"grad_norm": 1.5173252820968628,
"learning_rate": 8.067854735960555e-05,
"loss": 0.81308384,
"memory(GiB)": 67.73,
"step": 2365,
"train_speed(iter/s)": 0.039928
},
{
"acc": 0.7745882,
"epoch": 1.6299862448418156,
"grad_norm": 1.2738362550735474,
"learning_rate": 8.058867037213916e-05,
"loss": 0.79546738,
"memory(GiB)": 67.73,
"step": 2370,
"train_speed(iter/s)": 0.039964
},
{
"acc": 0.77209988,
"epoch": 1.6334250343878955,
"grad_norm": 1.1855344772338867,
"learning_rate": 8.049863515933802e-05,
"loss": 0.79778285,
"memory(GiB)": 67.73,
"step": 2375,
"train_speed(iter/s)": 0.039997
},
{
"acc": 0.76279697,
"epoch": 1.6368638239339752,
"grad_norm": 1.1562272310256958,
"learning_rate": 8.040844218700147e-05,
"loss": 0.82462883,
"memory(GiB)": 67.73,
"step": 2380,
"train_speed(iter/s)": 0.040033
},
{
"acc": 0.76684308,
"epoch": 1.640302613480055,
"grad_norm": 1.3373991250991821,
"learning_rate": 8.031809192174495e-05,
"loss": 0.81806488,
"memory(GiB)": 67.73,
"step": 2385,
"train_speed(iter/s)": 0.040073
},
{
"acc": 0.75144334,
"epoch": 1.6437414030261348,
"grad_norm": 1.3013478517532349,
"learning_rate": 8.022758483099767e-05,
"loss": 0.86880703,
"memory(GiB)": 67.73,
"step": 2390,
"train_speed(iter/s)": 0.040113
},
{
"acc": 0.7674602,
"epoch": 1.6471801925722147,
"grad_norm": 1.2739620208740234,
"learning_rate": 8.013692138300018e-05,
"loss": 0.82607212,
"memory(GiB)": 67.73,
"step": 2395,
"train_speed(iter/s)": 0.040148
},
{
"acc": 0.75973258,
"epoch": 1.6506189821182944,
"grad_norm": 1.313481330871582,
"learning_rate": 8.004610204680196e-05,
"loss": 0.83364353,
"memory(GiB)": 67.73,
"step": 2400,
"train_speed(iter/s)": 0.040184
},
{
"epoch": 1.6506189821182944,
"eval_acc": 0.7556687812120394,
"eval_loss": 0.8708279728889465,
"eval_runtime": 1138.5113,
"eval_samples_per_second": 3.762,
"eval_steps_per_second": 0.068,
"step": 2400
},
{
"acc": 0.76298213,
"epoch": 1.654057771664374,
"grad_norm": 1.3461359739303589,
"learning_rate": 7.995512729225894e-05,
"loss": 0.82495756,
"memory(GiB)": 67.73,
"step": 2405,
"train_speed(iter/s)": 0.03947
},
{
"acc": 0.76738596,
"epoch": 1.657496561210454,
"grad_norm": 1.272608757019043,
"learning_rate": 7.986399759003119e-05,
"loss": 0.82517872,
"memory(GiB)": 67.73,
"step": 2410,
"train_speed(iter/s)": 0.039512
},
{
"acc": 0.75537925,
"epoch": 1.6609353507565336,
"grad_norm": 1.2757365703582764,
"learning_rate": 7.977271341158035e-05,
"loss": 0.8790472,
"memory(GiB)": 67.73,
"step": 2415,
"train_speed(iter/s)": 0.039549
},
{
"acc": 0.75297923,
"epoch": 1.6643741403026135,
"grad_norm": 1.1887763738632202,
"learning_rate": 7.968127522916723e-05,
"loss": 0.8699337,
"memory(GiB)": 67.73,
"step": 2420,
"train_speed(iter/s)": 0.039586
},
{
"acc": 0.76874723,
"epoch": 1.6678129298486932,
"grad_norm": 1.0565059185028076,
"learning_rate": 7.95896835158494e-05,
"loss": 0.81132565,
"memory(GiB)": 67.73,
"step": 2425,
"train_speed(iter/s)": 0.03962
},
{
"acc": 0.75187912,
"epoch": 1.671251719394773,
"grad_norm": 1.1199684143066406,
"learning_rate": 7.949793874547877e-05,
"loss": 0.89120388,
"memory(GiB)": 67.73,
"step": 2430,
"train_speed(iter/s)": 0.039655
},
{
"acc": 0.7603467,
"epoch": 1.6746905089408528,
"grad_norm": 1.225197196006775,
"learning_rate": 7.940604139269903e-05,
"loss": 0.83448153,
"memory(GiB)": 67.73,
"step": 2435,
"train_speed(iter/s)": 0.039691
},
{
"acc": 0.76669693,
"epoch": 1.6781292984869327,
"grad_norm": 1.2933470010757446,
"learning_rate": 7.931399193294331e-05,
"loss": 0.81721525,
"memory(GiB)": 67.73,
"step": 2440,
"train_speed(iter/s)": 0.039727
},
{
"acc": 0.76869669,
"epoch": 1.6815680880330124,
"grad_norm": 1.311872124671936,
"learning_rate": 7.922179084243161e-05,
"loss": 0.82446499,
"memory(GiB)": 67.73,
"step": 2445,
"train_speed(iter/s)": 0.039768
},
{
"acc": 0.74998088,
"epoch": 1.685006877579092,
"grad_norm": 1.109681248664856,
"learning_rate": 7.912943859816847e-05,
"loss": 0.88515491,
"memory(GiB)": 67.73,
"step": 2450,
"train_speed(iter/s)": 0.039804
},
{
"acc": 0.76143503,
"epoch": 1.688445667125172,
"grad_norm": 1.2891324758529663,
"learning_rate": 7.903693567794035e-05,
"loss": 0.84492321,
"memory(GiB)": 67.73,
"step": 2455,
"train_speed(iter/s)": 0.039838
},
{
"acc": 0.77193007,
"epoch": 1.6918844566712519,
"grad_norm": 1.0803942680358887,
"learning_rate": 7.894428256031332e-05,
"loss": 0.78853378,
"memory(GiB)": 67.73,
"step": 2460,
"train_speed(iter/s)": 0.039872
},
{
"acc": 0.78182478,
"epoch": 1.6953232462173315,
"grad_norm": 1.1716827154159546,
"learning_rate": 7.88514797246304e-05,
"loss": 0.76458054,
"memory(GiB)": 67.73,
"step": 2465,
"train_speed(iter/s)": 0.039909
},
{
"acc": 0.76684537,
"epoch": 1.6987620357634112,
"grad_norm": 1.2370884418487549,
"learning_rate": 7.875852765100926e-05,
"loss": 0.83122387,
"memory(GiB)": 67.73,
"step": 2470,
"train_speed(iter/s)": 0.03994
},
{
"acc": 0.7637423,
"epoch": 1.702200825309491,
"grad_norm": 1.4126001596450806,
"learning_rate": 7.866542682033964e-05,
"loss": 0.83239994,
"memory(GiB)": 67.73,
"step": 2475,
"train_speed(iter/s)": 0.03998
},
{
"acc": 0.7585422,
"epoch": 1.7056396148555708,
"grad_norm": 1.277557373046875,
"learning_rate": 7.857217771428085e-05,
"loss": 0.84281693,
"memory(GiB)": 67.73,
"step": 2480,
"train_speed(iter/s)": 0.040019
},
{
"acc": 0.76609259,
"epoch": 1.7090784044016507,
"grad_norm": 1.2501623630523682,
"learning_rate": 7.847878081525932e-05,
"loss": 0.8170804,
"memory(GiB)": 67.73,
"step": 2485,
"train_speed(iter/s)": 0.040056
},
{
"acc": 0.75793953,
"epoch": 1.7125171939477304,
"grad_norm": 1.5299009084701538,
"learning_rate": 7.838523660646611e-05,
"loss": 0.86527452,
"memory(GiB)": 67.73,
"step": 2490,
"train_speed(iter/s)": 0.040087
},
{
"acc": 0.75957718,
"epoch": 1.71595598349381,
"grad_norm": 1.2600144147872925,
"learning_rate": 7.829154557185438e-05,
"loss": 0.84371586,
"memory(GiB)": 67.73,
"step": 2495,
"train_speed(iter/s)": 0.040119
},
{
"acc": 0.77097268,
"epoch": 1.71939477303989,
"grad_norm": 1.2481366395950317,
"learning_rate": 7.819770819613685e-05,
"loss": 0.79474764,
"memory(GiB)": 67.73,
"step": 2500,
"train_speed(iter/s)": 0.040159
},
{
"epoch": 1.71939477303989,
"eval_acc": 0.755859990102128,
"eval_loss": 0.8665845990180969,
"eval_runtime": 1172.0149,
"eval_samples_per_second": 3.654,
"eval_steps_per_second": 0.066,
"step": 2500
},
{
"acc": 0.76427364,
"epoch": 1.7228335625859699,
"grad_norm": 1.3622970581054688,
"learning_rate": 7.810372496478342e-05,
"loss": 0.83532944,
"memory(GiB)": 67.73,
"step": 2505,
"train_speed(iter/s)": 0.039453
},
{
"acc": 0.75734344,
"epoch": 1.7262723521320495,
"grad_norm": 1.2002394199371338,
"learning_rate": 7.800959636401853e-05,
"loss": 0.85398045,
"memory(GiB)": 67.73,
"step": 2510,
"train_speed(iter/s)": 0.039487
},
{
"acc": 0.76924725,
"epoch": 1.7297111416781292,
"grad_norm": 1.1298774480819702,
"learning_rate": 7.791532288081868e-05,
"loss": 0.81432209,
"memory(GiB)": 67.73,
"step": 2515,
"train_speed(iter/s)": 0.03953
},
{
"acc": 0.76489067,
"epoch": 1.7331499312242091,
"grad_norm": 1.3914809226989746,
"learning_rate": 7.782090500290998e-05,
"loss": 0.83234596,
"memory(GiB)": 67.73,
"step": 2520,
"train_speed(iter/s)": 0.039566
},
{
"acc": 0.75667624,
"epoch": 1.7365887207702888,
"grad_norm": 1.2779312133789062,
"learning_rate": 7.77263432187655e-05,
"loss": 0.84862852,
"memory(GiB)": 67.73,
"step": 2525,
"train_speed(iter/s)": 0.039602
},
{
"acc": 0.76841941,
"epoch": 1.7400275103163687,
"grad_norm": 1.1182903051376343,
"learning_rate": 7.763163801760286e-05,
"loss": 0.80550995,
"memory(GiB)": 67.73,
"step": 2530,
"train_speed(iter/s)": 0.039639
},
{
"acc": 0.75564499,
"epoch": 1.7434662998624484,
"grad_norm": 1.325380802154541,
"learning_rate": 7.753678988938162e-05,
"loss": 0.85131378,
"memory(GiB)": 67.73,
"step": 2535,
"train_speed(iter/s)": 0.039668
},
{
"acc": 0.77792916,
"epoch": 1.746905089408528,
"grad_norm": 1.2355977296829224,
"learning_rate": 7.74417993248008e-05,
"loss": 0.76762047,
"memory(GiB)": 67.73,
"step": 2540,
"train_speed(iter/s)": 0.039703
},
{
"acc": 0.7703722,
"epoch": 1.750343878954608,
"grad_norm": 1.1645699739456177,
"learning_rate": 7.734666681529633e-05,
"loss": 0.80783539,
"memory(GiB)": 67.73,
"step": 2545,
"train_speed(iter/s)": 0.039737
},
{
"acc": 0.76581202,
"epoch": 1.7537826685006879,
"grad_norm": 1.2667499780654907,
"learning_rate": 7.725139285303843e-05,
"loss": 0.81426716,
"memory(GiB)": 67.73,
"step": 2550,
"train_speed(iter/s)": 0.039769
},
{
"acc": 0.75635591,
"epoch": 1.7572214580467675,
"grad_norm": 1.325819492340088,
"learning_rate": 7.71559779309292e-05,
"loss": 0.85436335,
"memory(GiB)": 67.73,
"step": 2555,
"train_speed(iter/s)": 0.039804
},
{
"acc": 0.76963515,
"epoch": 1.7606602475928472,
"grad_norm": 1.1206269264221191,
"learning_rate": 7.70604225426e-05,
"loss": 0.79051266,
"memory(GiB)": 67.73,
"step": 2560,
"train_speed(iter/s)": 0.039844
},
{
"acc": 0.77096367,
"epoch": 1.7640990371389271,
"grad_norm": 1.2406977415084839,
"learning_rate": 7.696472718240883e-05,
"loss": 0.8147171,
"memory(GiB)": 67.73,
"step": 2565,
"train_speed(iter/s)": 0.039879
},
{
"acc": 0.76435289,
"epoch": 1.767537826685007,
"grad_norm": 1.2148582935333252,
"learning_rate": 7.686889234543788e-05,
"loss": 0.82190208,
"memory(GiB)": 67.73,
"step": 2570,
"train_speed(iter/s)": 0.039906
},
{
"acc": 0.76752815,
"epoch": 1.7709766162310867,
"grad_norm": 1.0485085248947144,
"learning_rate": 7.677291852749093e-05,
"loss": 0.81348267,
"memory(GiB)": 67.73,
"step": 2575,
"train_speed(iter/s)": 0.039941
},
{
"acc": 0.75209255,
"epoch": 1.7744154057771664,
"grad_norm": 1.2998508214950562,
"learning_rate": 7.667680622509081e-05,
"loss": 0.85738831,
"memory(GiB)": 67.73,
"step": 2580,
"train_speed(iter/s)": 0.039973
},
{
"acc": 0.77212009,
"epoch": 1.777854195323246,
"grad_norm": 1.1790145635604858,
"learning_rate": 7.65805559354767e-05,
"loss": 0.81079607,
"memory(GiB)": 67.73,
"step": 2585,
"train_speed(iter/s)": 0.040007
},
{
"acc": 0.76209583,
"epoch": 1.781292984869326,
"grad_norm": 1.147714614868164,
"learning_rate": 7.648416815660177e-05,
"loss": 0.82997284,
"memory(GiB)": 67.73,
"step": 2590,
"train_speed(iter/s)": 0.040039
},
{
"acc": 0.76417446,
"epoch": 1.7847317744154059,
"grad_norm": 1.3315579891204834,
"learning_rate": 7.638764338713044e-05,
"loss": 0.81722393,
"memory(GiB)": 67.73,
"step": 2595,
"train_speed(iter/s)": 0.040074
},
{
"acc": 0.76321087,
"epoch": 1.7881705639614855,
"grad_norm": 1.282499074935913,
"learning_rate": 7.629098212643586e-05,
"loss": 0.82541628,
"memory(GiB)": 67.73,
"step": 2600,
"train_speed(iter/s)": 0.040108
},
{
"epoch": 1.7881705639614855,
"eval_acc": 0.7575696225311558,
"eval_loss": 0.8597843050956726,
"eval_runtime": 1165.0354,
"eval_samples_per_second": 3.676,
"eval_steps_per_second": 0.066,
"step": 2600
},
{
"acc": 0.76687446,
"epoch": 1.7916093535075652,
"grad_norm": 1.1122969388961792,
"learning_rate": 7.619418487459733e-05,
"loss": 0.80312977,
"memory(GiB)": 67.73,
"step": 2605,
"train_speed(iter/s)": 0.039432
},
{
"acc": 0.76852121,
"epoch": 1.7950481430536451,
"grad_norm": 1.3779712915420532,
"learning_rate": 7.609725213239771e-05,
"loss": 0.79960012,
"memory(GiB)": 67.73,
"step": 2610,
"train_speed(iter/s)": 0.039468
},
{
"acc": 0.76329231,
"epoch": 1.798486932599725,
"grad_norm": 1.4102786779403687,
"learning_rate": 7.60001844013208e-05,
"loss": 0.83775997,
"memory(GiB)": 67.73,
"step": 2615,
"train_speed(iter/s)": 0.039502
},
{
"acc": 0.76466594,
"epoch": 1.8019257221458047,
"grad_norm": 1.2906368970870972,
"learning_rate": 7.590298218354877e-05,
"loss": 0.82873688,
"memory(GiB)": 67.73,
"step": 2620,
"train_speed(iter/s)": 0.039534
},
{
"acc": 0.76969028,
"epoch": 1.8053645116918844,
"grad_norm": 1.2397427558898926,
"learning_rate": 7.580564598195957e-05,
"loss": 0.81822834,
"memory(GiB)": 67.73,
"step": 2625,
"train_speed(iter/s)": 0.03957
},
{
"acc": 0.7630662,
"epoch": 1.8088033012379643,
"grad_norm": 1.3433514833450317,
"learning_rate": 7.570817630012435e-05,
"loss": 0.82502728,
"memory(GiB)": 67.73,
"step": 2630,
"train_speed(iter/s)": 0.039606
},
{
"acc": 0.76269207,
"epoch": 1.812242090784044,
"grad_norm": 1.0851596593856812,
"learning_rate": 7.561057364230475e-05,
"loss": 0.81567841,
"memory(GiB)": 67.73,
"step": 2635,
"train_speed(iter/s)": 0.03964
},
{
"acc": 0.75853286,
"epoch": 1.8156808803301239,
"grad_norm": 1.2418478727340698,
"learning_rate": 7.551283851345042e-05,
"loss": 0.84409733,
"memory(GiB)": 67.73,
"step": 2640,
"train_speed(iter/s)": 0.039671
},
{
"acc": 0.76311216,
"epoch": 1.8191196698762035,
"grad_norm": 1.1131020784378052,
"learning_rate": 7.541497141919636e-05,
"loss": 0.82704649,
"memory(GiB)": 67.73,
"step": 2645,
"train_speed(iter/s)": 0.039706
},
{
"acc": 0.76137314,
"epoch": 1.8225584594222832,
"grad_norm": 1.378839373588562,
"learning_rate": 7.531697286586024e-05,
"loss": 0.833605,
"memory(GiB)": 67.73,
"step": 2650,
"train_speed(iter/s)": 0.039741
},
{
"acc": 0.76775074,
"epoch": 1.8259972489683631,
"grad_norm": 1.269956350326538,
"learning_rate": 7.521884336043988e-05,
"loss": 0.82008057,
"memory(GiB)": 67.73,
"step": 2655,
"train_speed(iter/s)": 0.039776
},
{
"acc": 0.77038703,
"epoch": 1.829436038514443,
"grad_norm": 1.1615530252456665,
"learning_rate": 7.51205834106106e-05,
"loss": 0.79894481,
"memory(GiB)": 67.73,
"step": 2660,
"train_speed(iter/s)": 0.039811
},
{
"acc": 0.76342058,
"epoch": 1.8328748280605227,
"grad_norm": 1.482840895652771,
"learning_rate": 7.502219352472252e-05,
"loss": 0.85134239,
"memory(GiB)": 67.73,
"step": 2665,
"train_speed(iter/s)": 0.039842
},
{
"acc": 0.77493591,
"epoch": 1.8363136176066024,
"grad_norm": 1.321035385131836,
"learning_rate": 7.492367421179802e-05,
"loss": 0.77966547,
"memory(GiB)": 67.73,
"step": 2670,
"train_speed(iter/s)": 0.039874
},
{
"acc": 0.7690084,
"epoch": 1.8397524071526823,
"grad_norm": 1.1279528141021729,
"learning_rate": 7.482502598152908e-05,
"loss": 0.80104809,
"memory(GiB)": 67.73,
"step": 2675,
"train_speed(iter/s)": 0.039909
},
{
"acc": 0.75845652,
"epoch": 1.8431911966987622,
"grad_norm": 1.340296983718872,
"learning_rate": 7.472624934427461e-05,
"loss": 0.8414402,
"memory(GiB)": 67.73,
"step": 2680,
"train_speed(iter/s)": 0.039938
},
{
"acc": 0.77168741,
"epoch": 1.8466299862448419,
"grad_norm": 1.1220262050628662,
"learning_rate": 7.462734481105788e-05,
"loss": 0.80376453,
"memory(GiB)": 67.73,
"step": 2685,
"train_speed(iter/s)": 0.03997
},
{
"acc": 0.77061701,
"epoch": 1.8500687757909215,
"grad_norm": 1.15684974193573,
"learning_rate": 7.452831289356382e-05,
"loss": 0.80241566,
"memory(GiB)": 67.73,
"step": 2690,
"train_speed(iter/s)": 0.040001
},
{
"acc": 0.77713642,
"epoch": 1.8535075653370012,
"grad_norm": 1.0986595153808594,
"learning_rate": 7.442915410413635e-05,
"loss": 0.76731901,
"memory(GiB)": 67.73,
"step": 2695,
"train_speed(iter/s)": 0.040033
},
{
"acc": 0.77222977,
"epoch": 1.8569463548830811,
"grad_norm": 1.258157730102539,
"learning_rate": 7.43298689557758e-05,
"loss": 0.78445282,
"memory(GiB)": 67.73,
"step": 2700,
"train_speed(iter/s)": 0.040066
},
{
"epoch": 1.8569463548830811,
"eval_acc": 0.7587618661987673,
"eval_loss": 0.8582912087440491,
"eval_runtime": 1138.3587,
"eval_samples_per_second": 3.762,
"eval_steps_per_second": 0.068,
"step": 2700
},
{
"acc": 0.76180067,
"epoch": 1.860385144429161,
"grad_norm": 1.2461254596710205,
"learning_rate": 7.423045796213618e-05,
"loss": 0.84116306,
"memory(GiB)": 67.73,
"step": 2705,
"train_speed(iter/s)": 0.039433
},
{
"acc": 0.76939058,
"epoch": 1.8638239339752407,
"grad_norm": 1.1999588012695312,
"learning_rate": 7.413092163752263e-05,
"loss": 0.79865079,
"memory(GiB)": 67.73,
"step": 2710,
"train_speed(iter/s)": 0.039464
},
{
"acc": 0.76056762,
"epoch": 1.8672627235213204,
"grad_norm": 1.1177629232406616,
"learning_rate": 7.403126049688864e-05,
"loss": 0.83327274,
"memory(GiB)": 67.73,
"step": 2715,
"train_speed(iter/s)": 0.0395
},
{
"acc": 0.76080637,
"epoch": 1.8707015130674003,
"grad_norm": 1.1269170045852661,
"learning_rate": 7.393147505583345e-05,
"loss": 0.84126358,
"memory(GiB)": 67.73,
"step": 2720,
"train_speed(iter/s)": 0.039532
},
{
"acc": 0.76141424,
"epoch": 1.8741403026134802,
"grad_norm": 1.4532649517059326,
"learning_rate": 7.383156583059941e-05,
"loss": 0.83713207,
"memory(GiB)": 67.73,
"step": 2725,
"train_speed(iter/s)": 0.039565
},
{
"acc": 0.76509705,
"epoch": 1.8775790921595599,
"grad_norm": 1.7495522499084473,
"learning_rate": 7.373153333806917e-05,
"loss": 0.82034264,
"memory(GiB)": 67.73,
"step": 2730,
"train_speed(iter/s)": 0.039601
},
{
"acc": 0.7695118,
"epoch": 1.8810178817056395,
"grad_norm": 1.3915905952453613,
"learning_rate": 7.363137809576322e-05,
"loss": 0.81267509,
"memory(GiB)": 67.73,
"step": 2735,
"train_speed(iter/s)": 0.039634
},
{
"acc": 0.76744928,
"epoch": 1.8844566712517192,
"grad_norm": 1.1005185842514038,
"learning_rate": 7.353110062183706e-05,
"loss": 0.82437191,
"memory(GiB)": 67.73,
"step": 2740,
"train_speed(iter/s)": 0.039664
},
{
"acc": 0.75223131,
"epoch": 1.8878954607977991,
"grad_norm": 1.4645339250564575,
"learning_rate": 7.343070143507853e-05,
"loss": 0.84084778,
"memory(GiB)": 67.73,
"step": 2745,
"train_speed(iter/s)": 0.039692
},
{
"acc": 0.76176004,
"epoch": 1.891334250343879,
"grad_norm": 1.2676668167114258,
"learning_rate": 7.333018105490515e-05,
"loss": 0.83568439,
"memory(GiB)": 67.73,
"step": 2750,
"train_speed(iter/s)": 0.039723
},
{
"acc": 0.75615792,
"epoch": 1.8947730398899587,
"grad_norm": 1.2724095582962036,
"learning_rate": 7.322954000136148e-05,
"loss": 0.8729351,
"memory(GiB)": 67.73,
"step": 2755,
"train_speed(iter/s)": 0.03975
},
{
"acc": 0.78407092,
"epoch": 1.8982118294360384,
"grad_norm": 1.0895689725875854,
"learning_rate": 7.312877879511639e-05,
"loss": 0.76207438,
"memory(GiB)": 67.73,
"step": 2760,
"train_speed(iter/s)": 0.039781
},
{
"acc": 0.76615877,
"epoch": 1.9016506189821183,
"grad_norm": 1.2832506895065308,
"learning_rate": 7.30278979574603e-05,
"loss": 0.82421865,
"memory(GiB)": 67.73,
"step": 2765,
"train_speed(iter/s)": 0.039817
},
{
"acc": 0.75757556,
"epoch": 1.9050894085281982,
"grad_norm": 1.069419503211975,
"learning_rate": 7.292689801030262e-05,
"loss": 0.84605732,
"memory(GiB)": 67.73,
"step": 2770,
"train_speed(iter/s)": 0.039848
},
{
"acc": 0.7682868,
"epoch": 1.9085281980742779,
"grad_norm": 1.2053790092468262,
"learning_rate": 7.282577947616894e-05,
"loss": 0.81153774,
"memory(GiB)": 67.73,
"step": 2775,
"train_speed(iter/s)": 0.039879
},
{
"acc": 0.76934462,
"epoch": 1.9119669876203575,
"grad_norm": 1.1767531633377075,
"learning_rate": 7.272454287819833e-05,
"loss": 0.80814152,
"memory(GiB)": 67.73,
"step": 2780,
"train_speed(iter/s)": 0.039913
},
{
"acc": 0.7687346,
"epoch": 1.9154057771664375,
"grad_norm": 1.2420388460159302,
"learning_rate": 7.262318874014073e-05,
"loss": 0.79890871,
"memory(GiB)": 67.73,
"step": 2785,
"train_speed(iter/s)": 0.039945
},
{
"acc": 0.76412306,
"epoch": 1.9188445667125174,
"grad_norm": 1.1197503805160522,
"learning_rate": 7.252171758635413e-05,
"loss": 0.8147377,
"memory(GiB)": 67.73,
"step": 2790,
"train_speed(iter/s)": 0.039979
},
{
"acc": 0.76467457,
"epoch": 1.922283356258597,
"grad_norm": 1.3166663646697998,
"learning_rate": 7.242012994180194e-05,
"loss": 0.80287476,
"memory(GiB)": 67.73,
"step": 2795,
"train_speed(iter/s)": 0.040006
},
{
"acc": 0.75744901,
"epoch": 1.9257221458046767,
"grad_norm": 1.226488709449768,
"learning_rate": 7.231842633205018e-05,
"loss": 0.84611988,
"memory(GiB)": 67.73,
"step": 2800,
"train_speed(iter/s)": 0.040037
},
{
"epoch": 1.9257221458046767,
"eval_acc": 0.7585087956089441,
"eval_loss": 0.8532779812812805,
"eval_runtime": 1120.0858,
"eval_samples_per_second": 3.824,
"eval_steps_per_second": 0.069,
"step": 2800
},
{
"acc": 0.75332041,
"epoch": 1.9291609353507564,
"grad_norm": 1.3092836141586304,
"learning_rate": 7.221660728326488e-05,
"loss": 0.86540194,
"memory(GiB)": 67.73,
"step": 2805,
"train_speed(iter/s)": 0.039438
},
{
"acc": 0.75658636,
"epoch": 1.9325997248968363,
"grad_norm": 1.507814645767212,
"learning_rate": 7.211467332220929e-05,
"loss": 0.84582443,
"memory(GiB)": 67.73,
"step": 2810,
"train_speed(iter/s)": 0.039471
},
{
"acc": 0.77044678,
"epoch": 1.9360385144429162,
"grad_norm": 1.1073263883590698,
"learning_rate": 7.201262497624113e-05,
"loss": 0.80068121,
"memory(GiB)": 67.73,
"step": 2815,
"train_speed(iter/s)": 0.0395
},
{
"acc": 0.76184196,
"epoch": 1.9394773039889959,
"grad_norm": 1.2935534715652466,
"learning_rate": 7.191046277330989e-05,
"loss": 0.80897961,
"memory(GiB)": 67.73,
"step": 2820,
"train_speed(iter/s)": 0.039535
},
{
"acc": 0.77721043,
"epoch": 1.9429160935350756,
"grad_norm": 1.291559100151062,
"learning_rate": 7.180818724195413e-05,
"loss": 0.78424644,
"memory(GiB)": 67.73,
"step": 2825,
"train_speed(iter/s)": 0.039567
},
{
"acc": 0.76044025,
"epoch": 1.9463548830811555,
"grad_norm": 1.3120973110198975,
"learning_rate": 7.170579891129872e-05,
"loss": 0.83824387,
"memory(GiB)": 67.73,
"step": 2830,
"train_speed(iter/s)": 0.039597
},
{
"acc": 0.768398,
"epoch": 1.9497936726272354,
"grad_norm": 1.4003841876983643,
"learning_rate": 7.160329831105207e-05,
"loss": 0.81456871,
"memory(GiB)": 67.73,
"step": 2835,
"train_speed(iter/s)": 0.039628
},
{
"acc": 0.76318188,
"epoch": 1.953232462173315,
"grad_norm": 1.2559746503829956,
"learning_rate": 7.150068597150343e-05,
"loss": 0.81296453,
"memory(GiB)": 67.73,
"step": 2840,
"train_speed(iter/s)": 0.039657
},
{
"acc": 0.76650887,
"epoch": 1.9566712517193947,
"grad_norm": 1.4540189504623413,
"learning_rate": 7.139796242352016e-05,
"loss": 0.80663786,
"memory(GiB)": 67.73,
"step": 2845,
"train_speed(iter/s)": 0.039685
},
{
"acc": 0.75642557,
"epoch": 1.9601100412654744,
"grad_norm": 1.2288488149642944,
"learning_rate": 7.129512819854492e-05,
"loss": 0.85932999,
"memory(GiB)": 67.73,
"step": 2850,
"train_speed(iter/s)": 0.039713
},
{
"acc": 0.76574574,
"epoch": 1.9635488308115543,
"grad_norm": 1.0715101957321167,
"learning_rate": 7.1192183828593e-05,
"loss": 0.81189203,
"memory(GiB)": 67.73,
"step": 2855,
"train_speed(iter/s)": 0.039744
},
{
"acc": 0.76550779,
"epoch": 1.9669876203576342,
"grad_norm": 1.1467492580413818,
"learning_rate": 7.108912984624951e-05,
"loss": 0.816045,
"memory(GiB)": 67.73,
"step": 2860,
"train_speed(iter/s)": 0.039771
},
{
"acc": 0.77190948,
"epoch": 1.9704264099037139,
"grad_norm": 1.3903864622116089,
"learning_rate": 7.098596678466663e-05,
"loss": 0.79169202,
"memory(GiB)": 67.73,
"step": 2865,
"train_speed(iter/s)": 0.039802
},
{
"acc": 0.77034965,
"epoch": 1.9738651994497936,
"grad_norm": 1.4153941869735718,
"learning_rate": 7.088269517756085e-05,
"loss": 0.8023941,
"memory(GiB)": 67.73,
"step": 2870,
"train_speed(iter/s)": 0.039833
},
{
"acc": 0.7611412,
"epoch": 1.9773039889958735,
"grad_norm": 1.420799732208252,
"learning_rate": 7.07793155592103e-05,
"loss": 0.82977829,
"memory(GiB)": 67.73,
"step": 2875,
"train_speed(iter/s)": 0.039863
},
{
"acc": 0.7672267,
"epoch": 1.9807427785419534,
"grad_norm": 1.3404732942581177,
"learning_rate": 7.06758284644518e-05,
"loss": 0.82698822,
"memory(GiB)": 67.73,
"step": 2880,
"train_speed(iter/s)": 0.039895
},
{
"acc": 0.7625721,
"epoch": 1.984181568088033,
"grad_norm": 1.215389370918274,
"learning_rate": 7.057223442867828e-05,
"loss": 0.81949692,
"memory(GiB)": 67.73,
"step": 2885,
"train_speed(iter/s)": 0.039923
},
{
"acc": 0.76380196,
"epoch": 1.9876203576341127,
"grad_norm": 1.1068233251571655,
"learning_rate": 7.046853398783595e-05,
"loss": 0.82260599,
"memory(GiB)": 67.73,
"step": 2890,
"train_speed(iter/s)": 0.039951
},
{
"acc": 0.77088547,
"epoch": 1.9910591471801926,
"grad_norm": 1.2111361026763916,
"learning_rate": 7.03647276784214e-05,
"loss": 0.79111667,
"memory(GiB)": 67.73,
"step": 2895,
"train_speed(iter/s)": 0.039979
},
{
"acc": 0.77545271,
"epoch": 1.9944979367262725,
"grad_norm": 1.5610926151275635,
"learning_rate": 7.026081603747905e-05,
"loss": 0.78897448,
"memory(GiB)": 67.73,
"step": 2900,
"train_speed(iter/s)": 0.040011
},
{
"epoch": 1.9944979367262725,
"eval_acc": 0.7610057587618662,
"eval_loss": 0.8448835015296936,
"eval_runtime": 1173.7451,
"eval_samples_per_second": 3.649,
"eval_steps_per_second": 0.066,
"step": 2900
},
{
"acc": 0.76741076,
"epoch": 1.9979367262723522,
"grad_norm": 1.279598593711853,
"learning_rate": 7.01567996025982e-05,
"loss": 0.81564512,
"memory(GiB)": 67.73,
"step": 2905,
"train_speed(iter/s)": 0.039405
},
{
"acc": 0.76975098,
"epoch": 2.001375515818432,
"grad_norm": 1.2036709785461426,
"learning_rate": 7.00526789119103e-05,
"loss": 0.80328093,
"memory(GiB)": 67.73,
"step": 2910,
"train_speed(iter/s)": 0.039417
},
{
"acc": 0.77956657,
"epoch": 2.0048143053645116,
"grad_norm": 1.2451400756835938,
"learning_rate": 6.994845450408618e-05,
"loss": 0.77778225,
"memory(GiB)": 67.73,
"step": 2915,
"train_speed(iter/s)": 0.039446
},
{
"acc": 0.77100277,
"epoch": 2.0082530949105917,
"grad_norm": 1.501703143119812,
"learning_rate": 6.984412691833326e-05,
"loss": 0.78023448,
"memory(GiB)": 67.73,
"step": 2920,
"train_speed(iter/s)": 0.039476
},
{
"acc": 0.78834424,
"epoch": 2.0116918844566714,
"grad_norm": 1.3272697925567627,
"learning_rate": 6.973969669439275e-05,
"loss": 0.72417383,
"memory(GiB)": 67.73,
"step": 2925,
"train_speed(iter/s)": 0.039503
},
{
"acc": 0.78664284,
"epoch": 2.015130674002751,
"grad_norm": 1.2188291549682617,
"learning_rate": 6.963516437253684e-05,
"loss": 0.72837029,
"memory(GiB)": 67.73,
"step": 2930,
"train_speed(iter/s)": 0.039531
},
{
"acc": 0.78195653,
"epoch": 2.0185694635488307,
"grad_norm": 1.386407494544983,
"learning_rate": 6.953053049356597e-05,
"loss": 0.74600391,
"memory(GiB)": 67.73,
"step": 2935,
"train_speed(iter/s)": 0.039557
},
{
"acc": 0.77869625,
"epoch": 2.0220082530949104,
"grad_norm": 1.3403911590576172,
"learning_rate": 6.942579559880596e-05,
"loss": 0.75640421,
"memory(GiB)": 67.73,
"step": 2940,
"train_speed(iter/s)": 0.039588
},
{
"acc": 0.78800364,
"epoch": 2.0254470426409905,
"grad_norm": 1.2047083377838135,
"learning_rate": 6.932096023010522e-05,
"loss": 0.73669438,
"memory(GiB)": 67.73,
"step": 2945,
"train_speed(iter/s)": 0.039621
},
{
"acc": 0.79104028,
"epoch": 2.02888583218707,
"grad_norm": 1.208552360534668,
"learning_rate": 6.921602492983198e-05,
"loss": 0.71291051,
"memory(GiB)": 67.73,
"step": 2950,
"train_speed(iter/s)": 0.039651
},
{
"acc": 0.77535133,
"epoch": 2.03232462173315,
"grad_norm": 1.5250719785690308,
"learning_rate": 6.911099024087153e-05,
"loss": 0.76551533,
"memory(GiB)": 67.73,
"step": 2955,
"train_speed(iter/s)": 0.039681
},
{
"acc": 0.76844397,
"epoch": 2.0357634112792296,
"grad_norm": 1.2615329027175903,
"learning_rate": 6.900585670662321e-05,
"loss": 0.79221487,
"memory(GiB)": 67.73,
"step": 2960,
"train_speed(iter/s)": 0.039706
},
{
"acc": 0.7686276,
"epoch": 2.0392022008253097,
"grad_norm": 1.3869153261184692,
"learning_rate": 6.890062487099788e-05,
"loss": 0.79985504,
"memory(GiB)": 67.73,
"step": 2965,
"train_speed(iter/s)": 0.039733
},
{
"acc": 0.79315829,
"epoch": 2.0426409903713894,
"grad_norm": 1.3412541151046753,
"learning_rate": 6.87952952784149e-05,
"loss": 0.70044346,
"memory(GiB)": 67.73,
"step": 2970,
"train_speed(iter/s)": 0.039764
},
{
"acc": 0.76822557,
"epoch": 2.046079779917469,
"grad_norm": 1.4066526889801025,
"learning_rate": 6.868986847379934e-05,
"loss": 0.79726977,
"memory(GiB)": 67.73,
"step": 2975,
"train_speed(iter/s)": 0.03979
},
{
"acc": 0.77974381,
"epoch": 2.0495185694635487,
"grad_norm": 1.2747722864151,
"learning_rate": 6.858434500257929e-05,
"loss": 0.75174856,
"memory(GiB)": 67.73,
"step": 2980,
"train_speed(iter/s)": 0.039819
},
{
"acc": 0.7757237,
"epoch": 2.0529573590096284,
"grad_norm": 1.3321025371551514,
"learning_rate": 6.847872541068289e-05,
"loss": 0.76223741,
"memory(GiB)": 67.73,
"step": 2985,
"train_speed(iter/s)": 0.039845
},
{
"acc": 0.78108168,
"epoch": 2.0563961485557085,
"grad_norm": 1.3092007637023926,
"learning_rate": 6.837301024453556e-05,
"loss": 0.75712924,
"memory(GiB)": 67.73,
"step": 2990,
"train_speed(iter/s)": 0.03987
},
{
"acc": 0.78598289,
"epoch": 2.059834938101788,
"grad_norm": 1.305657982826233,
"learning_rate": 6.826720005105718e-05,
"loss": 0.73581972,
"memory(GiB)": 67.73,
"step": 2995,
"train_speed(iter/s)": 0.039898
},
{
"acc": 0.78907838,
"epoch": 2.063273727647868,
"grad_norm": 1.4367668628692627,
"learning_rate": 6.816129537765928e-05,
"loss": 0.72936554,
"memory(GiB)": 67.73,
"step": 3000,
"train_speed(iter/s)": 0.039927
},
{
"epoch": 2.063273727647868,
"eval_acc": 0.7620799028208936,
"eval_loss": 0.8483734726905823,
"eval_runtime": 1164.2299,
"eval_samples_per_second": 3.679,
"eval_steps_per_second": 0.066,
"step": 3000
},
{
"acc": 0.78301954,
"epoch": 2.0667125171939476,
"grad_norm": 1.5559133291244507,
"learning_rate": 6.805529677224216e-05,
"loss": 0.7379096,
"memory(GiB)": 67.73,
"step": 3005,
"train_speed(iter/s)": 0.039348
},
{
"acc": 0.77635798,
"epoch": 2.0701513067400277,
"grad_norm": 1.266300916671753,
"learning_rate": 6.79492047831921e-05,
"loss": 0.74843826,
"memory(GiB)": 67.73,
"step": 3010,
"train_speed(iter/s)": 0.039374
},
{
"acc": 0.78403974,
"epoch": 2.0735900962861074,
"grad_norm": 1.3442883491516113,
"learning_rate": 6.784301995937846e-05,
"loss": 0.73924718,
"memory(GiB)": 67.73,
"step": 3015,
"train_speed(iter/s)": 0.039405
},
{
"acc": 0.7821476,
"epoch": 2.077028885832187,
"grad_norm": 1.3284059762954712,
"learning_rate": 6.773674285015092e-05,
"loss": 0.731111,
"memory(GiB)": 67.73,
"step": 3020,
"train_speed(iter/s)": 0.039435
},
{
"acc": 0.78100576,
"epoch": 2.0804676753782667,
"grad_norm": 1.2452079057693481,
"learning_rate": 6.76303740053366e-05,
"loss": 0.75636292,
"memory(GiB)": 67.73,
"step": 3025,
"train_speed(iter/s)": 0.039465
},
{
"acc": 0.77952466,
"epoch": 2.083906464924347,
"grad_norm": 1.5737926959991455,
"learning_rate": 6.752391397523725e-05,
"loss": 0.75301075,
"memory(GiB)": 67.73,
"step": 3030,
"train_speed(iter/s)": 0.039494
},
{
"acc": 0.78698683,
"epoch": 2.0873452544704265,
"grad_norm": 1.337703824043274,
"learning_rate": 6.741736331062626e-05,
"loss": 0.73099127,
"memory(GiB)": 67.73,
"step": 3035,
"train_speed(iter/s)": 0.039519
},
{
"acc": 0.79346113,
"epoch": 2.090784044016506,
"grad_norm": 1.203200340270996,
"learning_rate": 6.731072256274604e-05,
"loss": 0.70464416,
"memory(GiB)": 67.73,
"step": 3040,
"train_speed(iter/s)": 0.039547
},
{
"acc": 0.78062749,
"epoch": 2.094222833562586,
"grad_norm": 1.5236440896987915,
"learning_rate": 6.720399228330494e-05,
"loss": 0.75513244,
"memory(GiB)": 67.73,
"step": 3045,
"train_speed(iter/s)": 0.039576
},
{
"acc": 0.78337817,
"epoch": 2.0976616231086656,
"grad_norm": 1.533868670463562,
"learning_rate": 6.709717302447462e-05,
"loss": 0.73356051,
"memory(GiB)": 67.73,
"step": 3050,
"train_speed(iter/s)": 0.039604
},
{
"acc": 0.7775434,
"epoch": 2.1011004126547457,
"grad_norm": 1.5052993297576904,
"learning_rate": 6.699026533888696e-05,
"loss": 0.75711803,
"memory(GiB)": 67.73,
"step": 3055,
"train_speed(iter/s)": 0.039632
},
{
"acc": 0.76806664,
"epoch": 2.1045392022008254,
"grad_norm": 1.5001362562179565,
"learning_rate": 6.688326977963142e-05,
"loss": 0.78131714,
"memory(GiB)": 67.73,
"step": 3060,
"train_speed(iter/s)": 0.039658
},
{
"acc": 0.76824741,
"epoch": 2.107977991746905,
"grad_norm": 1.422959327697754,
"learning_rate": 6.677618690025201e-05,
"loss": 0.79324121,
"memory(GiB)": 67.73,
"step": 3065,
"train_speed(iter/s)": 0.039682
},
{
"acc": 0.79779997,
"epoch": 2.1114167812929847,
"grad_norm": 1.4451581239700317,
"learning_rate": 6.666901725474453e-05,
"loss": 0.69419732,
"memory(GiB)": 67.73,
"step": 3070,
"train_speed(iter/s)": 0.039716
},
{
"acc": 0.77174778,
"epoch": 2.114855570839065,
"grad_norm": 1.47465980052948,
"learning_rate": 6.656176139755361e-05,
"loss": 0.79069195,
"memory(GiB)": 67.73,
"step": 3075,
"train_speed(iter/s)": 0.039741
},
{
"acc": 0.77605443,
"epoch": 2.1182943603851445,
"grad_norm": 1.3267581462860107,
"learning_rate": 6.645441988356998e-05,
"loss": 0.74461274,
"memory(GiB)": 67.73,
"step": 3080,
"train_speed(iter/s)": 0.03977
},
{
"acc": 0.77349963,
"epoch": 2.121733149931224,
"grad_norm": 1.391455054283142,
"learning_rate": 6.634699326812746e-05,
"loss": 0.77960453,
"memory(GiB)": 67.73,
"step": 3085,
"train_speed(iter/s)": 0.039803
},
{
"acc": 0.79598751,
"epoch": 2.125171939477304,
"grad_norm": 1.2205495834350586,
"learning_rate": 6.623948210700017e-05,
"loss": 0.70987749,
"memory(GiB)": 67.73,
"step": 3090,
"train_speed(iter/s)": 0.039834
},
{
"acc": 0.77750764,
"epoch": 2.128610729023384,
"grad_norm": 1.4609593152999878,
"learning_rate": 6.613188695639961e-05,
"loss": 0.75586929,
"memory(GiB)": 67.73,
"step": 3095,
"train_speed(iter/s)": 0.039862
},
{
"acc": 0.78546953,
"epoch": 2.1320495185694637,
"grad_norm": 1.264543890953064,
"learning_rate": 6.602420837297181e-05,
"loss": 0.71513643,
"memory(GiB)": 67.73,
"step": 3100,
"train_speed(iter/s)": 0.039888
},
{
"epoch": 2.1320495185694637,
"eval_acc": 0.7627378863544338,
"eval_loss": 0.8477216958999634,
"eval_runtime": 1140.3429,
"eval_samples_per_second": 3.756,
"eval_steps_per_second": 0.068,
"step": 3100
},
{
"acc": 0.79360695,
"epoch": 2.1354883081155434,
"grad_norm": 1.446578025817871,
"learning_rate": 6.591644691379445e-05,
"loss": 0.69685826,
"memory(GiB)": 67.73,
"step": 3105,
"train_speed(iter/s)": 0.039338
},
{
"acc": 0.78709059,
"epoch": 2.138927097661623,
"grad_norm": 1.3313159942626953,
"learning_rate": 6.580860313637395e-05,
"loss": 0.71339779,
"memory(GiB)": 67.73,
"step": 3110,
"train_speed(iter/s)": 0.039367
},
{
"acc": 0.77740135,
"epoch": 2.1423658872077027,
"grad_norm": 1.2659940719604492,
"learning_rate": 6.57006775986426e-05,
"loss": 0.77046852,
"memory(GiB)": 67.73,
"step": 3115,
"train_speed(iter/s)": 0.039389
},
{
"acc": 0.77650619,
"epoch": 2.145804676753783,
"grad_norm": 1.4188055992126465,
"learning_rate": 6.559267085895575e-05,
"loss": 0.77363644,
"memory(GiB)": 67.73,
"step": 3120,
"train_speed(iter/s)": 0.039413
},
{
"acc": 0.7744916,
"epoch": 2.1492434662998625,
"grad_norm": 1.5709620714187622,
"learning_rate": 6.548458347608877e-05,
"loss": 0.78516483,
"memory(GiB)": 67.73,
"step": 3125,
"train_speed(iter/s)": 0.039441
},
{
"acc": 0.79064846,
"epoch": 2.152682255845942,
"grad_norm": 1.4528831243515015,
"learning_rate": 6.537641600923424e-05,
"loss": 0.72166934,
"memory(GiB)": 67.73,
"step": 3130,
"train_speed(iter/s)": 0.039473
},
{
"acc": 0.78410482,
"epoch": 2.156121045392022,
"grad_norm": 1.2021089792251587,
"learning_rate": 6.52681690179991e-05,
"loss": 0.72740555,
"memory(GiB)": 67.73,
"step": 3135,
"train_speed(iter/s)": 0.039502
},
{
"acc": 0.78779097,
"epoch": 2.1595598349381016,
"grad_norm": 1.3785122632980347,
"learning_rate": 6.515984306240171e-05,
"loss": 0.7183506,
"memory(GiB)": 67.73,
"step": 3140,
"train_speed(iter/s)": 0.039532
},
{
"acc": 0.78875618,
"epoch": 2.1629986244841817,
"grad_norm": 1.4517391920089722,
"learning_rate": 6.505143870286892e-05,
"loss": 0.72756548,
"memory(GiB)": 67.73,
"step": 3145,
"train_speed(iter/s)": 0.039559
},
{
"acc": 0.77699008,
"epoch": 2.1664374140302614,
"grad_norm": 1.3971807956695557,
"learning_rate": 6.494295650023326e-05,
"loss": 0.76216259,
"memory(GiB)": 67.73,
"step": 3150,
"train_speed(iter/s)": 0.039584
},
{
"acc": 0.78072176,
"epoch": 2.169876203576341,
"grad_norm": 1.3789912462234497,
"learning_rate": 6.483439701572987e-05,
"loss": 0.74354634,
"memory(GiB)": 67.73,
"step": 3155,
"train_speed(iter/s)": 0.039607
},
{
"acc": 0.78208303,
"epoch": 2.1733149931224207,
"grad_norm": 1.4556952714920044,
"learning_rate": 6.472576081099384e-05,
"loss": 0.74753394,
"memory(GiB)": 67.73,
"step": 3160,
"train_speed(iter/s)": 0.03963
},
{
"acc": 0.77552352,
"epoch": 2.176753782668501,
"grad_norm": 1.3656238317489624,
"learning_rate": 6.461704844805711e-05,
"loss": 0.76849761,
"memory(GiB)": 67.73,
"step": 3165,
"train_speed(iter/s)": 0.039651
},
{
"acc": 0.79463406,
"epoch": 2.1801925722145805,
"grad_norm": 1.6309324502944946,
"learning_rate": 6.450826048934564e-05,
"loss": 0.71653328,
"memory(GiB)": 67.73,
"step": 3170,
"train_speed(iter/s)": 0.03968
},
{
"acc": 0.78766985,
"epoch": 2.18363136176066,
"grad_norm": 1.5563981533050537,
"learning_rate": 6.439939749767646e-05,
"loss": 0.72953587,
"memory(GiB)": 67.73,
"step": 3175,
"train_speed(iter/s)": 0.039706
},
{
"acc": 0.78887863,
"epoch": 2.18707015130674,
"grad_norm": 1.4095101356506348,
"learning_rate": 6.429046003625481e-05,
"loss": 0.71779909,
"memory(GiB)": 67.73,
"step": 3180,
"train_speed(iter/s)": 0.039734
},
{
"acc": 0.78688583,
"epoch": 2.19050894085282,
"grad_norm": 1.3465501070022583,
"learning_rate": 6.418144866867121e-05,
"loss": 0.73801022,
"memory(GiB)": 67.73,
"step": 3185,
"train_speed(iter/s)": 0.03976
},
{
"acc": 0.78006182,
"epoch": 2.1939477303988997,
"grad_norm": 1.4523009061813354,
"learning_rate": 6.407236395889853e-05,
"loss": 0.75504117,
"memory(GiB)": 67.73,
"step": 3190,
"train_speed(iter/s)": 0.039787
},
{
"acc": 0.77997456,
"epoch": 2.1973865199449794,
"grad_norm": 1.4462857246398926,
"learning_rate": 6.396320647128904e-05,
"loss": 0.7402379,
"memory(GiB)": 67.73,
"step": 3195,
"train_speed(iter/s)": 0.039814
},
{
"acc": 0.78546247,
"epoch": 2.200825309491059,
"grad_norm": 1.3453855514526367,
"learning_rate": 6.385397677057158e-05,
"loss": 0.74274435,
"memory(GiB)": 67.73,
"step": 3200,
"train_speed(iter/s)": 0.039843
},
{
"epoch": 2.200825309491059,
"eval_acc": 0.7633396319791245,
"eval_loss": 0.8417202234268188,
"eval_runtime": 1162.9585,
"eval_samples_per_second": 3.683,
"eval_steps_per_second": 0.066,
"step": 3200
},
{
"acc": 0.7864996,
"epoch": 2.2042640990371387,
"grad_norm": 1.5509644746780396,
"learning_rate": 6.374467542184858e-05,
"loss": 0.7381216,
"memory(GiB)": 67.73,
"step": 3205,
"train_speed(iter/s)": 0.0393
},
{
"acc": 0.7754878,
"epoch": 2.207702888583219,
"grad_norm": 1.4426201581954956,
"learning_rate": 6.363530299059309e-05,
"loss": 0.76541862,
"memory(GiB)": 67.73,
"step": 3210,
"train_speed(iter/s)": 0.039326
},
{
"acc": 0.78851786,
"epoch": 2.2111416781292985,
"grad_norm": 1.4319977760314941,
"learning_rate": 6.352586004264595e-05,
"loss": 0.71185198,
"memory(GiB)": 67.73,
"step": 3215,
"train_speed(iter/s)": 0.039353
},
{
"acc": 0.78093901,
"epoch": 2.214580467675378,
"grad_norm": 1.4418880939483643,
"learning_rate": 6.341634714421283e-05,
"loss": 0.73851643,
"memory(GiB)": 67.73,
"step": 3220,
"train_speed(iter/s)": 0.039379
},
{
"acc": 0.78669729,
"epoch": 2.218019257221458,
"grad_norm": 1.270719289779663,
"learning_rate": 6.330676486186129e-05,
"loss": 0.71485491,
"memory(GiB)": 67.73,
"step": 3225,
"train_speed(iter/s)": 0.039408
},
{
"acc": 0.79144497,
"epoch": 2.221458046767538,
"grad_norm": 1.4314090013504028,
"learning_rate": 6.31971137625178e-05,
"loss": 0.7153636,
"memory(GiB)": 67.73,
"step": 3230,
"train_speed(iter/s)": 0.039435
},
{
"acc": 0.77727938,
"epoch": 2.2248968363136177,
"grad_norm": 1.4725736379623413,
"learning_rate": 6.308739441346485e-05,
"loss": 0.76273413,
"memory(GiB)": 67.73,
"step": 3235,
"train_speed(iter/s)": 0.039462
},
{
"acc": 0.78571582,
"epoch": 2.2283356258596974,
"grad_norm": 1.512195348739624,
"learning_rate": 6.297760738233815e-05,
"loss": 0.72405062,
"memory(GiB)": 67.73,
"step": 3240,
"train_speed(iter/s)": 0.039488
},
{
"acc": 0.79117641,
"epoch": 2.231774415405777,
"grad_norm": 1.3315008878707886,
"learning_rate": 6.286775323712345e-05,
"loss": 0.69954386,
"memory(GiB)": 67.73,
"step": 3245,
"train_speed(iter/s)": 0.039513
},
{
"acc": 0.7794364,
"epoch": 2.235213204951857,
"grad_norm": 1.3924676179885864,
"learning_rate": 6.275783254615373e-05,
"loss": 0.73521681,
"memory(GiB)": 67.73,
"step": 3250,
"train_speed(iter/s)": 0.039537
},
{
"acc": 0.77737999,
"epoch": 2.238651994497937,
"grad_norm": 1.2619364261627197,
"learning_rate": 6.264784587810623e-05,
"loss": 0.74972701,
"memory(GiB)": 67.73,
"step": 3255,
"train_speed(iter/s)": 0.039565
},
{
"acc": 0.7738802,
"epoch": 2.2420907840440165,
"grad_norm": 1.5401955842971802,
"learning_rate": 6.25377938019996e-05,
"loss": 0.79901037,
"memory(GiB)": 67.73,
"step": 3260,
"train_speed(iter/s)": 0.039591
},
{
"acc": 0.78558297,
"epoch": 2.245529573590096,
"grad_norm": 1.2368144989013672,
"learning_rate": 6.24276768871908e-05,
"loss": 0.72351027,
"memory(GiB)": 67.73,
"step": 3265,
"train_speed(iter/s)": 0.039617
},
{
"acc": 0.78128538,
"epoch": 2.248968363136176,
"grad_norm": 1.4421414136886597,
"learning_rate": 6.23174957033723e-05,
"loss": 0.72427325,
"memory(GiB)": 67.73,
"step": 3270,
"train_speed(iter/s)": 0.039638
},
{
"acc": 0.79050694,
"epoch": 2.252407152682256,
"grad_norm": 1.473962426185608,
"learning_rate": 6.220725082056901e-05,
"loss": 0.70825963,
"memory(GiB)": 67.73,
"step": 3275,
"train_speed(iter/s)": 0.039665
},
{
"acc": 0.78690662,
"epoch": 2.2558459422283357,
"grad_norm": 1.4135388135910034,
"learning_rate": 6.209694280913539e-05,
"loss": 0.71959724,
"memory(GiB)": 67.73,
"step": 3280,
"train_speed(iter/s)": 0.039691
},
{
"acc": 0.78945398,
"epoch": 2.2592847317744154,
"grad_norm": 1.604658842086792,
"learning_rate": 6.198657223975257e-05,
"loss": 0.71993084,
"memory(GiB)": 67.73,
"step": 3285,
"train_speed(iter/s)": 0.039719
},
{
"acc": 0.78186049,
"epoch": 2.262723521320495,
"grad_norm": 1.5232980251312256,
"learning_rate": 6.187613968342524e-05,
"loss": 0.75053821,
"memory(GiB)": 67.73,
"step": 3290,
"train_speed(iter/s)": 0.039741
},
{
"acc": 0.78012853,
"epoch": 2.266162310866575,
"grad_norm": 1.2351890802383423,
"learning_rate": 6.176564571147882e-05,
"loss": 0.76610746,
"memory(GiB)": 67.73,
"step": 3295,
"train_speed(iter/s)": 0.039772
},
{
"acc": 0.79100294,
"epoch": 2.269601100412655,
"grad_norm": 1.2078155279159546,
"learning_rate": 6.165509089555647e-05,
"loss": 0.69998646,
"memory(GiB)": 67.73,
"step": 3300,
"train_speed(iter/s)": 0.039798
},
{
"epoch": 2.269601100412655,
"eval_acc": 0.7635139694965583,
"eval_loss": 0.8397796750068665,
"eval_runtime": 1111.351,
"eval_samples_per_second": 3.854,
"eval_steps_per_second": 0.069,
"step": 3300
},
{
"acc": 0.78393035,
"epoch": 2.2730398899587345,
"grad_norm": 1.453913688659668,
"learning_rate": 6.154447580761612e-05,
"loss": 0.72859416,
"memory(GiB)": 67.73,
"step": 3305,
"train_speed(iter/s)": 0.039299
},
{
"acc": 0.78495998,
"epoch": 2.276478679504814,
"grad_norm": 1.4776540994644165,
"learning_rate": 6.143380101992752e-05,
"loss": 0.7363111,
"memory(GiB)": 67.73,
"step": 3310,
"train_speed(iter/s)": 0.039328
},
{
"acc": 0.78339643,
"epoch": 2.2799174690508943,
"grad_norm": 1.5362030267715454,
"learning_rate": 6.132306710506926e-05,
"loss": 0.7379076,
"memory(GiB)": 67.73,
"step": 3315,
"train_speed(iter/s)": 0.039357
},
{
"acc": 0.7797461,
"epoch": 2.283356258596974,
"grad_norm": 1.5111163854599,
"learning_rate": 6.121227463592585e-05,
"loss": 0.76058264,
"memory(GiB)": 67.73,
"step": 3320,
"train_speed(iter/s)": 0.039381
},
{
"acc": 0.77675905,
"epoch": 2.2867950481430537,
"grad_norm": 1.4410961866378784,
"learning_rate": 6.11014241856847e-05,
"loss": 0.7604672,
"memory(GiB)": 67.73,
"step": 3325,
"train_speed(iter/s)": 0.039401
},
{
"acc": 0.78640685,
"epoch": 2.2902338376891334,
"grad_norm": 1.7038127183914185,
"learning_rate": 6.099051632783327e-05,
"loss": 0.72746906,
"memory(GiB)": 67.73,
"step": 3330,
"train_speed(iter/s)": 0.039426
},
{
"acc": 0.78558645,
"epoch": 2.293672627235213,
"grad_norm": 1.2854883670806885,
"learning_rate": 6.0879551636155894e-05,
"loss": 0.74064126,
"memory(GiB)": 67.73,
"step": 3335,
"train_speed(iter/s)": 0.039449
},
{
"acc": 0.781954,
"epoch": 2.297111416781293,
"grad_norm": 1.7075494527816772,
"learning_rate": 6.076853068473104e-05,
"loss": 0.73064299,
"memory(GiB)": 67.73,
"step": 3340,
"train_speed(iter/s)": 0.039476
},
{
"acc": 0.7864274,
"epoch": 2.300550206327373,
"grad_norm": 1.3785117864608765,
"learning_rate": 6.065745404792816e-05,
"loss": 0.72167091,
"memory(GiB)": 67.73,
"step": 3345,
"train_speed(iter/s)": 0.039497
},
{
"acc": 0.77905812,
"epoch": 2.3039889958734525,
"grad_norm": 1.2673250436782837,
"learning_rate": 6.054632230040489e-05,
"loss": 0.75723281,
"memory(GiB)": 67.73,
"step": 3350,
"train_speed(iter/s)": 0.039522
},
{
"acc": 0.78323727,
"epoch": 2.307427785419532,
"grad_norm": 1.5396391153335571,
"learning_rate": 6.043513601710391e-05,
"loss": 0.75284595,
"memory(GiB)": 67.73,
"step": 3355,
"train_speed(iter/s)": 0.039546
},
{
"acc": 0.77735343,
"epoch": 2.310866574965612,
"grad_norm": 1.4035242795944214,
"learning_rate": 6.032389577325004e-05,
"loss": 0.76564093,
"memory(GiB)": 67.73,
"step": 3360,
"train_speed(iter/s)": 0.039572
},
{
"acc": 0.78433137,
"epoch": 2.314305364511692,
"grad_norm": 1.3705512285232544,
"learning_rate": 6.0212602144347295e-05,
"loss": 0.74389467,
"memory(GiB)": 67.73,
"step": 3365,
"train_speed(iter/s)": 0.039597
},
{
"acc": 0.7952363,
"epoch": 2.3177441540577717,
"grad_norm": 1.601040005683899,
"learning_rate": 6.010125570617587e-05,
"loss": 0.69709463,
"memory(GiB)": 67.73,
"step": 3370,
"train_speed(iter/s)": 0.039624
},
{
"acc": 0.78479719,
"epoch": 2.3211829436038514,
"grad_norm": 1.6512783765792847,
"learning_rate": 5.998985703478916e-05,
"loss": 0.7351841,
"memory(GiB)": 67.73,
"step": 3375,
"train_speed(iter/s)": 0.039648
},
{
"acc": 0.7886528,
"epoch": 2.324621733149931,
"grad_norm": 1.4211089611053467,
"learning_rate": 5.9878406706510834e-05,
"loss": 0.72703929,
"memory(GiB)": 67.73,
"step": 3380,
"train_speed(iter/s)": 0.039672
},
{
"acc": 0.79997978,
"epoch": 2.328060522696011,
"grad_norm": 1.5382604598999023,
"learning_rate": 5.976690529793175e-05,
"loss": 0.68571553,
"memory(GiB)": 67.73,
"step": 3385,
"train_speed(iter/s)": 0.039698
},
{
"acc": 0.78704443,
"epoch": 2.331499312242091,
"grad_norm": 1.4559470415115356,
"learning_rate": 5.9655353385907055e-05,
"loss": 0.7418499,
"memory(GiB)": 67.73,
"step": 3390,
"train_speed(iter/s)": 0.039721
},
{
"acc": 0.77985802,
"epoch": 2.3349381017881705,
"grad_norm": 1.4432960748672485,
"learning_rate": 5.954375154755316e-05,
"loss": 0.75312023,
"memory(GiB)": 67.73,
"step": 3395,
"train_speed(iter/s)": 0.039747
},
{
"acc": 0.79101877,
"epoch": 2.33837689133425,
"grad_norm": 1.3332661390304565,
"learning_rate": 5.9432100360244816e-05,
"loss": 0.71166148,
"memory(GiB)": 67.73,
"step": 3400,
"train_speed(iter/s)": 0.039772
},
{
"epoch": 2.33837689133425,
"eval_acc": 0.7662640032393035,
"eval_loss": 0.8363276124000549,
"eval_runtime": 1137.5857,
"eval_samples_per_second": 3.765,
"eval_steps_per_second": 0.068,
"step": 3400
},
{
"acc": 0.77607446,
"epoch": 2.3418156808803303,
"grad_norm": 1.4800801277160645,
"learning_rate": 5.9320400401612055e-05,
"loss": 0.75290956,
"memory(GiB)": 67.73,
"step": 3405,
"train_speed(iter/s)": 0.039276
},
{
"acc": 0.79126697,
"epoch": 2.34525447042641,
"grad_norm": 1.4802907705307007,
"learning_rate": 5.9208652249537224e-05,
"loss": 0.71252222,
"memory(GiB)": 67.73,
"step": 3410,
"train_speed(iter/s)": 0.039305
},
{
"acc": 0.79908376,
"epoch": 2.3486932599724897,
"grad_norm": 1.5732394456863403,
"learning_rate": 5.909685648215198e-05,
"loss": 0.68212581,
"memory(GiB)": 67.73,
"step": 3415,
"train_speed(iter/s)": 0.039334
},
{
"acc": 0.79249792,
"epoch": 2.3521320495185694,
"grad_norm": 1.5170937776565552,
"learning_rate": 5.8985013677834376e-05,
"loss": 0.69914331,
"memory(GiB)": 67.73,
"step": 3420,
"train_speed(iter/s)": 0.039361
},
{
"acc": 0.76918459,
"epoch": 2.355570839064649,
"grad_norm": 1.4346693754196167,
"learning_rate": 5.887312441520577e-05,
"loss": 0.7888124,
"memory(GiB)": 67.73,
"step": 3425,
"train_speed(iter/s)": 0.039379
},
{
"acc": 0.77460504,
"epoch": 2.359009628610729,
"grad_norm": 1.5379953384399414,
"learning_rate": 5.876118927312788e-05,
"loss": 0.77686357,
"memory(GiB)": 67.73,
"step": 3430,
"train_speed(iter/s)": 0.039402
},
{
"acc": 0.78930125,
"epoch": 2.362448418156809,
"grad_norm": 1.5620882511138916,
"learning_rate": 5.8649208830699776e-05,
"loss": 0.72846537,
"memory(GiB)": 67.73,
"step": 3435,
"train_speed(iter/s)": 0.039427
},
{
"acc": 0.7783771,
"epoch": 2.3658872077028885,
"grad_norm": 1.3905718326568604,
"learning_rate": 5.85371836672549e-05,
"loss": 0.74923038,
"memory(GiB)": 67.73,
"step": 3440,
"train_speed(iter/s)": 0.039449
},
{
"acc": 0.77820864,
"epoch": 2.369325997248968,
"grad_norm": 1.2664304971694946,
"learning_rate": 5.842511436235807e-05,
"loss": 0.77669792,
"memory(GiB)": 67.73,
"step": 3445,
"train_speed(iter/s)": 0.039476
},
{
"acc": 0.78637152,
"epoch": 2.3727647867950483,
"grad_norm": 1.3347089290618896,
"learning_rate": 5.831300149580245e-05,
"loss": 0.72091188,
"memory(GiB)": 67.73,
"step": 3450,
"train_speed(iter/s)": 0.039501
},
{
"acc": 0.77844844,
"epoch": 2.376203576341128,
"grad_norm": 1.4759833812713623,
"learning_rate": 5.820084564760657e-05,
"loss": 0.76853113,
"memory(GiB)": 67.73,
"step": 3455,
"train_speed(iter/s)": 0.039528
},
{
"acc": 0.7886765,
"epoch": 2.3796423658872077,
"grad_norm": 1.7147775888442993,
"learning_rate": 5.808864739801136e-05,
"loss": 0.73901868,
"memory(GiB)": 67.73,
"step": 3460,
"train_speed(iter/s)": 0.039549
},
{
"acc": 0.78849745,
"epoch": 2.3830811554332874,
"grad_norm": 1.4572715759277344,
"learning_rate": 5.797640732747707e-05,
"loss": 0.71194096,
"memory(GiB)": 67.73,
"step": 3465,
"train_speed(iter/s)": 0.039573
},
{
"acc": 0.79340534,
"epoch": 2.3865199449793675,
"grad_norm": 1.8968569040298462,
"learning_rate": 5.7864126016680354e-05,
"loss": 0.69921117,
"memory(GiB)": 67.73,
"step": 3470,
"train_speed(iter/s)": 0.039599
},
{
"acc": 0.7905076,
"epoch": 2.389958734525447,
"grad_norm": 1.6759856939315796,
"learning_rate": 5.7751804046511185e-05,
"loss": 0.71475925,
"memory(GiB)": 67.73,
"step": 3475,
"train_speed(iter/s)": 0.039625
},
{
"acc": 0.78112564,
"epoch": 2.393397524071527,
"grad_norm": 1.301619052886963,
"learning_rate": 5.763944199806991e-05,
"loss": 0.74565101,
"memory(GiB)": 67.73,
"step": 3480,
"train_speed(iter/s)": 0.039651
},
{
"acc": 0.77826796,
"epoch": 2.3968363136176065,
"grad_norm": 1.726420283317566,
"learning_rate": 5.7527040452664215e-05,
"loss": 0.76559715,
"memory(GiB)": 67.73,
"step": 3485,
"train_speed(iter/s)": 0.039679
},
{
"acc": 0.78291936,
"epoch": 2.400275103163686,
"grad_norm": 1.7139410972595215,
"learning_rate": 5.741459999180612e-05,
"loss": 0.74492068,
"memory(GiB)": 67.73,
"step": 3490,
"train_speed(iter/s)": 0.039704
},
{
"acc": 0.7825911,
"epoch": 2.4037138927097663,
"grad_norm": 1.6379047632217407,
"learning_rate": 5.7302121197209e-05,
"loss": 0.7245317,
"memory(GiB)": 67.73,
"step": 3495,
"train_speed(iter/s)": 0.039726
},
{
"acc": 0.78516874,
"epoch": 2.407152682255846,
"grad_norm": 1.6368329524993896,
"learning_rate": 5.7189604650784524e-05,
"loss": 0.71679149,
"memory(GiB)": 67.73,
"step": 3500,
"train_speed(iter/s)": 0.039751
},
{
"epoch": 2.407152682255846,
"eval_acc": 0.7661683987942592,
"eval_loss": 0.8319239020347595,
"eval_runtime": 1159.9932,
"eval_samples_per_second": 3.692,
"eval_steps_per_second": 0.066,
"step": 3500
},
{
"acc": 0.77984338,
"epoch": 2.4105914718019257,
"grad_norm": 1.3497166633605957,
"learning_rate": 5.707705093463969e-05,
"loss": 0.76508818,
"memory(GiB)": 67.73,
"step": 3505,
"train_speed(iter/s)": 0.039259
},
{
"acc": 0.77530251,
"epoch": 2.4140302613480054,
"grad_norm": 1.4699667692184448,
"learning_rate": 5.6964460631073736e-05,
"loss": 0.7737175,
"memory(GiB)": 67.73,
"step": 3510,
"train_speed(iter/s)": 0.039282
},
{
"acc": 0.77435117,
"epoch": 2.417469050894085,
"grad_norm": 1.3567484617233276,
"learning_rate": 5.685183432257529e-05,
"loss": 0.76602321,
"memory(GiB)": 67.73,
"step": 3515,
"train_speed(iter/s)": 0.039309
},
{
"acc": 0.77687979,
"epoch": 2.420907840440165,
"grad_norm": 1.6473392248153687,
"learning_rate": 5.6739172591819187e-05,
"loss": 0.76530232,
"memory(GiB)": 67.73,
"step": 3520,
"train_speed(iter/s)": 0.039334
},
{
"acc": 0.78237972,
"epoch": 2.424346629986245,
"grad_norm": 1.5507371425628662,
"learning_rate": 5.662647602166351e-05,
"loss": 0.75080051,
"memory(GiB)": 67.73,
"step": 3525,
"train_speed(iter/s)": 0.039356
},
{
"acc": 0.78154564,
"epoch": 2.4277854195323245,
"grad_norm": 1.4991931915283203,
"learning_rate": 5.65137451951466e-05,
"loss": 0.75352793,
"memory(GiB)": 67.73,
"step": 3530,
"train_speed(iter/s)": 0.039378
},
{
"acc": 0.77997007,
"epoch": 2.4312242090784046,
"grad_norm": 1.6739842891693115,
"learning_rate": 5.640098069548404e-05,
"loss": 0.76081572,
"memory(GiB)": 67.73,
"step": 3535,
"train_speed(iter/s)": 0.039401
},
{
"acc": 0.78768792,
"epoch": 2.4346629986244843,
"grad_norm": 1.7248750925064087,
"learning_rate": 5.628818310606561e-05,
"loss": 0.7255187,
"memory(GiB)": 67.73,
"step": 3540,
"train_speed(iter/s)": 0.039426
},
{
"acc": 0.79025412,
"epoch": 2.438101788170564,
"grad_norm": 1.62557053565979,
"learning_rate": 5.617535301045228e-05,
"loss": 0.70803857,
"memory(GiB)": 67.73,
"step": 3545,
"train_speed(iter/s)": 0.039454
},
{
"acc": 0.78769302,
"epoch": 2.4415405777166437,
"grad_norm": 1.4635558128356934,
"learning_rate": 5.606249099237318e-05,
"loss": 0.73414497,
"memory(GiB)": 67.73,
"step": 3550,
"train_speed(iter/s)": 0.039479
},
{
"acc": 0.79148045,
"epoch": 2.4449793672627234,
"grad_norm": 1.3263139724731445,
"learning_rate": 5.594959763572263e-05,
"loss": 0.71763167,
"memory(GiB)": 67.73,
"step": 3555,
"train_speed(iter/s)": 0.039504
},
{
"acc": 0.79330978,
"epoch": 2.4484181568088035,
"grad_norm": 1.5712461471557617,
"learning_rate": 5.5836673524557e-05,
"loss": 0.71670427,
"memory(GiB)": 67.73,
"step": 3560,
"train_speed(iter/s)": 0.039529
},
{
"acc": 0.78728065,
"epoch": 2.451856946354883,
"grad_norm": 1.610227108001709,
"learning_rate": 5.572371924309188e-05,
"loss": 0.71835189,
"memory(GiB)": 67.73,
"step": 3565,
"train_speed(iter/s)": 0.039555
},
{
"acc": 0.7980279,
"epoch": 2.455295735900963,
"grad_norm": 1.3638176918029785,
"learning_rate": 5.5610735375698863e-05,
"loss": 0.68947468,
"memory(GiB)": 67.73,
"step": 3570,
"train_speed(iter/s)": 0.039583
},
{
"acc": 0.79382896,
"epoch": 2.4587345254470425,
"grad_norm": 1.671080470085144,
"learning_rate": 5.549772250690264e-05,
"loss": 0.6997715,
"memory(GiB)": 67.73,
"step": 3575,
"train_speed(iter/s)": 0.039609
},
{
"acc": 0.78277836,
"epoch": 2.462173314993122,
"grad_norm": 1.5335805416107178,
"learning_rate": 5.538468122137791e-05,
"loss": 0.75039816,
"memory(GiB)": 67.73,
"step": 3580,
"train_speed(iter/s)": 0.039637
},
{
"acc": 0.78852177,
"epoch": 2.4656121045392023,
"grad_norm": 1.5238432884216309,
"learning_rate": 5.527161210394645e-05,
"loss": 0.72055502,
"memory(GiB)": 67.73,
"step": 3585,
"train_speed(iter/s)": 0.039663
},
{
"acc": 0.77950158,
"epoch": 2.469050894085282,
"grad_norm": 1.3826063871383667,
"learning_rate": 5.515851573957397e-05,
"loss": 0.74620533,
"memory(GiB)": 67.73,
"step": 3590,
"train_speed(iter/s)": 0.039684
},
{
"acc": 0.78283319,
"epoch": 2.4724896836313617,
"grad_norm": 1.637787938117981,
"learning_rate": 5.504539271336714e-05,
"loss": 0.75348463,
"memory(GiB)": 67.73,
"step": 3595,
"train_speed(iter/s)": 0.039709
},
{
"acc": 0.79472337,
"epoch": 2.4759284731774414,
"grad_norm": 1.4630149602890015,
"learning_rate": 5.493224361057062e-05,
"loss": 0.70524812,
"memory(GiB)": 67.73,
"step": 3600,
"train_speed(iter/s)": 0.039738
},
{
"epoch": 2.4759284731774414,
"eval_acc": 0.7665226976200117,
"eval_loss": 0.8275927901268005,
"eval_runtime": 1123.227,
"eval_samples_per_second": 3.813,
"eval_steps_per_second": 0.069,
"step": 3600
},
{
"acc": 0.79677072,
"epoch": 2.4793672627235215,
"grad_norm": 1.354331374168396,
"learning_rate": 5.481906901656389e-05,
"loss": 0.70224314,
"memory(GiB)": 67.73,
"step": 3605,
"train_speed(iter/s)": 0.039277
},
{
"acc": 0.78522711,
"epoch": 2.482806052269601,
"grad_norm": 1.4437576532363892,
"learning_rate": 5.470586951685842e-05,
"loss": 0.74459286,
"memory(GiB)": 67.73,
"step": 3610,
"train_speed(iter/s)": 0.039302
},
{
"acc": 0.79154515,
"epoch": 2.486244841815681,
"grad_norm": 1.358216643333435,
"learning_rate": 5.4592645697094434e-05,
"loss": 0.70617638,
"memory(GiB)": 67.73,
"step": 3615,
"train_speed(iter/s)": 0.039323
},
{
"acc": 0.78887815,
"epoch": 2.4896836313617605,
"grad_norm": 1.6288851499557495,
"learning_rate": 5.447939814303803e-05,
"loss": 0.72137556,
"memory(GiB)": 67.73,
"step": 3620,
"train_speed(iter/s)": 0.039345
},
{
"acc": 0.7799171,
"epoch": 2.4931224209078406,
"grad_norm": 1.4252561330795288,
"learning_rate": 5.4366127440578063e-05,
"loss": 0.75225086,
"memory(GiB)": 67.73,
"step": 3625,
"train_speed(iter/s)": 0.039369
},
{
"acc": 0.78901777,
"epoch": 2.4965612104539203,
"grad_norm": 1.3052763938903809,
"learning_rate": 5.42528341757232e-05,
"loss": 0.73022747,
"memory(GiB)": 67.73,
"step": 3630,
"train_speed(iter/s)": 0.039392
},
{
"acc": 0.77776222,
"epoch": 2.5,
"grad_norm": 1.3259241580963135,
"learning_rate": 5.413951893459877e-05,
"loss": 0.74716744,
"memory(GiB)": 67.73,
"step": 3635,
"train_speed(iter/s)": 0.039413
},
{
"acc": 0.79203482,
"epoch": 2.5034387895460797,
"grad_norm": 1.491448998451233,
"learning_rate": 5.4026182303443826e-05,
"loss": 0.71442933,
"memory(GiB)": 67.73,
"step": 3640,
"train_speed(iter/s)": 0.039433
},
{
"acc": 0.78454857,
"epoch": 2.5068775790921594,
"grad_norm": 1.6916753053665161,
"learning_rate": 5.391282486860809e-05,
"loss": 0.74134259,
"memory(GiB)": 67.73,
"step": 3645,
"train_speed(iter/s)": 0.039458
},
{
"acc": 0.78789535,
"epoch": 2.5103163686382395,
"grad_norm": 1.5004796981811523,
"learning_rate": 5.3799447216548907e-05,
"loss": 0.7244381,
"memory(GiB)": 67.73,
"step": 3650,
"train_speed(iter/s)": 0.039483
},
{
"acc": 0.80307121,
"epoch": 2.513755158184319,
"grad_norm": 1.3776211738586426,
"learning_rate": 5.368604993382822e-05,
"loss": 0.67283263,
"memory(GiB)": 67.73,
"step": 3655,
"train_speed(iter/s)": 0.039509
},
{
"acc": 0.78840837,
"epoch": 2.517193947730399,
"grad_norm": 1.489513635635376,
"learning_rate": 5.357263360710951e-05,
"loss": 0.73468142,
"memory(GiB)": 67.73,
"step": 3660,
"train_speed(iter/s)": 0.039533
},
{
"acc": 0.78454609,
"epoch": 2.5206327372764785,
"grad_norm": 1.5130376815795898,
"learning_rate": 5.345919882315481e-05,
"loss": 0.74815798,
"memory(GiB)": 67.73,
"step": 3665,
"train_speed(iter/s)": 0.039558
},
{
"acc": 0.77992659,
"epoch": 2.524071526822558,
"grad_norm": 1.5401512384414673,
"learning_rate": 5.3345746168821634e-05,
"loss": 0.74576526,
"memory(GiB)": 67.73,
"step": 3670,
"train_speed(iter/s)": 0.039581
},
{
"acc": 0.78538713,
"epoch": 2.5275103163686383,
"grad_norm": 1.6626590490341187,
"learning_rate": 5.3232276231059905e-05,
"loss": 0.72729344,
"memory(GiB)": 67.73,
"step": 3675,
"train_speed(iter/s)": 0.039608
},
{
"acc": 0.77599993,
"epoch": 2.530949105914718,
"grad_norm": 1.4081122875213623,
"learning_rate": 5.311878959690906e-05,
"loss": 0.76209216,
"memory(GiB)": 67.73,
"step": 3680,
"train_speed(iter/s)": 0.039628
},
{
"acc": 0.78826327,
"epoch": 2.5343878954607977,
"grad_norm": 1.474022626876831,
"learning_rate": 5.3005286853494854e-05,
"loss": 0.71333871,
"memory(GiB)": 67.73,
"step": 3685,
"train_speed(iter/s)": 0.039652
},
{
"acc": 0.78787079,
"epoch": 2.537826685006878,
"grad_norm": 1.648646354675293,
"learning_rate": 5.289176858802634e-05,
"loss": 0.72448759,
"memory(GiB)": 67.73,
"step": 3690,
"train_speed(iter/s)": 0.039677
},
{
"acc": 0.78479404,
"epoch": 2.5412654745529575,
"grad_norm": 1.4439847469329834,
"learning_rate": 5.277823538779295e-05,
"loss": 0.72407675,
"memory(GiB)": 67.73,
"step": 3695,
"train_speed(iter/s)": 0.039702
},
{
"acc": 0.77956858,
"epoch": 2.544704264099037,
"grad_norm": 1.2535481452941895,
"learning_rate": 5.2664687840161364e-05,
"loss": 0.74480648,
"memory(GiB)": 67.73,
"step": 3700,
"train_speed(iter/s)": 0.039725
},
{
"epoch": 2.544704264099037,
"eval_acc": 0.7681029828586854,
"eval_loss": 0.8239570260047913,
"eval_runtime": 1156.7503,
"eval_samples_per_second": 3.703,
"eval_steps_per_second": 0.067,
"step": 3700
},
{
"acc": 0.78182096,
"epoch": 2.548143053645117,
"grad_norm": 1.328555703163147,
"learning_rate": 5.255112653257247e-05,
"loss": 0.75617981,
"memory(GiB)": 67.73,
"step": 3705,
"train_speed(iter/s)": 0.039261
},
{
"acc": 0.78516607,
"epoch": 2.5515818431911965,
"grad_norm": 1.5017790794372559,
"learning_rate": 5.243755205253834e-05,
"loss": 0.73223658,
"memory(GiB)": 67.73,
"step": 3710,
"train_speed(iter/s)": 0.039286
},
{
"acc": 0.78861194,
"epoch": 2.5550206327372766,
"grad_norm": 1.309441089630127,
"learning_rate": 5.232396498763923e-05,
"loss": 0.7213201,
"memory(GiB)": 67.73,
"step": 3715,
"train_speed(iter/s)": 0.039308
},
{
"acc": 0.78652673,
"epoch": 2.5584594222833563,
"grad_norm": 2.0742311477661133,
"learning_rate": 5.2210365925520445e-05,
"loss": 0.73911443,
"memory(GiB)": 67.73,
"step": 3720,
"train_speed(iter/s)": 0.039335
},
{
"acc": 0.78357706,
"epoch": 2.561898211829436,
"grad_norm": 1.4650071859359741,
"learning_rate": 5.2096755453889404e-05,
"loss": 0.74594064,
"memory(GiB)": 67.73,
"step": 3725,
"train_speed(iter/s)": 0.039357
},
{
"acc": 0.78125381,
"epoch": 2.5653370013755157,
"grad_norm": 1.7474429607391357,
"learning_rate": 5.198313416051257e-05,
"loss": 0.75290685,
"memory(GiB)": 67.73,
"step": 3730,
"train_speed(iter/s)": 0.039381
},
{
"acc": 0.77420011,
"epoch": 2.5687757909215954,
"grad_norm": 1.6091666221618652,
"learning_rate": 5.186950263321233e-05,
"loss": 0.79236693,
"memory(GiB)": 67.73,
"step": 3735,
"train_speed(iter/s)": 0.039403
},
{
"acc": 0.77931113,
"epoch": 2.5722145804676755,
"grad_norm": 1.9077335596084595,
"learning_rate": 5.1755861459864064e-05,
"loss": 0.74636703,
"memory(GiB)": 67.73,
"step": 3740,
"train_speed(iter/s)": 0.039425
},
{
"acc": 0.7796699,
"epoch": 2.575653370013755,
"grad_norm": 1.6318970918655396,
"learning_rate": 5.164221122839306e-05,
"loss": 0.76515536,
"memory(GiB)": 67.73,
"step": 3745,
"train_speed(iter/s)": 0.039444
},
{
"acc": 0.77925997,
"epoch": 2.579092159559835,
"grad_norm": 1.563817024230957,
"learning_rate": 5.1528552526771425e-05,
"loss": 0.74128981,
"memory(GiB)": 67.73,
"step": 3750,
"train_speed(iter/s)": 0.039469
},
{
"acc": 0.78557882,
"epoch": 2.582530949105915,
"grad_norm": 1.3365668058395386,
"learning_rate": 5.141488594301512e-05,
"loss": 0.72270107,
"memory(GiB)": 67.73,
"step": 3755,
"train_speed(iter/s)": 0.039493
},
{
"acc": 0.78748364,
"epoch": 2.5859697386519946,
"grad_norm": 1.33451247215271,
"learning_rate": 5.1301212065180895e-05,
"loss": 0.74060202,
"memory(GiB)": 67.73,
"step": 3760,
"train_speed(iter/s)": 0.039516
},
{
"acc": 0.79184856,
"epoch": 2.5894085281980743,
"grad_norm": 1.4293380975723267,
"learning_rate": 5.118753148136318e-05,
"loss": 0.7231204,
"memory(GiB)": 67.73,
"step": 3765,
"train_speed(iter/s)": 0.039538
},
{
"acc": 0.7917345,
"epoch": 2.592847317744154,
"grad_norm": 1.4640839099884033,
"learning_rate": 5.107384477969117e-05,
"loss": 0.72228947,
"memory(GiB)": 67.73,
"step": 3770,
"train_speed(iter/s)": 0.039558
},
{
"acc": 0.78101654,
"epoch": 2.5962861072902337,
"grad_norm": 1.5235430002212524,
"learning_rate": 5.0960152548325676e-05,
"loss": 0.75548983,
"memory(GiB)": 67.73,
"step": 3775,
"train_speed(iter/s)": 0.039578
},
{
"acc": 0.79459238,
"epoch": 2.599724896836314,
"grad_norm": 1.3068392276763916,
"learning_rate": 5.08464553754561e-05,
"loss": 0.70593162,
"memory(GiB)": 67.73,
"step": 3780,
"train_speed(iter/s)": 0.039602
},
{
"acc": 0.79126248,
"epoch": 2.6031636863823935,
"grad_norm": 1.3516395092010498,
"learning_rate": 5.0732753849297434e-05,
"loss": 0.72088032,
"memory(GiB)": 67.73,
"step": 3785,
"train_speed(iter/s)": 0.039626
},
{
"acc": 0.79167919,
"epoch": 2.606602475928473,
"grad_norm": 1.7003644704818726,
"learning_rate": 5.06190485580872e-05,
"loss": 0.68689594,
"memory(GiB)": 67.73,
"step": 3790,
"train_speed(iter/s)": 0.039647
},
{
"acc": 0.78057427,
"epoch": 2.610041265474553,
"grad_norm": 1.7799345254898071,
"learning_rate": 5.0505340090082376e-05,
"loss": 0.75313406,
"memory(GiB)": 67.73,
"step": 3795,
"train_speed(iter/s)": 0.03967
},
{
"acc": 0.78115511,
"epoch": 2.6134800550206325,
"grad_norm": 1.3012539148330688,
"learning_rate": 5.039162903355639e-05,
"loss": 0.75619287,
"memory(GiB)": 67.73,
"step": 3800,
"train_speed(iter/s)": 0.039687
},
{
"epoch": 2.6134800550206325,
"eval_acc": 0.7681029828586854,
"eval_loss": 0.8214000463485718,
"eval_runtime": 1118.8883,
"eval_samples_per_second": 3.828,
"eval_steps_per_second": 0.069,
"step": 3800
},
{
"acc": 0.78926849,
"epoch": 2.6169188445667126,
"grad_norm": 1.3302139043807983,
"learning_rate": 5.027791597679603e-05,
"loss": 0.72202902,
"memory(GiB)": 67.73,
"step": 3805,
"train_speed(iter/s)": 0.039252
},
{
"acc": 0.788554,
"epoch": 2.6203576341127923,
"grad_norm": 1.3796292543411255,
"learning_rate": 5.0164201508098486e-05,
"loss": 0.73341327,
"memory(GiB)": 67.73,
"step": 3810,
"train_speed(iter/s)": 0.039275
},
{
"acc": 0.78986712,
"epoch": 2.623796423658872,
"grad_norm": 1.5008918046951294,
"learning_rate": 5.00504862157682e-05,
"loss": 0.70993729,
"memory(GiB)": 67.73,
"step": 3815,
"train_speed(iter/s)": 0.0393
},
{
"acc": 0.79516368,
"epoch": 2.627235213204952,
"grad_norm": 1.3220473527908325,
"learning_rate": 4.9936770688113924e-05,
"loss": 0.70671806,
"memory(GiB)": 67.73,
"step": 3820,
"train_speed(iter/s)": 0.039326
},
{
"acc": 0.77930651,
"epoch": 2.6306740027510314,
"grad_norm": 1.3324934244155884,
"learning_rate": 4.982305551344558e-05,
"loss": 0.76113019,
"memory(GiB)": 67.73,
"step": 3825,
"train_speed(iter/s)": 0.039345
},
{
"acc": 0.78931274,
"epoch": 2.6341127922971115,
"grad_norm": 1.561617374420166,
"learning_rate": 4.970934128007131e-05,
"loss": 0.73203354,
"memory(GiB)": 67.73,
"step": 3830,
"train_speed(iter/s)": 0.039369
},
{
"acc": 0.78196325,
"epoch": 2.637551581843191,
"grad_norm": 1.5396491289138794,
"learning_rate": 4.959562857629432e-05,
"loss": 0.74629278,
"memory(GiB)": 67.73,
"step": 3835,
"train_speed(iter/s)": 0.039389
},
{
"acc": 0.79152188,
"epoch": 2.640990371389271,
"grad_norm": 1.5757373571395874,
"learning_rate": 4.948191799041e-05,
"loss": 0.71405354,
"memory(GiB)": 67.73,
"step": 3840,
"train_speed(iter/s)": 0.039408
},
{
"acc": 0.78608985,
"epoch": 2.644429160935351,
"grad_norm": 1.47767174243927,
"learning_rate": 4.936821011070271e-05,
"loss": 0.72424574,
"memory(GiB)": 67.73,
"step": 3845,
"train_speed(iter/s)": 0.039432
},
{
"acc": 0.78729639,
"epoch": 2.6478679504814306,
"grad_norm": 1.2262262105941772,
"learning_rate": 4.925450552544281e-05,
"loss": 0.72731237,
"memory(GiB)": 67.73,
"step": 3850,
"train_speed(iter/s)": 0.039453
},
{
"acc": 0.78679304,
"epoch": 2.6513067400275103,
"grad_norm": 1.4017452001571655,
"learning_rate": 4.914080482288365e-05,
"loss": 0.71175966,
"memory(GiB)": 67.73,
"step": 3855,
"train_speed(iter/s)": 0.039476
},
{
"acc": 0.79027119,
"epoch": 2.65474552957359,
"grad_norm": 1.5579813718795776,
"learning_rate": 4.902710859125846e-05,
"loss": 0.71102552,
"memory(GiB)": 67.98,
"step": 3860,
"train_speed(iter/s)": 0.039498
},
{
"acc": 0.79366422,
"epoch": 2.6581843191196697,
"grad_norm": 1.3325603008270264,
"learning_rate": 4.8913417418777377e-05,
"loss": 0.69916854,
"memory(GiB)": 67.98,
"step": 3865,
"train_speed(iter/s)": 0.039517
},
{
"acc": 0.78973618,
"epoch": 2.66162310866575,
"grad_norm": 1.4464627504348755,
"learning_rate": 4.879973189362433e-05,
"loss": 0.72573528,
"memory(GiB)": 67.98,
"step": 3870,
"train_speed(iter/s)": 0.039542
},
{
"acc": 0.78937593,
"epoch": 2.6650618982118295,
"grad_norm": 1.4809215068817139,
"learning_rate": 4.8686052603954065e-05,
"loss": 0.72520885,
"memory(GiB)": 67.98,
"step": 3875,
"train_speed(iter/s)": 0.039562
},
{
"acc": 0.7916564,
"epoch": 2.668500687757909,
"grad_norm": 1.4060372114181519,
"learning_rate": 4.857238013788902e-05,
"loss": 0.71384468,
"memory(GiB)": 67.98,
"step": 3880,
"train_speed(iter/s)": 0.039586
},
{
"acc": 0.78981237,
"epoch": 2.671939477303989,
"grad_norm": 1.481585144996643,
"learning_rate": 4.845871508351637e-05,
"loss": 0.72426672,
"memory(GiB)": 67.98,
"step": 3885,
"train_speed(iter/s)": 0.039608
},
{
"acc": 0.79329553,
"epoch": 2.6753782668500685,
"grad_norm": 1.6132746934890747,
"learning_rate": 4.834505802888493e-05,
"loss": 0.70904198,
"memory(GiB)": 67.98,
"step": 3890,
"train_speed(iter/s)": 0.039631
},
{
"acc": 0.78727617,
"epoch": 2.6788170563961486,
"grad_norm": 1.845495343208313,
"learning_rate": 4.8231409562002164e-05,
"loss": 0.72750425,
"memory(GiB)": 67.98,
"step": 3895,
"train_speed(iter/s)": 0.039654
},
{
"acc": 0.78334684,
"epoch": 2.6822558459422283,
"grad_norm": 1.6697547435760498,
"learning_rate": 4.811777027083104e-05,
"loss": 0.74594717,
"memory(GiB)": 67.98,
"step": 3900,
"train_speed(iter/s)": 0.039676
},
{
"epoch": 2.6822558459422283,
"eval_acc": 0.7695764160705448,
"eval_loss": 0.8164530396461487,
"eval_runtime": 1094.9986,
"eval_samples_per_second": 3.911,
"eval_steps_per_second": 0.07,
"step": 3900
},
{
"acc": 0.78133011,
"epoch": 2.685694635488308,
"grad_norm": 1.5049043893814087,
"learning_rate": 4.80041407432871e-05,
"loss": 0.74013877,
"memory(GiB)": 67.98,
"step": 3905,
"train_speed(iter/s)": 0.039262
},
{
"acc": 0.78779163,
"epoch": 2.689133425034388,
"grad_norm": 1.292845606803894,
"learning_rate": 4.7890521567235375e-05,
"loss": 0.73777471,
"memory(GiB)": 67.98,
"step": 3910,
"train_speed(iter/s)": 0.039285
},
{
"acc": 0.78793478,
"epoch": 2.692572214580468,
"grad_norm": 1.6969997882843018,
"learning_rate": 4.7776913330487335e-05,
"loss": 0.72460685,
"memory(GiB)": 67.98,
"step": 3915,
"train_speed(iter/s)": 0.039309
},
{
"acc": 0.78481874,
"epoch": 2.6960110041265475,
"grad_norm": 1.6642791032791138,
"learning_rate": 4.766331662079784e-05,
"loss": 0.73782244,
"memory(GiB)": 67.98,
"step": 3920,
"train_speed(iter/s)": 0.039331
},
{
"acc": 0.77672281,
"epoch": 2.699449793672627,
"grad_norm": 1.464065670967102,
"learning_rate": 4.754973202586213e-05,
"loss": 0.77285328,
"memory(GiB)": 67.98,
"step": 3925,
"train_speed(iter/s)": 0.039351
},
{
"acc": 0.78013086,
"epoch": 2.702888583218707,
"grad_norm": 1.6267447471618652,
"learning_rate": 4.7436160133312756e-05,
"loss": 0.77444224,
"memory(GiB)": 67.98,
"step": 3930,
"train_speed(iter/s)": 0.039372
},
{
"acc": 0.79396415,
"epoch": 2.706327372764787,
"grad_norm": 1.377986192703247,
"learning_rate": 4.7322601530716593e-05,
"loss": 0.69987969,
"memory(GiB)": 67.98,
"step": 3935,
"train_speed(iter/s)": 0.039392
},
{
"acc": 0.78015747,
"epoch": 2.7097661623108666,
"grad_norm": 1.5132167339324951,
"learning_rate": 4.72090568055717e-05,
"loss": 0.73972359,
"memory(GiB)": 67.98,
"step": 3940,
"train_speed(iter/s)": 0.039413
},
{
"acc": 0.78305364,
"epoch": 2.7132049518569463,
"grad_norm": 1.3939101696014404,
"learning_rate": 4.709552654530438e-05,
"loss": 0.74475136,
"memory(GiB)": 67.98,
"step": 3945,
"train_speed(iter/s)": 0.039437
},
{
"acc": 0.79228973,
"epoch": 2.716643741403026,
"grad_norm": 1.5657391548156738,
"learning_rate": 4.69820113372661e-05,
"loss": 0.70100541,
"memory(GiB)": 67.98,
"step": 3950,
"train_speed(iter/s)": 0.039459
},
{
"acc": 0.79344339,
"epoch": 2.7200825309491057,
"grad_norm": 1.480087399482727,
"learning_rate": 4.686851176873045e-05,
"loss": 0.70072994,
"memory(GiB)": 67.98,
"step": 3955,
"train_speed(iter/s)": 0.039483
},
{
"acc": 0.79308243,
"epoch": 2.723521320495186,
"grad_norm": 1.5921666622161865,
"learning_rate": 4.6755028426890096e-05,
"loss": 0.70272703,
"memory(GiB)": 67.98,
"step": 3960,
"train_speed(iter/s)": 0.039506
},
{
"acc": 0.79001474,
"epoch": 2.7269601100412655,
"grad_norm": 1.3979772329330444,
"learning_rate": 4.664156189885376e-05,
"loss": 0.69688091,
"memory(GiB)": 67.98,
"step": 3965,
"train_speed(iter/s)": 0.039529
},
{
"acc": 0.77611008,
"epoch": 2.730398899587345,
"grad_norm": 1.5082849264144897,
"learning_rate": 4.65281127716432e-05,
"loss": 0.774436,
"memory(GiB)": 67.98,
"step": 3970,
"train_speed(iter/s)": 0.039554
},
{
"acc": 0.78162088,
"epoch": 2.7338376891334253,
"grad_norm": 1.5324316024780273,
"learning_rate": 4.64146816321901e-05,
"loss": 0.73829603,
"memory(GiB)": 67.98,
"step": 3975,
"train_speed(iter/s)": 0.039575
},
{
"acc": 0.78739605,
"epoch": 2.737276478679505,
"grad_norm": 1.5039098262786865,
"learning_rate": 4.630126906733315e-05,
"loss": 0.73118725,
"memory(GiB)": 67.98,
"step": 3980,
"train_speed(iter/s)": 0.039598
},
{
"acc": 0.7873105,
"epoch": 2.7407152682255846,
"grad_norm": 1.6895498037338257,
"learning_rate": 4.6187875663814886e-05,
"loss": 0.72477093,
"memory(GiB)": 67.98,
"step": 3985,
"train_speed(iter/s)": 0.039618
},
{
"acc": 0.78072052,
"epoch": 2.7441540577716643,
"grad_norm": 1.350480318069458,
"learning_rate": 4.607450200827874e-05,
"loss": 0.73954563,
"memory(GiB)": 67.98,
"step": 3990,
"train_speed(iter/s)": 0.039639
},
{
"acc": 0.78461032,
"epoch": 2.747592847317744,
"grad_norm": 1.5248438119888306,
"learning_rate": 4.596114868726598e-05,
"loss": 0.7439085,
"memory(GiB)": 67.98,
"step": 3995,
"train_speed(iter/s)": 0.039656
},
{
"acc": 0.7952045,
"epoch": 2.751031636863824,
"grad_norm": 1.2919889688491821,
"learning_rate": 4.5847816287212645e-05,
"loss": 0.70409346,
"memory(GiB)": 67.98,
"step": 4000,
"train_speed(iter/s)": 0.03968
},
{
"epoch": 2.751031636863824,
"eval_acc": 0.7708024024834661,
"eval_loss": 0.8120156526565552,
"eval_runtime": 1144.2771,
"eval_samples_per_second": 3.743,
"eval_steps_per_second": 0.067,
"step": 4000
},
{
"acc": 0.78138909,
"epoch": 2.754470426409904,
"grad_norm": 1.685054063796997,
"learning_rate": 4.57345053944466e-05,
"loss": 0.76331453,
"memory(GiB)": 67.98,
"step": 4005,
"train_speed(iter/s)": 0.039257
},
{
"acc": 0.79411173,
"epoch": 2.7579092159559835,
"grad_norm": 2.0349268913269043,
"learning_rate": 4.562121659518438e-05,
"loss": 0.71027813,
"memory(GiB)": 67.98,
"step": 4010,
"train_speed(iter/s)": 0.039282
},
{
"acc": 0.78988757,
"epoch": 2.761348005502063,
"grad_norm": 1.3015258312225342,
"learning_rate": 4.5507950475528236e-05,
"loss": 0.71334782,
"memory(GiB)": 67.98,
"step": 4015,
"train_speed(iter/s)": 0.039304
},
{
"acc": 0.79387317,
"epoch": 2.764786795048143,
"grad_norm": 1.4291696548461914,
"learning_rate": 4.539470762146308e-05,
"loss": 0.70652847,
"memory(GiB)": 67.98,
"step": 4020,
"train_speed(iter/s)": 0.03932
},
{
"acc": 0.78285937,
"epoch": 2.768225584594223,
"grad_norm": 1.477131962776184,
"learning_rate": 4.5281488618853503e-05,
"loss": 0.75896859,
"memory(GiB)": 67.98,
"step": 4025,
"train_speed(iter/s)": 0.039342
},
{
"acc": 0.78991375,
"epoch": 2.7716643741403026,
"grad_norm": 1.352389931678772,
"learning_rate": 4.516829405344063e-05,
"loss": 0.71030273,
"memory(GiB)": 67.98,
"step": 4030,
"train_speed(iter/s)": 0.039364
},
{
"acc": 0.79130993,
"epoch": 2.7751031636863823,
"grad_norm": 1.5674926042556763,
"learning_rate": 4.505512451083922e-05,
"loss": 0.71874084,
"memory(GiB)": 67.98,
"step": 4035,
"train_speed(iter/s)": 0.039386
},
{
"acc": 0.79276628,
"epoch": 2.7785419532324624,
"grad_norm": 1.943419098854065,
"learning_rate": 4.494198057653455e-05,
"loss": 0.71133614,
"memory(GiB)": 67.98,
"step": 4040,
"train_speed(iter/s)": 0.039408
},
{
"acc": 0.80222769,
"epoch": 2.7819807427785417,
"grad_norm": 1.6925394535064697,
"learning_rate": 4.482886283587938e-05,
"loss": 0.67353868,
"memory(GiB)": 67.98,
"step": 4045,
"train_speed(iter/s)": 0.039433
},
{
"acc": 0.80383835,
"epoch": 2.785419532324622,
"grad_norm": 1.4405827522277832,
"learning_rate": 4.471577187409103e-05,
"loss": 0.66345797,
"memory(GiB)": 67.98,
"step": 4050,
"train_speed(iter/s)": 0.039457
},
{
"acc": 0.80842638,
"epoch": 2.7888583218707015,
"grad_norm": 1.674682378768921,
"learning_rate": 4.460270827624821e-05,
"loss": 0.66658139,
"memory(GiB)": 67.98,
"step": 4055,
"train_speed(iter/s)": 0.039479
},
{
"acc": 0.79156666,
"epoch": 2.792297111416781,
"grad_norm": 1.3792381286621094,
"learning_rate": 4.4489672627288124e-05,
"loss": 0.73030577,
"memory(GiB)": 67.98,
"step": 4060,
"train_speed(iter/s)": 0.039504
},
{
"acc": 0.79346962,
"epoch": 2.7957359009628613,
"grad_norm": 1.404285192489624,
"learning_rate": 4.4376665512003304e-05,
"loss": 0.70117588,
"memory(GiB)": 67.98,
"step": 4065,
"train_speed(iter/s)": 0.039526
},
{
"acc": 0.790658,
"epoch": 2.799174690508941,
"grad_norm": 1.417019248008728,
"learning_rate": 4.4263687515038755e-05,
"loss": 0.70299535,
"memory(GiB)": 67.98,
"step": 4070,
"train_speed(iter/s)": 0.039548
},
{
"acc": 0.78736067,
"epoch": 2.8026134800550206,
"grad_norm": 1.5088238716125488,
"learning_rate": 4.415073922088876e-05,
"loss": 0.73802028,
"memory(GiB)": 67.98,
"step": 4075,
"train_speed(iter/s)": 0.039565
},
{
"acc": 0.79492655,
"epoch": 2.8060522696011003,
"grad_norm": 1.443625569343567,
"learning_rate": 4.4037821213893964e-05,
"loss": 0.71042171,
"memory(GiB)": 67.98,
"step": 4080,
"train_speed(iter/s)": 0.039586
},
{
"acc": 0.79075756,
"epoch": 2.80949105914718,
"grad_norm": 1.464545726776123,
"learning_rate": 4.392493407823832e-05,
"loss": 0.70024977,
"memory(GiB)": 67.98,
"step": 4085,
"train_speed(iter/s)": 0.039605
},
{
"acc": 0.79780464,
"epoch": 2.81292984869326,
"grad_norm": 1.6561044454574585,
"learning_rate": 4.3812078397946074e-05,
"loss": 0.69342613,
"memory(GiB)": 67.98,
"step": 4090,
"train_speed(iter/s)": 0.039624
},
{
"acc": 0.78435755,
"epoch": 2.81636863823934,
"grad_norm": 1.3976974487304688,
"learning_rate": 4.369925475687873e-05,
"loss": 0.71552553,
"memory(GiB)": 67.98,
"step": 4095,
"train_speed(iter/s)": 0.039648
},
{
"acc": 0.7799448,
"epoch": 2.8198074277854195,
"grad_norm": 1.3698362112045288,
"learning_rate": 4.358646373873203e-05,
"loss": 0.75982933,
"memory(GiB)": 67.98,
"step": 4100,
"train_speed(iter/s)": 0.039669
},
{
"epoch": 2.8198074277854195,
"eval_acc": 0.7719721509875377,
"eval_loss": 0.8082969784736633,
"eval_runtime": 1151.3186,
"eval_samples_per_second": 3.72,
"eval_steps_per_second": 0.067,
"step": 4100
},
{
"acc": 0.78468771,
"epoch": 2.823246217331499,
"grad_norm": 1.7453495264053345,
"learning_rate": 4.3473705927032957e-05,
"loss": 0.73120604,
"memory(GiB)": 67.98,
"step": 4105,
"train_speed(iter/s)": 0.039254
},
{
"acc": 0.77324467,
"epoch": 2.826685006877579,
"grad_norm": 1.309380292892456,
"learning_rate": 4.336098190513667e-05,
"loss": 0.7686954,
"memory(GiB)": 67.98,
"step": 4110,
"train_speed(iter/s)": 0.039274
},
{
"acc": 0.78504181,
"epoch": 2.830123796423659,
"grad_norm": 1.3735424280166626,
"learning_rate": 4.324829225622355e-05,
"loss": 0.72278986,
"memory(GiB)": 67.98,
"step": 4115,
"train_speed(iter/s)": 0.039295
},
{
"acc": 0.78531666,
"epoch": 2.8335625859697386,
"grad_norm": 1.3972020149230957,
"learning_rate": 4.3135637563296157e-05,
"loss": 0.74182968,
"memory(GiB)": 67.98,
"step": 4120,
"train_speed(iter/s)": 0.039316
},
{
"acc": 0.78637772,
"epoch": 2.8370013755158183,
"grad_norm": 1.5424326658248901,
"learning_rate": 4.3023018409176145e-05,
"loss": 0.74376593,
"memory(GiB)": 67.98,
"step": 4125,
"train_speed(iter/s)": 0.039338
},
{
"acc": 0.79664993,
"epoch": 2.8404401650618984,
"grad_norm": 1.3284099102020264,
"learning_rate": 4.2910435376501365e-05,
"loss": 0.67242994,
"memory(GiB)": 67.98,
"step": 4130,
"train_speed(iter/s)": 0.039363
},
{
"acc": 0.78375196,
"epoch": 2.843878954607978,
"grad_norm": 1.4063657522201538,
"learning_rate": 4.279788904772275e-05,
"loss": 0.73797774,
"memory(GiB)": 67.98,
"step": 4135,
"train_speed(iter/s)": 0.039386
},
{
"acc": 0.80310926,
"epoch": 2.847317744154058,
"grad_norm": 1.6251460313796997,
"learning_rate": 4.268538000510139e-05,
"loss": 0.67094946,
"memory(GiB)": 67.98,
"step": 4140,
"train_speed(iter/s)": 0.039411
},
{
"acc": 0.78242793,
"epoch": 2.8507565337001375,
"grad_norm": 1.4719781875610352,
"learning_rate": 4.257290883070545e-05,
"loss": 0.7414422,
"memory(GiB)": 67.98,
"step": 4145,
"train_speed(iter/s)": 0.039435
},
{
"acc": 0.79309282,
"epoch": 2.854195323246217,
"grad_norm": 1.491889238357544,
"learning_rate": 4.246047610640717e-05,
"loss": 0.69513445,
"memory(GiB)": 67.98,
"step": 4150,
"train_speed(iter/s)": 0.039458
},
{
"acc": 0.79532785,
"epoch": 2.8576341127922973,
"grad_norm": 1.4044826030731201,
"learning_rate": 4.2348082413879894e-05,
"loss": 0.69395657,
"memory(GiB)": 67.98,
"step": 4155,
"train_speed(iter/s)": 0.039483
},
{
"acc": 0.79063025,
"epoch": 2.861072902338377,
"grad_norm": 1.4058098793029785,
"learning_rate": 4.223572833459501e-05,
"loss": 0.71690941,
"memory(GiB)": 67.98,
"step": 4160,
"train_speed(iter/s)": 0.039508
},
{
"acc": 0.79869499,
"epoch": 2.8645116918844566,
"grad_norm": 1.6210905313491821,
"learning_rate": 4.212341444981898e-05,
"loss": 0.6896822,
"memory(GiB)": 67.98,
"step": 4165,
"train_speed(iter/s)": 0.039532
},
{
"acc": 0.79149799,
"epoch": 2.8679504814305363,
"grad_norm": 1.3731998205184937,
"learning_rate": 4.2011141340610326e-05,
"loss": 0.7168128,
"memory(GiB)": 67.98,
"step": 4170,
"train_speed(iter/s)": 0.039554
},
{
"acc": 0.78879414,
"epoch": 2.871389270976616,
"grad_norm": 1.632126808166504,
"learning_rate": 4.189890958781662e-05,
"loss": 0.72364569,
"memory(GiB)": 67.98,
"step": 4175,
"train_speed(iter/s)": 0.039576
},
{
"acc": 0.78361959,
"epoch": 2.874828060522696,
"grad_norm": 1.4791241884231567,
"learning_rate": 4.178671977207143e-05,
"loss": 0.73310771,
"memory(GiB)": 67.98,
"step": 4180,
"train_speed(iter/s)": 0.039599
},
{
"acc": 0.79908352,
"epoch": 2.878266850068776,
"grad_norm": 1.7965590953826904,
"learning_rate": 4.1674572473791395e-05,
"loss": 0.69370174,
"memory(GiB)": 67.98,
"step": 4185,
"train_speed(iter/s)": 0.039621
},
{
"acc": 0.78735409,
"epoch": 2.8817056396148555,
"grad_norm": 1.6834094524383545,
"learning_rate": 4.156246827317322e-05,
"loss": 0.72156515,
"memory(GiB)": 67.98,
"step": 4190,
"train_speed(iter/s)": 0.039644
},
{
"acc": 0.78155212,
"epoch": 2.8851444291609356,
"grad_norm": 1.872073769569397,
"learning_rate": 4.14504077501906e-05,
"loss": 0.74036779,
"memory(GiB)": 67.98,
"step": 4195,
"train_speed(iter/s)": 0.039667
},
{
"acc": 0.79145999,
"epoch": 2.8885832187070153,
"grad_norm": 1.3122477531433105,
"learning_rate": 4.133839148459126e-05,
"loss": 0.71245356,
"memory(GiB)": 67.98,
"step": 4200,
"train_speed(iter/s)": 0.039688
},
{
"epoch": 2.8885832187070153,
"eval_acc": 0.7714435146443515,
"eval_loss": 0.805468738079071,
"eval_runtime": 1087.9192,
"eval_samples_per_second": 3.937,
"eval_steps_per_second": 0.071,
"step": 4200
},
{
"acc": 0.79164152,
"epoch": 2.892022008253095,
"grad_norm": 1.5151678323745728,
"learning_rate": 4.122642005589398e-05,
"loss": 0.71430082,
"memory(GiB)": 67.98,
"step": 4205,
"train_speed(iter/s)": 0.039306
},
{
"acc": 0.79683599,
"epoch": 2.8954607977991746,
"grad_norm": 1.5568134784698486,
"learning_rate": 4.111449404338556e-05,
"loss": 0.69535141,
"memory(GiB)": 67.98,
"step": 4210,
"train_speed(iter/s)": 0.039331
},
{
"acc": 0.78143187,
"epoch": 2.8988995873452543,
"grad_norm": 1.6322216987609863,
"learning_rate": 4.100261402611785e-05,
"loss": 0.74795027,
"memory(GiB)": 67.98,
"step": 4215,
"train_speed(iter/s)": 0.039355
},
{
"acc": 0.779213,
"epoch": 2.9023383768913344,
"grad_norm": 1.479254126548767,
"learning_rate": 4.089078058290476e-05,
"loss": 0.76658916,
"memory(GiB)": 67.98,
"step": 4220,
"train_speed(iter/s)": 0.039376
},
{
"acc": 0.7864768,
"epoch": 2.905777166437414,
"grad_norm": 1.4543869495391846,
"learning_rate": 4.077899429231921e-05,
"loss": 0.71652775,
"memory(GiB)": 67.98,
"step": 4225,
"train_speed(iter/s)": 0.039397
},
{
"acc": 0.78852596,
"epoch": 2.909215955983494,
"grad_norm": 1.5353100299835205,
"learning_rate": 4.066725573269019e-05,
"loss": 0.7080534,
"memory(GiB)": 67.98,
"step": 4230,
"train_speed(iter/s)": 0.039421
},
{
"acc": 0.78499179,
"epoch": 2.9126547455295735,
"grad_norm": 1.7298237085342407,
"learning_rate": 4.055556548209975e-05,
"loss": 0.73987002,
"memory(GiB)": 67.98,
"step": 4235,
"train_speed(iter/s)": 0.039446
},
{
"acc": 0.79733381,
"epoch": 2.916093535075653,
"grad_norm": 1.3336453437805176,
"learning_rate": 4.044392411838003e-05,
"loss": 0.6844718,
"memory(GiB)": 67.98,
"step": 4240,
"train_speed(iter/s)": 0.039468
},
{
"acc": 0.78939738,
"epoch": 2.9195323246217333,
"grad_norm": 1.5154653787612915,
"learning_rate": 4.033233221911023e-05,
"loss": 0.72056727,
"memory(GiB)": 67.98,
"step": 4245,
"train_speed(iter/s)": 0.039491
},
{
"acc": 0.78145633,
"epoch": 2.922971114167813,
"grad_norm": 1.6946913003921509,
"learning_rate": 4.022079036161366e-05,
"loss": 0.74741826,
"memory(GiB)": 67.98,
"step": 4250,
"train_speed(iter/s)": 0.039515
},
{
"acc": 0.78894501,
"epoch": 2.9264099037138926,
"grad_norm": 1.7859429121017456,
"learning_rate": 4.0109299122954716e-05,
"loss": 0.71477051,
"memory(GiB)": 67.98,
"step": 4255,
"train_speed(iter/s)": 0.039538
},
{
"acc": 0.80096769,
"epoch": 2.9298486932599723,
"grad_norm": 1.550113558769226,
"learning_rate": 3.999785907993594e-05,
"loss": 0.66986256,
"memory(GiB)": 67.98,
"step": 4260,
"train_speed(iter/s)": 0.039561
},
{
"acc": 0.79326687,
"epoch": 2.933287482806052,
"grad_norm": 1.3913989067077637,
"learning_rate": 3.9886470809095015e-05,
"loss": 0.70431404,
"memory(GiB)": 67.98,
"step": 4265,
"train_speed(iter/s)": 0.039585
},
{
"acc": 0.78397541,
"epoch": 2.936726272352132,
"grad_norm": 1.7210358381271362,
"learning_rate": 3.9775134886701754e-05,
"loss": 0.74710093,
"memory(GiB)": 67.98,
"step": 4270,
"train_speed(iter/s)": 0.039608
},
{
"acc": 0.79305878,
"epoch": 2.940165061898212,
"grad_norm": 1.7996710538864136,
"learning_rate": 3.966385188875515e-05,
"loss": 0.70518632,
"memory(GiB)": 67.98,
"step": 4275,
"train_speed(iter/s)": 0.039629
},
{
"acc": 0.79449868,
"epoch": 2.9436038514442915,
"grad_norm": 1.8419127464294434,
"learning_rate": 3.9552622390980425e-05,
"loss": 0.69353704,
"memory(GiB)": 67.98,
"step": 4280,
"train_speed(iter/s)": 0.039653
},
{
"acc": 0.79312563,
"epoch": 2.9470426409903716,
"grad_norm": 1.6806973218917847,
"learning_rate": 3.944144696882598e-05,
"loss": 0.70997305,
"memory(GiB)": 67.98,
"step": 4285,
"train_speed(iter/s)": 0.039675
},
{
"acc": 0.77975159,
"epoch": 2.9504814305364513,
"grad_norm": 1.5093615055084229,
"learning_rate": 3.9330326197460466e-05,
"loss": 0.7535347,
"memory(GiB)": 67.98,
"step": 4290,
"train_speed(iter/s)": 0.039698
},
{
"acc": 0.77885957,
"epoch": 2.953920220082531,
"grad_norm": 1.7408277988433838,
"learning_rate": 3.921926065176977e-05,
"loss": 0.75995541,
"memory(GiB)": 67.98,
"step": 4295,
"train_speed(iter/s)": 0.03972
},
{
"acc": 0.78874741,
"epoch": 2.9573590096286106,
"grad_norm": 1.6146240234375,
"learning_rate": 3.9108250906354117e-05,
"loss": 0.71309519,
"memory(GiB)": 67.98,
"step": 4300,
"train_speed(iter/s)": 0.039742
},
{
"epoch": 2.9573590096286106,
"eval_acc": 0.7727257389661223,
"eval_loss": 0.8019844889640808,
"eval_runtime": 1140.1199,
"eval_samples_per_second": 3.757,
"eval_steps_per_second": 0.068,
"step": 4300
},
{
"acc": 0.78785725,
"epoch": 2.9607977991746903,
"grad_norm": 1.555442452430725,
"learning_rate": 3.8997297535525026e-05,
"loss": 0.72890291,
"memory(GiB)": 67.98,
"step": 4305,
"train_speed(iter/s)": 0.039349
},
{
"acc": 0.78933182,
"epoch": 2.9642365887207704,
"grad_norm": 1.710303783416748,
"learning_rate": 3.888640111330235e-05,
"loss": 0.73036714,
"memory(GiB)": 67.98,
"step": 4310,
"train_speed(iter/s)": 0.039373
},
{
"acc": 0.79446011,
"epoch": 2.96767537826685,
"grad_norm": 1.7401241064071655,
"learning_rate": 3.877556221341133e-05,
"loss": 0.70017486,
"memory(GiB)": 67.98,
"step": 4315,
"train_speed(iter/s)": 0.039398
},
{
"acc": 0.78983717,
"epoch": 2.97111416781293,
"grad_norm": 1.5789563655853271,
"learning_rate": 3.866478140927961e-05,
"loss": 0.70362015,
"memory(GiB)": 67.98,
"step": 4320,
"train_speed(iter/s)": 0.039419
},
{
"acc": 0.79765377,
"epoch": 2.9745529573590095,
"grad_norm": 2.0560176372528076,
"learning_rate": 3.8554059274034246e-05,
"loss": 0.68930745,
"memory(GiB)": 67.98,
"step": 4325,
"train_speed(iter/s)": 0.039442
},
{
"acc": 0.79753799,
"epoch": 2.977991746905089,
"grad_norm": 1.5742462873458862,
"learning_rate": 3.844339638049885e-05,
"loss": 0.68201818,
"memory(GiB)": 67.98,
"step": 4330,
"train_speed(iter/s)": 0.039466
},
{
"acc": 0.7916151,
"epoch": 2.9814305364511693,
"grad_norm": 1.7083474397659302,
"learning_rate": 3.8332793301190456e-05,
"loss": 0.6970108,
"memory(GiB)": 67.98,
"step": 4335,
"train_speed(iter/s)": 0.03949
},
{
"acc": 0.7908206,
"epoch": 2.984869325997249,
"grad_norm": 1.6145273447036743,
"learning_rate": 3.822225060831669e-05,
"loss": 0.72308092,
"memory(GiB)": 67.98,
"step": 4340,
"train_speed(iter/s)": 0.039512
},
{
"acc": 0.79732313,
"epoch": 2.9883081155433286,
"grad_norm": 1.3791991472244263,
"learning_rate": 3.8111768873772757e-05,
"loss": 0.68552351,
"memory(GiB)": 67.98,
"step": 4345,
"train_speed(iter/s)": 0.039537
},
{
"acc": 0.78215866,
"epoch": 2.9917469050894088,
"grad_norm": 1.587035059928894,
"learning_rate": 3.800134866913852e-05,
"loss": 0.74166784,
"memory(GiB)": 67.98,
"step": 4350,
"train_speed(iter/s)": 0.03956
},
{
"acc": 0.7990098,
"epoch": 2.9951856946354884,
"grad_norm": 1.8290317058563232,
"learning_rate": 3.7890990565675476e-05,
"loss": 0.68875532,
"memory(GiB)": 67.98,
"step": 4355,
"train_speed(iter/s)": 0.039584
},
{
"acc": 0.78591781,
"epoch": 2.998624484181568,
"grad_norm": 1.8819842338562012,
"learning_rate": 3.778069513432386e-05,
"loss": 0.72816386,
"memory(GiB)": 67.98,
"step": 4360,
"train_speed(iter/s)": 0.039604
},
{
"acc": 0.80687866,
"epoch": 3.002063273727648,
"grad_norm": 1.3995342254638672,
"learning_rate": 3.767046294569967e-05,
"loss": 0.64414482,
"memory(GiB)": 67.98,
"step": 4365,
"train_speed(iter/s)": 0.039611
},
{
"acc": 0.80390854,
"epoch": 3.0055020632737275,
"grad_norm": 1.5679051876068115,
"learning_rate": 3.75602945700917e-05,
"loss": 0.66774035,
"memory(GiB)": 67.98,
"step": 4370,
"train_speed(iter/s)": 0.039629
},
{
"acc": 0.79944701,
"epoch": 3.0089408528198076,
"grad_norm": 1.531205177307129,
"learning_rate": 3.7450190577458635e-05,
"loss": 0.67704058,
"memory(GiB)": 67.98,
"step": 4375,
"train_speed(iter/s)": 0.039649
},
{
"acc": 0.80703545,
"epoch": 3.0123796423658873,
"grad_norm": 6.210807800292969,
"learning_rate": 3.734015153742605e-05,
"loss": 0.64957862,
"memory(GiB)": 67.98,
"step": 4380,
"train_speed(iter/s)": 0.039672
},
{
"acc": 0.80491982,
"epoch": 3.015818431911967,
"grad_norm": 1.6315518617630005,
"learning_rate": 3.7230178019283506e-05,
"loss": 0.65046768,
"memory(GiB)": 67.98,
"step": 4385,
"train_speed(iter/s)": 0.039693
},
{
"acc": 0.8061985,
"epoch": 3.0192572214580466,
"grad_norm": 1.478652000427246,
"learning_rate": 3.712027059198157e-05,
"loss": 0.64048343,
"memory(GiB)": 67.98,
"step": 4390,
"train_speed(iter/s)": 0.039708
},
{
"acc": 0.81162281,
"epoch": 3.0226960110041263,
"grad_norm": 1.623420238494873,
"learning_rate": 3.701042982412889e-05,
"loss": 0.62963314,
"memory(GiB)": 67.98,
"step": 4395,
"train_speed(iter/s)": 0.03973
},
{
"acc": 0.80488195,
"epoch": 3.0261348005502064,
"grad_norm": 1.6778922080993652,
"learning_rate": 3.690065628398926e-05,
"loss": 0.65336089,
"memory(GiB)": 67.98,
"step": 4400,
"train_speed(iter/s)": 0.039751
},
{
"epoch": 3.0261348005502064,
"eval_acc": 0.7711454537274486,
"eval_loss": 0.8140049576759338,
"eval_runtime": 1141.0798,
"eval_samples_per_second": 3.753,
"eval_steps_per_second": 0.067,
"step": 4400
},
{
"acc": 0.80764694,
"epoch": 3.029573590096286,
"grad_norm": 1.6117892265319824,
"learning_rate": 3.679095053947864e-05,
"loss": 0.6384645,
"memory(GiB)": 67.98,
"step": 4405,
"train_speed(iter/s)": 0.039366
},
{
"acc": 0.80960245,
"epoch": 3.033012379642366,
"grad_norm": 1.5972310304641724,
"learning_rate": 3.668131315816228e-05,
"loss": 0.63809519,
"memory(GiB)": 67.98,
"step": 4410,
"train_speed(iter/s)": 0.039387
},
{
"acc": 0.80579681,
"epoch": 3.0364511691884455,
"grad_norm": 1.6774109601974487,
"learning_rate": 3.657174470725173e-05,
"loss": 0.64105072,
"memory(GiB)": 67.98,
"step": 4415,
"train_speed(iter/s)": 0.039406
},
{
"acc": 0.81135626,
"epoch": 3.0398899587345256,
"grad_norm": 1.710260033607483,
"learning_rate": 3.646224575360194e-05,
"loss": 0.6407239,
"memory(GiB)": 67.98,
"step": 4420,
"train_speed(iter/s)": 0.039428
},
{
"acc": 0.81669779,
"epoch": 3.0433287482806053,
"grad_norm": 1.5772171020507812,
"learning_rate": 3.635281686370832e-05,
"loss": 0.61197987,
"memory(GiB)": 67.98,
"step": 4425,
"train_speed(iter/s)": 0.039449
},
{
"acc": 0.81082649,
"epoch": 3.046767537826685,
"grad_norm": 2.1017799377441406,
"learning_rate": 3.624345860370379e-05,
"loss": 0.63282819,
"memory(GiB)": 67.98,
"step": 4430,
"train_speed(iter/s)": 0.039468
},
{
"acc": 0.80507336,
"epoch": 3.0502063273727646,
"grad_norm": 1.904692530632019,
"learning_rate": 3.613417153935585e-05,
"loss": 0.63742828,
"memory(GiB)": 67.98,
"step": 4435,
"train_speed(iter/s)": 0.039486
},
{
"acc": 0.79859557,
"epoch": 3.0536451169188448,
"grad_norm": 1.673336148262024,
"learning_rate": 3.60249562360637e-05,
"loss": 0.67739854,
"memory(GiB)": 67.98,
"step": 4440,
"train_speed(iter/s)": 0.039507
},
{
"acc": 0.8053956,
"epoch": 3.0570839064649244,
"grad_norm": 1.6409105062484741,
"learning_rate": 3.591581325885528e-05,
"loss": 0.64070592,
"memory(GiB)": 67.98,
"step": 4445,
"train_speed(iter/s)": 0.039524
},
{
"acc": 0.81321754,
"epoch": 3.060522696011004,
"grad_norm": 1.599678874015808,
"learning_rate": 3.5806743172384325e-05,
"loss": 0.62494526,
"memory(GiB)": 67.98,
"step": 4450,
"train_speed(iter/s)": 0.039544
},
{
"acc": 0.81660137,
"epoch": 3.063961485557084,
"grad_norm": 1.527250051498413,
"learning_rate": 3.569774654092749e-05,
"loss": 0.61917772,
"memory(GiB)": 67.98,
"step": 4455,
"train_speed(iter/s)": 0.039565
},
{
"acc": 0.80815334,
"epoch": 3.0674002751031635,
"grad_norm": 1.9215754270553589,
"learning_rate": 3.5588823928381385e-05,
"loss": 0.64416943,
"memory(GiB)": 67.98,
"step": 4460,
"train_speed(iter/s)": 0.039584
},
{
"acc": 0.81522007,
"epoch": 3.0708390646492436,
"grad_norm": 1.771016240119934,
"learning_rate": 3.54799758982597e-05,
"loss": 0.62254939,
"memory(GiB)": 67.98,
"step": 4465,
"train_speed(iter/s)": 0.039604
},
{
"acc": 0.81300201,
"epoch": 3.0742778541953233,
"grad_norm": 1.5185010433197021,
"learning_rate": 3.537120301369029e-05,
"loss": 0.63570495,
"memory(GiB)": 67.98,
"step": 4470,
"train_speed(iter/s)": 0.039623
},
{
"acc": 0.79795976,
"epoch": 3.077716643741403,
"grad_norm": 1.7474913597106934,
"learning_rate": 3.526250583741219e-05,
"loss": 0.67301879,
"memory(GiB)": 67.98,
"step": 4475,
"train_speed(iter/s)": 0.039644
},
{
"acc": 0.80364552,
"epoch": 3.0811554332874826,
"grad_norm": 1.611039638519287,
"learning_rate": 3.51538849317728e-05,
"loss": 0.6553544,
"memory(GiB)": 67.98,
"step": 4480,
"train_speed(iter/s)": 0.039664
},
{
"acc": 0.80711832,
"epoch": 3.0845942228335628,
"grad_norm": 1.956214189529419,
"learning_rate": 3.504534085872491e-05,
"loss": 0.65441723,
"memory(GiB)": 67.98,
"step": 4485,
"train_speed(iter/s)": 0.039686
},
{
"acc": 0.80393448,
"epoch": 3.0880330123796425,
"grad_norm": 1.7758394479751587,
"learning_rate": 3.493687417982382e-05,
"loss": 0.63968649,
"memory(GiB)": 67.98,
"step": 4490,
"train_speed(iter/s)": 0.039704
},
{
"acc": 0.80570278,
"epoch": 3.091471801925722,
"grad_norm": 1.878055453300476,
"learning_rate": 3.4828485456224454e-05,
"loss": 0.64807596,
"memory(GiB)": 67.98,
"step": 4495,
"train_speed(iter/s)": 0.039724
},
{
"acc": 0.80985212,
"epoch": 3.094910591471802,
"grad_norm": 1.647511601448059,
"learning_rate": 3.47201752486784e-05,
"loss": 0.63398943,
"memory(GiB)": 67.98,
"step": 4500,
"train_speed(iter/s)": 0.039743
},
{
"epoch": 3.094910591471802,
"eval_acc": 0.7721239933414316,
"eval_loss": 0.81331866979599,
"eval_runtime": 1133.398,
"eval_samples_per_second": 3.779,
"eval_steps_per_second": 0.068,
"step": 4500
},
{
"acc": 0.80513477,
"epoch": 3.098349381017882,
"grad_norm": 1.8428512811660767,
"learning_rate": 3.461194411753105e-05,
"loss": 0.64937515,
"memory(GiB)": 67.98,
"step": 4505,
"train_speed(iter/s)": 0.03937
},
{
"acc": 0.81019039,
"epoch": 3.1017881705639616,
"grad_norm": 1.6519265174865723,
"learning_rate": 3.450379262271869e-05,
"loss": 0.63972459,
"memory(GiB)": 67.98,
"step": 4510,
"train_speed(iter/s)": 0.039392
},
{
"acc": 0.8086174,
"epoch": 3.1052269601100413,
"grad_norm": 1.7133119106292725,
"learning_rate": 3.439572132376563e-05,
"loss": 0.64712973,
"memory(GiB)": 67.98,
"step": 4515,
"train_speed(iter/s)": 0.039407
},
{
"acc": 0.79508266,
"epoch": 3.108665749656121,
"grad_norm": 1.6571804285049438,
"learning_rate": 3.428773077978125e-05,
"loss": 0.68026247,
"memory(GiB)": 67.98,
"step": 4520,
"train_speed(iter/s)": 0.039425
},
{
"acc": 0.8028862,
"epoch": 3.1121045392022006,
"grad_norm": 2.0089550018310547,
"learning_rate": 3.4179821549457166e-05,
"loss": 0.66466484,
"memory(GiB)": 67.98,
"step": 4525,
"train_speed(iter/s)": 0.039441
},
{
"acc": 0.7982996,
"epoch": 3.1155433287482808,
"grad_norm": 1.620611548423767,
"learning_rate": 3.407199419106429e-05,
"loss": 0.67201767,
"memory(GiB)": 67.98,
"step": 4530,
"train_speed(iter/s)": 0.03946
},
{
"acc": 0.81185446,
"epoch": 3.1189821182943605,
"grad_norm": 1.5307915210723877,
"learning_rate": 3.396424926244999e-05,
"loss": 0.62855453,
"memory(GiB)": 67.98,
"step": 4535,
"train_speed(iter/s)": 0.039478
},
{
"acc": 0.80879059,
"epoch": 3.12242090784044,
"grad_norm": 1.9358049631118774,
"learning_rate": 3.3856587321035206e-05,
"loss": 0.63443809,
"memory(GiB)": 67.98,
"step": 4540,
"train_speed(iter/s)": 0.0395
},
{
"acc": 0.81181793,
"epoch": 3.12585969738652,
"grad_norm": 1.9281483888626099,
"learning_rate": 3.374900892381146e-05,
"loss": 0.62519212,
"memory(GiB)": 67.98,
"step": 4545,
"train_speed(iter/s)": 0.039518
},
{
"acc": 0.79986091,
"epoch": 3.1292984869326,
"grad_norm": 1.8126670122146606,
"learning_rate": 3.3641514627338166e-05,
"loss": 0.67471228,
"memory(GiB)": 67.98,
"step": 4550,
"train_speed(iter/s)": 0.039538
},
{
"acc": 0.81441412,
"epoch": 3.1327372764786796,
"grad_norm": 1.9482190608978271,
"learning_rate": 3.353410498773954e-05,
"loss": 0.62350183,
"memory(GiB)": 67.98,
"step": 4555,
"train_speed(iter/s)": 0.039558
},
{
"acc": 0.80743856,
"epoch": 3.1361760660247593,
"grad_norm": 1.8278954029083252,
"learning_rate": 3.342678056070189e-05,
"loss": 0.65586147,
"memory(GiB)": 67.98,
"step": 4560,
"train_speed(iter/s)": 0.03958
},
{
"acc": 0.80009956,
"epoch": 3.139614855570839,
"grad_norm": 1.9203051328659058,
"learning_rate": 3.331954190147065e-05,
"loss": 0.67459331,
"memory(GiB)": 67.98,
"step": 4565,
"train_speed(iter/s)": 0.039599
},
{
"acc": 0.79797955,
"epoch": 3.1430536451169186,
"grad_norm": 1.78507399559021,
"learning_rate": 3.321238956484752e-05,
"loss": 0.68094501,
"memory(GiB)": 67.98,
"step": 4570,
"train_speed(iter/s)": 0.039619
},
{
"acc": 0.80958462,
"epoch": 3.1464924346629988,
"grad_norm": 1.8514398336410522,
"learning_rate": 3.310532410518765e-05,
"loss": 0.63833261,
"memory(GiB)": 67.98,
"step": 4575,
"train_speed(iter/s)": 0.039639
},
{
"acc": 0.8103529,
"epoch": 3.1499312242090785,
"grad_norm": 2.1083662509918213,
"learning_rate": 3.2998346076396664e-05,
"loss": 0.63392391,
"memory(GiB)": 67.98,
"step": 4580,
"train_speed(iter/s)": 0.039659
},
{
"acc": 0.80684109,
"epoch": 3.153370013755158,
"grad_norm": 2.081134080886841,
"learning_rate": 3.289145603192793e-05,
"loss": 0.65391574,
"memory(GiB)": 67.98,
"step": 4585,
"train_speed(iter/s)": 0.03968
},
{
"acc": 0.80696983,
"epoch": 3.156808803301238,
"grad_norm": 1.7588388919830322,
"learning_rate": 3.2784654524779587e-05,
"loss": 0.65089002,
"memory(GiB)": 67.98,
"step": 4590,
"train_speed(iter/s)": 0.039697
},
{
"acc": 0.80319796,
"epoch": 3.160247592847318,
"grad_norm": 1.8731495141983032,
"learning_rate": 3.267794210749173e-05,
"loss": 0.66944408,
"memory(GiB)": 67.98,
"step": 4595,
"train_speed(iter/s)": 0.039715
},
{
"acc": 0.80229826,
"epoch": 3.1636863823933976,
"grad_norm": 1.863386869430542,
"learning_rate": 3.2571319332143516e-05,
"loss": 0.67615876,
"memory(GiB)": 67.98,
"step": 4600,
"train_speed(iter/s)": 0.039734
},
{
"epoch": 3.1636863823933976,
"eval_acc": 0.7733612273361228,
"eval_loss": 0.8061870336532593,
"eval_runtime": 1127.8615,
"eval_samples_per_second": 3.797,
"eval_steps_per_second": 0.068,
"step": 4600
},
{
"acc": 0.81224995,
"epoch": 3.1671251719394773,
"grad_norm": 1.7116352319717407,
"learning_rate": 3.2464786750350434e-05,
"loss": 0.62269239,
"memory(GiB)": 67.98,
"step": 4605,
"train_speed(iter/s)": 0.03937
},
{
"acc": 0.80045443,
"epoch": 3.170563961485557,
"grad_norm": 1.838098406791687,
"learning_rate": 3.235834491326126e-05,
"loss": 0.65012379,
"memory(GiB)": 67.98,
"step": 4610,
"train_speed(iter/s)": 0.039391
},
{
"acc": 0.80470877,
"epoch": 3.1740027510316366,
"grad_norm": 1.8031960725784302,
"learning_rate": 3.225199437155532e-05,
"loss": 0.65979033,
"memory(GiB)": 67.98,
"step": 4615,
"train_speed(iter/s)": 0.039409
},
{
"acc": 0.80560265,
"epoch": 3.1774415405777168,
"grad_norm": 1.7068849802017212,
"learning_rate": 3.214573567543964e-05,
"loss": 0.63796139,
"memory(GiB)": 67.98,
"step": 4620,
"train_speed(iter/s)": 0.039427
},
{
"acc": 0.79233809,
"epoch": 3.1808803301237965,
"grad_norm": 1.7398771047592163,
"learning_rate": 3.203956937464607e-05,
"loss": 0.67283368,
"memory(GiB)": 67.98,
"step": 4625,
"train_speed(iter/s)": 0.039446
},
{
"acc": 0.81366425,
"epoch": 3.184319119669876,
"grad_norm": 1.7081953287124634,
"learning_rate": 3.1933496018428446e-05,
"loss": 0.62146492,
"memory(GiB)": 67.98,
"step": 4630,
"train_speed(iter/s)": 0.039466
},
{
"acc": 0.80281668,
"epoch": 3.187757909215956,
"grad_norm": 1.6009129285812378,
"learning_rate": 3.1827516155559786e-05,
"loss": 0.66720371,
"memory(GiB)": 67.98,
"step": 4635,
"train_speed(iter/s)": 0.039486
},
{
"acc": 0.80487442,
"epoch": 3.191196698762036,
"grad_norm": 1.8239426612854004,
"learning_rate": 3.1721630334329366e-05,
"loss": 0.64386883,
"memory(GiB)": 67.98,
"step": 4640,
"train_speed(iter/s)": 0.039505
},
{
"acc": 0.80696297,
"epoch": 3.1946354883081156,
"grad_norm": 1.906916856765747,
"learning_rate": 3.161583910253998e-05,
"loss": 0.64987645,
"memory(GiB)": 67.98,
"step": 4645,
"train_speed(iter/s)": 0.039524
},
{
"acc": 0.80997219,
"epoch": 3.1980742778541953,
"grad_norm": 2.060511350631714,
"learning_rate": 3.1510143007505016e-05,
"loss": 0.63655567,
"memory(GiB)": 67.98,
"step": 4650,
"train_speed(iter/s)": 0.039543
},
{
"acc": 0.79812059,
"epoch": 3.201513067400275,
"grad_norm": 1.793277382850647,
"learning_rate": 3.14045425960457e-05,
"loss": 0.68602118,
"memory(GiB)": 67.98,
"step": 4655,
"train_speed(iter/s)": 0.039564
},
{
"acc": 0.79850287,
"epoch": 3.204951856946355,
"grad_norm": 1.6924282312393188,
"learning_rate": 3.129903841448827e-05,
"loss": 0.67275462,
"memory(GiB)": 67.98,
"step": 4660,
"train_speed(iter/s)": 0.039583
},
{
"acc": 0.80627632,
"epoch": 3.2083906464924348,
"grad_norm": 1.678781509399414,
"learning_rate": 3.119363100866106e-05,
"loss": 0.65286617,
"memory(GiB)": 67.98,
"step": 4665,
"train_speed(iter/s)": 0.039602
},
{
"acc": 0.81515961,
"epoch": 3.2118294360385145,
"grad_norm": 1.9915016889572144,
"learning_rate": 3.108832092389172e-05,
"loss": 0.59764929,
"memory(GiB)": 67.98,
"step": 4670,
"train_speed(iter/s)": 0.039621
},
{
"acc": 0.80686855,
"epoch": 3.215268225584594,
"grad_norm": 1.8249253034591675,
"learning_rate": 3.098310870500448e-05,
"loss": 0.64462824,
"memory(GiB)": 67.98,
"step": 4675,
"train_speed(iter/s)": 0.03964
},
{
"acc": 0.80929985,
"epoch": 3.218707015130674,
"grad_norm": 1.7660592794418335,
"learning_rate": 3.087799489631721e-05,
"loss": 0.6324172,
"memory(GiB)": 67.98,
"step": 4680,
"train_speed(iter/s)": 0.039659
},
{
"acc": 0.80256157,
"epoch": 3.222145804676754,
"grad_norm": 1.9033777713775635,
"learning_rate": 3.077298004163865e-05,
"loss": 0.67533493,
"memory(GiB)": 67.98,
"step": 4685,
"train_speed(iter/s)": 0.039677
},
{
"acc": 0.80715237,
"epoch": 3.2255845942228336,
"grad_norm": 1.6797436475753784,
"learning_rate": 3.066806468426561e-05,
"loss": 0.64756646,
"memory(GiB)": 67.98,
"step": 4690,
"train_speed(iter/s)": 0.039699
},
{
"acc": 0.79631739,
"epoch": 3.2290233837689133,
"grad_norm": 1.6722263097763062,
"learning_rate": 3.056324936698014e-05,
"loss": 0.68136206,
"memory(GiB)": 67.98,
"step": 4695,
"train_speed(iter/s)": 0.03972
},
{
"acc": 0.81018467,
"epoch": 3.232462173314993,
"grad_norm": 1.9351452589035034,
"learning_rate": 3.0458534632046766e-05,
"loss": 0.63391657,
"memory(GiB)": 67.98,
"step": 4700,
"train_speed(iter/s)": 0.039741
},
{
"epoch": 3.232462173314993,
"eval_acc": 0.7737605164889548,
"eval_loss": 0.806867241859436,
"eval_runtime": 1123.2355,
"eval_samples_per_second": 3.813,
"eval_steps_per_second": 0.069,
"step": 4700
},
{
"acc": 0.81098757,
"epoch": 3.235900962861073,
"grad_norm": 1.8516818284988403,
"learning_rate": 3.0353921021209598e-05,
"loss": 0.63078384,
"memory(GiB)": 67.98,
"step": 4705,
"train_speed(iter/s)": 0.039386
},
{
"acc": 0.80097027,
"epoch": 3.2393397524071528,
"grad_norm": 1.9018975496292114,
"learning_rate": 3.02494090756896e-05,
"loss": 0.67307758,
"memory(GiB)": 67.98,
"step": 4710,
"train_speed(iter/s)": 0.039403
},
{
"acc": 0.78721581,
"epoch": 3.2427785419532325,
"grad_norm": 1.8945331573486328,
"learning_rate": 3.014499933618176e-05,
"loss": 0.71489978,
"memory(GiB)": 67.98,
"step": 4715,
"train_speed(iter/s)": 0.039419
},
{
"acc": 0.81447954,
"epoch": 3.246217331499312,
"grad_norm": 1.873476505279541,
"learning_rate": 3.004069234285235e-05,
"loss": 0.62558355,
"memory(GiB)": 67.98,
"step": 4720,
"train_speed(iter/s)": 0.039438
},
{
"acc": 0.80101833,
"epoch": 3.2496561210453923,
"grad_norm": 1.9191193580627441,
"learning_rate": 2.993648863533602e-05,
"loss": 0.65777245,
"memory(GiB)": 67.98,
"step": 4725,
"train_speed(iter/s)": 0.039458
},
{
"acc": 0.81086941,
"epoch": 3.253094910591472,
"grad_norm": 1.973708987236023,
"learning_rate": 2.983238875273308e-05,
"loss": 0.63210435,
"memory(GiB)": 67.98,
"step": 4730,
"train_speed(iter/s)": 0.039477
},
{
"acc": 0.80301018,
"epoch": 3.2565337001375516,
"grad_norm": 1.7471644878387451,
"learning_rate": 2.9728393233606715e-05,
"loss": 0.66623907,
"memory(GiB)": 67.98,
"step": 4735,
"train_speed(iter/s)": 0.039495
},
{
"acc": 0.80888157,
"epoch": 3.2599724896836313,
"grad_norm": 1.7000857591629028,
"learning_rate": 2.9624502615980177e-05,
"loss": 0.64117575,
"memory(GiB)": 67.98,
"step": 4740,
"train_speed(iter/s)": 0.039513
},
{
"acc": 0.81031885,
"epoch": 3.263411279229711,
"grad_norm": 1.760911226272583,
"learning_rate": 2.9520717437334024e-05,
"loss": 0.65109177,
"memory(GiB)": 67.98,
"step": 4745,
"train_speed(iter/s)": 0.039529
},
{
"acc": 0.81288662,
"epoch": 3.266850068775791,
"grad_norm": 1.7138432264328003,
"learning_rate": 2.941703823460329e-05,
"loss": 0.62872763,
"memory(GiB)": 67.98,
"step": 4750,
"train_speed(iter/s)": 0.039549
},
{
"acc": 0.80744476,
"epoch": 3.2702888583218708,
"grad_norm": 1.9831231832504272,
"learning_rate": 2.9313465544174756e-05,
"loss": 0.63904066,
"memory(GiB)": 67.98,
"step": 4755,
"train_speed(iter/s)": 0.039566
},
{
"acc": 0.80821819,
"epoch": 3.2737276478679505,
"grad_norm": 1.79635488986969,
"learning_rate": 2.9209999901884165e-05,
"loss": 0.64807615,
"memory(GiB)": 67.98,
"step": 4760,
"train_speed(iter/s)": 0.039585
},
{
"acc": 0.81542759,
"epoch": 3.27716643741403,
"grad_norm": 1.7462048530578613,
"learning_rate": 2.910664184301346e-05,
"loss": 0.61296053,
"memory(GiB)": 67.98,
"step": 4765,
"train_speed(iter/s)": 0.039602
},
{
"acc": 0.81904421,
"epoch": 3.28060522696011,
"grad_norm": 1.6204197406768799,
"learning_rate": 2.900339190228796e-05,
"loss": 0.60652199,
"memory(GiB)": 67.98,
"step": 4770,
"train_speed(iter/s)": 0.039622
},
{
"acc": 0.8142024,
"epoch": 3.28404401650619,
"grad_norm": 2.0843801498413086,
"learning_rate": 2.890025061387362e-05,
"loss": 0.61951303,
"memory(GiB)": 67.98,
"step": 4775,
"train_speed(iter/s)": 0.039638
},
{
"acc": 0.80437889,
"epoch": 3.2874828060522696,
"grad_norm": 1.9848445653915405,
"learning_rate": 2.879721851137438e-05,
"loss": 0.65048337,
"memory(GiB)": 67.98,
"step": 4780,
"train_speed(iter/s)": 0.039655
},
{
"acc": 0.8088932,
"epoch": 3.2909215955983493,
"grad_norm": 1.7368524074554443,
"learning_rate": 2.8694296127829177e-05,
"loss": 0.64408207,
"memory(GiB)": 67.98,
"step": 4785,
"train_speed(iter/s)": 0.039674
},
{
"acc": 0.79394779,
"epoch": 3.294360385144429,
"grad_norm": 1.72417414188385,
"learning_rate": 2.8591483995709407e-05,
"loss": 0.68265638,
"memory(GiB)": 67.98,
"step": 4790,
"train_speed(iter/s)": 0.039689
},
{
"acc": 0.81347179,
"epoch": 3.297799174690509,
"grad_norm": 1.7844178676605225,
"learning_rate": 2.8488782646916024e-05,
"loss": 0.61397967,
"memory(GiB)": 67.98,
"step": 4795,
"train_speed(iter/s)": 0.039709
},
{
"acc": 0.80788279,
"epoch": 3.3012379642365888,
"grad_norm": 1.7968957424163818,
"learning_rate": 2.838619261277686e-05,
"loss": 0.64608054,
"memory(GiB)": 67.98,
"step": 4800,
"train_speed(iter/s)": 0.039729
},
{
"epoch": 3.3012379642365888,
"eval_acc": 0.7751046025104602,
"eval_loss": 0.804237425327301,
"eval_runtime": 1089.1926,
"eval_samples_per_second": 3.932,
"eval_steps_per_second": 0.071,
"step": 4800
},
{
"acc": 0.81613159,
"epoch": 3.3046767537826685,
"grad_norm": 1.6313848495483398,
"learning_rate": 2.828371442404386e-05,
"loss": 0.62472601,
"memory(GiB)": 67.98,
"step": 4805,
"train_speed(iter/s)": 0.039393
},
{
"acc": 0.81500292,
"epoch": 3.308115543328748,
"grad_norm": 1.8520140647888184,
"learning_rate": 2.8181348610890345e-05,
"loss": 0.62366076,
"memory(GiB)": 67.98,
"step": 4810,
"train_speed(iter/s)": 0.039414
},
{
"acc": 0.79354863,
"epoch": 3.3115543328748283,
"grad_norm": 1.8981624841690063,
"learning_rate": 2.8079095702908214e-05,
"loss": 0.69254041,
"memory(GiB)": 67.98,
"step": 4815,
"train_speed(iter/s)": 0.03943
},
{
"acc": 0.8094223,
"epoch": 3.314993122420908,
"grad_norm": 1.9359115362167358,
"learning_rate": 2.7976956229105322e-05,
"loss": 0.64053526,
"memory(GiB)": 67.98,
"step": 4820,
"train_speed(iter/s)": 0.039449
},
{
"acc": 0.79914575,
"epoch": 3.3184319119669876,
"grad_norm": 1.8818870782852173,
"learning_rate": 2.7874930717902603e-05,
"loss": 0.68240814,
"memory(GiB)": 67.98,
"step": 4825,
"train_speed(iter/s)": 0.039467
},
{
"acc": 0.80387897,
"epoch": 3.3218707015130673,
"grad_norm": 1.9390044212341309,
"learning_rate": 2.7773019697131435e-05,
"loss": 0.65107994,
"memory(GiB)": 67.98,
"step": 4830,
"train_speed(iter/s)": 0.039486
},
{
"acc": 0.81106586,
"epoch": 3.325309491059147,
"grad_norm": 1.6695841550827026,
"learning_rate": 2.767122369403088e-05,
"loss": 0.63033338,
"memory(GiB)": 67.98,
"step": 4835,
"train_speed(iter/s)": 0.039505
},
{
"acc": 0.82067537,
"epoch": 3.328748280605227,
"grad_norm": 1.7732053995132446,
"learning_rate": 2.756954323524491e-05,
"loss": 0.61327543,
"memory(GiB)": 67.98,
"step": 4840,
"train_speed(iter/s)": 0.039521
},
{
"acc": 0.80741024,
"epoch": 3.3321870701513068,
"grad_norm": 1.7796927690505981,
"learning_rate": 2.7467978846819775e-05,
"loss": 0.63265486,
"memory(GiB)": 67.98,
"step": 4845,
"train_speed(iter/s)": 0.039541
},
{
"acc": 0.80555611,
"epoch": 3.3356258596973865,
"grad_norm": 1.739590048789978,
"learning_rate": 2.7366531054201243e-05,
"loss": 0.64431,
"memory(GiB)": 67.98,
"step": 4850,
"train_speed(iter/s)": 0.03956
},
{
"acc": 0.79774094,
"epoch": 3.339064649243466,
"grad_norm": 1.9023163318634033,
"learning_rate": 2.726520038223182e-05,
"loss": 0.68374538,
"memory(GiB)": 67.98,
"step": 4855,
"train_speed(iter/s)": 0.03958
},
{
"acc": 0.81651649,
"epoch": 3.3425034387895463,
"grad_norm": 1.862848162651062,
"learning_rate": 2.716398735514812e-05,
"loss": 0.62106805,
"memory(GiB)": 67.98,
"step": 4860,
"train_speed(iter/s)": 0.039595
},
{
"acc": 0.8125948,
"epoch": 3.345942228335626,
"grad_norm": 1.7548292875289917,
"learning_rate": 2.7062892496578096e-05,
"loss": 0.62365727,
"memory(GiB)": 67.98,
"step": 4865,
"train_speed(iter/s)": 0.039615
},
{
"acc": 0.81203623,
"epoch": 3.3493810178817056,
"grad_norm": 1.8868883848190308,
"learning_rate": 2.696191632953835e-05,
"loss": 0.63214188,
"memory(GiB)": 67.98,
"step": 4870,
"train_speed(iter/s)": 0.039636
},
{
"acc": 0.7978539,
"epoch": 3.3528198074277853,
"grad_norm": 1.854641318321228,
"learning_rate": 2.6861059376431485e-05,
"loss": 0.66800289,
"memory(GiB)": 67.98,
"step": 4875,
"train_speed(iter/s)": 0.039653
},
{
"acc": 0.80050983,
"epoch": 3.3562585969738654,
"grad_norm": 2.1327366828918457,
"learning_rate": 2.6760322159043293e-05,
"loss": 0.68278418,
"memory(GiB)": 67.98,
"step": 4880,
"train_speed(iter/s)": 0.039669
},
{
"acc": 0.80420437,
"epoch": 3.359697386519945,
"grad_norm": 1.9406790733337402,
"learning_rate": 2.6659705198540137e-05,
"loss": 0.65569339,
"memory(GiB)": 67.98,
"step": 4885,
"train_speed(iter/s)": 0.039687
},
{
"acc": 0.81132812,
"epoch": 3.3631361760660248,
"grad_norm": 2.0002591609954834,
"learning_rate": 2.6559209015466198e-05,
"loss": 0.64171629,
"memory(GiB)": 67.98,
"step": 4890,
"train_speed(iter/s)": 0.039704
},
{
"acc": 0.80664577,
"epoch": 3.3665749656121045,
"grad_norm": 1.8464481830596924,
"learning_rate": 2.6458834129740834e-05,
"loss": 0.63870592,
"memory(GiB)": 67.98,
"step": 4895,
"train_speed(iter/s)": 0.039724
},
{
"acc": 0.80081406,
"epoch": 3.370013755158184,
"grad_norm": 1.992497444152832,
"learning_rate": 2.635858106065588e-05,
"loss": 0.67669377,
"memory(GiB)": 67.98,
"step": 4900,
"train_speed(iter/s)": 0.039739
},
{
"epoch": 3.370013755158184,
"eval_acc": 0.775284563818779,
"eval_loss": 0.8015691637992859,
"eval_runtime": 1150.5731,
"eval_samples_per_second": 3.722,
"eval_steps_per_second": 0.067,
"step": 4900
},
{
"acc": 0.80654058,
"epoch": 3.3734525447042643,
"grad_norm": 1.6790952682495117,
"learning_rate": 2.625845032687293e-05,
"loss": 0.66655011,
"memory(GiB)": 67.98,
"step": 4905,
"train_speed(iter/s)": 0.03939
},
{
"acc": 0.81852398,
"epoch": 3.376891334250344,
"grad_norm": 1.7393443584442139,
"learning_rate": 2.6158442446420673e-05,
"loss": 0.61265764,
"memory(GiB)": 67.98,
"step": 4910,
"train_speed(iter/s)": 0.039408
},
{
"acc": 0.80897388,
"epoch": 3.3803301237964236,
"grad_norm": 1.757190465927124,
"learning_rate": 2.605855793669223e-05,
"loss": 0.63301859,
"memory(GiB)": 67.98,
"step": 4915,
"train_speed(iter/s)": 0.039424
},
{
"acc": 0.81477318,
"epoch": 3.3837689133425033,
"grad_norm": 1.7762666940689087,
"learning_rate": 2.595879731444242e-05,
"loss": 0.63501825,
"memory(GiB)": 67.98,
"step": 4920,
"train_speed(iter/s)": 0.039441
},
{
"acc": 0.80826883,
"epoch": 3.387207702888583,
"grad_norm": 1.8915072679519653,
"learning_rate": 2.5859161095785204e-05,
"loss": 0.64570541,
"memory(GiB)": 67.98,
"step": 4925,
"train_speed(iter/s)": 0.039458
},
{
"acc": 0.80283833,
"epoch": 3.390646492434663,
"grad_norm": 1.8202823400497437,
"learning_rate": 2.5759649796190873e-05,
"loss": 0.65588207,
"memory(GiB)": 67.98,
"step": 4930,
"train_speed(iter/s)": 0.039476
},
{
"acc": 0.80436974,
"epoch": 3.3940852819807428,
"grad_norm": 1.8674787282943726,
"learning_rate": 2.5660263930483468e-05,
"loss": 0.63378534,
"memory(GiB)": 67.98,
"step": 4935,
"train_speed(iter/s)": 0.039489
},
{
"acc": 0.80554743,
"epoch": 3.3975240715268225,
"grad_norm": 1.7539056539535522,
"learning_rate": 2.5561004012838067e-05,
"loss": 0.65574193,
"memory(GiB)": 67.98,
"step": 4940,
"train_speed(iter/s)": 0.039506
},
{
"acc": 0.81471024,
"epoch": 3.4009628610729026,
"grad_norm": 1.9018100500106812,
"learning_rate": 2.5461870556778218e-05,
"loss": 0.61126738,
"memory(GiB)": 67.98,
"step": 4945,
"train_speed(iter/s)": 0.039526
},
{
"acc": 0.80630493,
"epoch": 3.4044016506189823,
"grad_norm": 1.7900938987731934,
"learning_rate": 2.5362864075173153e-05,
"loss": 0.63573794,
"memory(GiB)": 67.98,
"step": 4950,
"train_speed(iter/s)": 0.039543
},
{
"acc": 0.80556068,
"epoch": 3.407840440165062,
"grad_norm": 2.0504183769226074,
"learning_rate": 2.526398508023523e-05,
"loss": 0.6546957,
"memory(GiB)": 67.98,
"step": 4955,
"train_speed(iter/s)": 0.039561
},
{
"acc": 0.80205326,
"epoch": 3.4112792297111416,
"grad_norm": 1.9150274991989136,
"learning_rate": 2.5165234083517246e-05,
"loss": 0.64255061,
"memory(GiB)": 67.98,
"step": 4960,
"train_speed(iter/s)": 0.039575
},
{
"acc": 0.80601921,
"epoch": 3.4147180192572213,
"grad_norm": 1.8331859111785889,
"learning_rate": 2.5066611595909784e-05,
"loss": 0.64326835,
"memory(GiB)": 67.98,
"step": 4965,
"train_speed(iter/s)": 0.039591
},
{
"acc": 0.80851765,
"epoch": 3.4181568088033014,
"grad_norm": 1.8799371719360352,
"learning_rate": 2.49681181276386e-05,
"loss": 0.63813715,
"memory(GiB)": 67.98,
"step": 4970,
"train_speed(iter/s)": 0.039605
},
{
"acc": 0.80517483,
"epoch": 3.421595598349381,
"grad_norm": 1.8553872108459473,
"learning_rate": 2.486975418826196e-05,
"loss": 0.66684914,
"memory(GiB)": 67.98,
"step": 4975,
"train_speed(iter/s)": 0.039623
},
{
"acc": 0.82046995,
"epoch": 3.4250343878954608,
"grad_norm": 1.695779800415039,
"learning_rate": 2.477152028666798e-05,
"loss": 0.60830936,
"memory(GiB)": 67.98,
"step": 4980,
"train_speed(iter/s)": 0.039643
},
{
"acc": 0.7990911,
"epoch": 3.4284731774415405,
"grad_norm": 1.7533307075500488,
"learning_rate": 2.4673416931072094e-05,
"loss": 0.67933016,
"memory(GiB)": 67.98,
"step": 4985,
"train_speed(iter/s)": 0.039658
},
{
"acc": 0.80797586,
"epoch": 3.43191196698762,
"grad_norm": 2.2120864391326904,
"learning_rate": 2.4575444629014292e-05,
"loss": 0.65290236,
"memory(GiB)": 67.98,
"step": 4990,
"train_speed(iter/s)": 0.039679
},
{
"acc": 0.80715389,
"epoch": 3.4353507565337003,
"grad_norm": 1.7007701396942139,
"learning_rate": 2.447760388735657e-05,
"loss": 0.64799299,
"memory(GiB)": 67.98,
"step": 4995,
"train_speed(iter/s)": 0.039694
},
{
"acc": 0.80209885,
"epoch": 3.43878954607978,
"grad_norm": 2.1484506130218506,
"learning_rate": 2.4379895212280297e-05,
"loss": 0.6714016,
"memory(GiB)": 67.98,
"step": 5000,
"train_speed(iter/s)": 0.039712
},
{
"epoch": 3.43878954607978,
"eval_acc": 0.7760493993791335,
"eval_loss": 0.7988596558570862,
"eval_runtime": 1141.4518,
"eval_samples_per_second": 3.752,
"eval_steps_per_second": 0.067,
"step": 5000
},
{
"acc": 0.80421772,
"epoch": 3.4422283356258596,
"grad_norm": 1.683592438697815,
"learning_rate": 2.428231910928358e-05,
"loss": 0.65520515,
"memory(GiB)": 67.98,
"step": 5005,
"train_speed(iter/s)": 0.039374
},
{
"acc": 0.80245571,
"epoch": 3.4456671251719393,
"grad_norm": 1.8841793537139893,
"learning_rate": 2.418487608317867e-05,
"loss": 0.67175484,
"memory(GiB)": 67.98,
"step": 5010,
"train_speed(iter/s)": 0.039392
},
{
"acc": 0.8125226,
"epoch": 3.4491059147180194,
"grad_norm": 1.949098825454712,
"learning_rate": 2.408756663808937e-05,
"loss": 0.61799521,
"memory(GiB)": 67.98,
"step": 5015,
"train_speed(iter/s)": 0.039412
},
{
"acc": 0.81072598,
"epoch": 3.452544704264099,
"grad_norm": 1.7873549461364746,
"learning_rate": 2.399039127744836e-05,
"loss": 0.64322014,
"memory(GiB)": 67.98,
"step": 5020,
"train_speed(iter/s)": 0.039425
},
{
"acc": 0.80515785,
"epoch": 3.4559834938101788,
"grad_norm": 2.0002734661102295,
"learning_rate": 2.389335050399464e-05,
"loss": 0.6395524,
"memory(GiB)": 67.98,
"step": 5025,
"train_speed(iter/s)": 0.039445
},
{
"acc": 0.80893536,
"epoch": 3.4594222833562585,
"grad_norm": 1.9517066478729248,
"learning_rate": 2.3796444819770926e-05,
"loss": 0.63445306,
"memory(GiB)": 67.98,
"step": 5030,
"train_speed(iter/s)": 0.03946
},
{
"acc": 0.79918771,
"epoch": 3.4628610729023386,
"grad_norm": 1.7724376916885376,
"learning_rate": 2.3699674726121022e-05,
"loss": 0.68629122,
"memory(GiB)": 67.98,
"step": 5035,
"train_speed(iter/s)": 0.039476
},
{
"acc": 0.80571623,
"epoch": 3.4662998624484183,
"grad_norm": 1.769455075263977,
"learning_rate": 2.3603040723687315e-05,
"loss": 0.65023713,
"memory(GiB)": 67.98,
"step": 5040,
"train_speed(iter/s)": 0.039495
},
{
"acc": 0.81652203,
"epoch": 3.469738651994498,
"grad_norm": 2.4029428958892822,
"learning_rate": 2.3506543312408055e-05,
"loss": 0.62751317,
"memory(GiB)": 67.98,
"step": 5045,
"train_speed(iter/s)": 0.039514
},
{
"acc": 0.8143034,
"epoch": 3.4731774415405776,
"grad_norm": 1.7803950309753418,
"learning_rate": 2.3410182991514863e-05,
"loss": 0.62447834,
"memory(GiB)": 67.98,
"step": 5050,
"train_speed(iter/s)": 0.039532
},
{
"acc": 0.81082478,
"epoch": 3.4766162310866573,
"grad_norm": 1.8696342706680298,
"learning_rate": 2.3313960259530114e-05,
"loss": 0.63704772,
"memory(GiB)": 67.98,
"step": 5055,
"train_speed(iter/s)": 0.039552
},
{
"acc": 0.81630154,
"epoch": 3.4800550206327374,
"grad_norm": 1.9919400215148926,
"learning_rate": 2.321787561426436e-05,
"loss": 0.61488199,
"memory(GiB)": 67.98,
"step": 5060,
"train_speed(iter/s)": 0.039568
},
{
"acc": 0.81280794,
"epoch": 3.483493810178817,
"grad_norm": 1.9915574789047241,
"learning_rate": 2.3121929552813775e-05,
"loss": 0.62114315,
"memory(GiB)": 67.98,
"step": 5065,
"train_speed(iter/s)": 0.039585
},
{
"acc": 0.80458755,
"epoch": 3.4869325997248968,
"grad_norm": 1.9132686853408813,
"learning_rate": 2.302612257155754e-05,
"loss": 0.63852549,
"memory(GiB)": 67.98,
"step": 5070,
"train_speed(iter/s)": 0.039601
},
{
"acc": 0.80228262,
"epoch": 3.4903713892709765,
"grad_norm": 1.638962745666504,
"learning_rate": 2.2930455166155325e-05,
"loss": 0.65759382,
"memory(GiB)": 67.98,
"step": 5075,
"train_speed(iter/s)": 0.039619
},
{
"acc": 0.81052542,
"epoch": 3.4938101788170566,
"grad_norm": 2.4375152587890625,
"learning_rate": 2.2834927831544663e-05,
"loss": 0.62842712,
"memory(GiB)": 67.98,
"step": 5080,
"train_speed(iter/s)": 0.039637
},
{
"acc": 0.81806412,
"epoch": 3.4972489683631363,
"grad_norm": 1.8711788654327393,
"learning_rate": 2.273954106193851e-05,
"loss": 0.59915447,
"memory(GiB)": 67.98,
"step": 5085,
"train_speed(iter/s)": 0.039654
},
{
"acc": 0.80885086,
"epoch": 3.500687757909216,
"grad_norm": 1.7943886518478394,
"learning_rate": 2.2644295350822523e-05,
"loss": 0.64677639,
"memory(GiB)": 67.98,
"step": 5090,
"train_speed(iter/s)": 0.03967
},
{
"acc": 0.80517883,
"epoch": 3.5041265474552956,
"grad_norm": 1.9428882598876953,
"learning_rate": 2.2549191190952614e-05,
"loss": 0.64541783,
"memory(GiB)": 67.98,
"step": 5095,
"train_speed(iter/s)": 0.039687
},
{
"acc": 0.82121677,
"epoch": 3.5075653370013757,
"grad_norm": 2.129689931869507,
"learning_rate": 2.245422907435237e-05,
"loss": 0.59930925,
"memory(GiB)": 67.98,
"step": 5100,
"train_speed(iter/s)": 0.039705
},
{
"epoch": 3.5075653370013757,
"eval_acc": 0.7766230260493994,
"eval_loss": 0.7989464998245239,
"eval_runtime": 1103.0791,
"eval_samples_per_second": 3.883,
"eval_steps_per_second": 0.07,
"step": 5100
},
{
"acc": 0.80364723,
"epoch": 3.5110041265474554,
"grad_norm": 1.949704885482788,
"learning_rate": 2.2359409492310554e-05,
"loss": 0.65982656,
"memory(GiB)": 72.17,
"step": 5105,
"train_speed(iter/s)": 45.411016
},
{
"acc": 0.81052856,
"epoch": 3.514442916093535,
"grad_norm": 1.766641616821289,
"learning_rate": 2.2264732935378485e-05,
"loss": 0.62573719,
"memory(GiB)": 72.17,
"step": 5110,
"train_speed(iter/s)": 26.201936
},
{
"acc": 0.81840916,
"epoch": 3.5178817056396148,
"grad_norm": 2.0052237510681152,
"learning_rate": 2.217019989336754e-05,
"loss": 0.60661297,
"memory(GiB)": 72.17,
"step": 5115,
"train_speed(iter/s)": 19.601314
},
{
"acc": 0.81169033,
"epoch": 3.5213204951856945,
"grad_norm": 1.8747566938400269,
"learning_rate": 2.2075810855346627e-05,
"loss": 0.6164432,
"memory(GiB)": 72.17,
"step": 5120,
"train_speed(iter/s)": 15.593037
},
{
"acc": 0.81197557,
"epoch": 3.5247592847317746,
"grad_norm": 1.8955270051956177,
"learning_rate": 2.1981566309639646e-05,
"loss": 0.63830528,
"memory(GiB)": 72.17,
"step": 5125,
"train_speed(iter/s)": 13.017298
},
{
"acc": 0.80265837,
"epoch": 3.5281980742778543,
"grad_norm": 1.9690247774124146,
"learning_rate": 2.1887466743822955e-05,
"loss": 0.66069555,
"memory(GiB)": 72.17,
"step": 5130,
"train_speed(iter/s)": 10.901481
},
{
"acc": 0.80450611,
"epoch": 3.531636863823934,
"grad_norm": 1.9993948936462402,
"learning_rate": 2.1793512644722865e-05,
"loss": 0.66204972,
"memory(GiB)": 72.23,
"step": 5135,
"train_speed(iter/s)": 9.338386
},
{
"acc": 0.80088081,
"epoch": 3.5350756533700136,
"grad_norm": 2.0130441188812256,
"learning_rate": 2.1699704498413108e-05,
"loss": 0.67445641,
"memory(GiB)": 72.23,
"step": 5140,
"train_speed(iter/s)": 8.21847
},
{
"acc": 0.80354471,
"epoch": 3.5385144429160933,
"grad_norm": 1.8594011068344116,
"learning_rate": 2.1606042790212308e-05,
"loss": 0.6569746,
"memory(GiB)": 72.28,
"step": 5145,
"train_speed(iter/s)": 7.396472
},
{
"acc": 0.817062,
"epoch": 3.5419532324621734,
"grad_norm": 1.8149155378341675,
"learning_rate": 2.1512528004681535e-05,
"loss": 0.63296041,
"memory(GiB)": 72.7,
"step": 5150,
"train_speed(iter/s)": 6.704915
},
{
"acc": 0.81256504,
"epoch": 3.545392022008253,
"grad_norm": 1.9153436422348022,
"learning_rate": 2.1419160625621713e-05,
"loss": 0.6270606,
"memory(GiB)": 72.7,
"step": 5155,
"train_speed(iter/s)": 6.15874
},
{
"acc": 0.80941086,
"epoch": 3.5488308115543328,
"grad_norm": 1.6500003337860107,
"learning_rate": 2.1325941136071155e-05,
"loss": 0.63835382,
"memory(GiB)": 72.7,
"step": 5160,
"train_speed(iter/s)": 5.671284
},
{
"acc": 0.81136417,
"epoch": 3.552269601100413,
"grad_norm": 1.8858124017715454,
"learning_rate": 2.1232870018303073e-05,
"loss": 0.63752775,
"memory(GiB)": 72.7,
"step": 5165,
"train_speed(iter/s)": 5.219987
},
{
"acc": 0.80491219,
"epoch": 3.5557083906464926,
"grad_norm": 1.6921783685684204,
"learning_rate": 2.1139947753823062e-05,
"loss": 0.64572196,
"memory(GiB)": 72.7,
"step": 5170,
"train_speed(iter/s)": 4.872821
},
{
"acc": 0.81005411,
"epoch": 3.5591471801925723,
"grad_norm": 2.046410322189331,
"learning_rate": 2.104717482336666e-05,
"loss": 0.63220901,
"memory(GiB)": 72.7,
"step": 5175,
"train_speed(iter/s)": 4.560666
},
{
"acc": 0.81424847,
"epoch": 3.562585969738652,
"grad_norm": 1.6710875034332275,
"learning_rate": 2.095455170689679e-05,
"loss": 0.61837616,
"memory(GiB)": 72.7,
"step": 5180,
"train_speed(iter/s)": 4.309485
},
{
"acc": 0.80108767,
"epoch": 3.5660247592847316,
"grad_norm": 1.8342450857162476,
"learning_rate": 2.0862078883601306e-05,
"loss": 0.65744696,
"memory(GiB)": 72.7,
"step": 5185,
"train_speed(iter/s)": 4.065786
},
{
"acc": 0.81536474,
"epoch": 3.5694635488308117,
"grad_norm": 1.8400901556015015,
"learning_rate": 2.0769756831890517e-05,
"loss": 0.60680361,
"memory(GiB)": 72.7,
"step": 5190,
"train_speed(iter/s)": 3.855262
},
{
"acc": 0.80920811,
"epoch": 3.5729023383768914,
"grad_norm": 2.1009435653686523,
"learning_rate": 2.067758602939473e-05,
"loss": 0.617168,
"memory(GiB)": 72.7,
"step": 5195,
"train_speed(iter/s)": 3.680601
},
{
"acc": 0.81185656,
"epoch": 3.576341127922971,
"grad_norm": 1.8651849031448364,
"learning_rate": 2.058556695296173e-05,
"loss": 0.62884312,
"memory(GiB)": 72.7,
"step": 5200,
"train_speed(iter/s)": 3.496681
},
{
"epoch": 3.576341127922971,
"eval_acc": 0.7778602600440905,
"eval_loss": 0.7939268350601196,
"eval_runtime": 1134.0299,
"eval_samples_per_second": 3.777,
"eval_steps_per_second": 0.068,
"step": 5200
},
{
"acc": 0.80170975,
"epoch": 3.5797799174690508,
"grad_norm": 1.989461064338684,
"learning_rate": 2.0493700078654395e-05,
"loss": 0.65876365,
"memory(GiB)": 72.7,
"step": 5205,
"train_speed(iter/s)": 1.934041
},
{
"acc": 0.81692247,
"epoch": 3.5832187070151305,
"grad_norm": 1.9536714553833008,
"learning_rate": 2.040198588174813e-05,
"loss": 0.60520372,
"memory(GiB)": 67.62,
"step": 5210,
"train_speed(iter/s)": 1.887787
},
{
"acc": 0.81152821,
"epoch": 3.5866574965612106,
"grad_norm": 1.9955531358718872,
"learning_rate": 2.0310424836728494e-05,
"loss": 0.64239225,
"memory(GiB)": 67.62,
"step": 5215,
"train_speed(iter/s)": 1.844883
},
{
"acc": 0.81309061,
"epoch": 3.5900962861072903,
"grad_norm": 1.7581534385681152,
"learning_rate": 2.0219017417288675e-05,
"loss": 0.62655144,
"memory(GiB)": 67.62,
"step": 5220,
"train_speed(iter/s)": 1.804023
},
{
"acc": 0.79863563,
"epoch": 3.59353507565337,
"grad_norm": 2.349116086959839,
"learning_rate": 2.0127764096327113e-05,
"loss": 0.6843668,
"memory(GiB)": 67.62,
"step": 5225,
"train_speed(iter/s)": 1.756569
},
{
"acc": 0.82426891,
"epoch": 3.59697386519945,
"grad_norm": 1.8200994729995728,
"learning_rate": 2.0036665345945005e-05,
"loss": 0.57460217,
"memory(GiB)": 67.62,
"step": 5230,
"train_speed(iter/s)": 1.719852
},
{
"acc": 0.81199923,
"epoch": 3.6004126547455297,
"grad_norm": 1.760864019393921,
"learning_rate": 1.9945721637443855e-05,
"loss": 0.63763566,
"memory(GiB)": 67.62,
"step": 5235,
"train_speed(iter/s)": 1.68241
},
{
"acc": 0.81704388,
"epoch": 3.6038514442916094,
"grad_norm": 1.843873143196106,
"learning_rate": 1.9854933441323074e-05,
"loss": 0.61490622,
"memory(GiB)": 67.62,
"step": 5240,
"train_speed(iter/s)": 1.645642
},
{
"acc": 0.80201912,
"epoch": 3.607290233837689,
"grad_norm": 1.963784098625183,
"learning_rate": 1.9764301227277503e-05,
"loss": 0.64649305,
"memory(GiB)": 67.62,
"step": 5245,
"train_speed(iter/s)": 1.609859
},
{
"acc": 0.79966879,
"epoch": 3.6107290233837688,
"grad_norm": 2.0832812786102295,
"learning_rate": 1.9673825464195065e-05,
"loss": 0.68630571,
"memory(GiB)": 67.62,
"step": 5250,
"train_speed(iter/s)": 1.577467
},
{
"acc": 0.80603333,
"epoch": 3.614167812929849,
"grad_norm": 2.1369543075561523,
"learning_rate": 1.9583506620154203e-05,
"loss": 0.65753994,
"memory(GiB)": 67.62,
"step": 5255,
"train_speed(iter/s)": 1.547258
},
{
"acc": 0.80580025,
"epoch": 3.6176066024759286,
"grad_norm": 1.853987455368042,
"learning_rate": 1.9493345162421595e-05,
"loss": 0.65103807,
"memory(GiB)": 67.62,
"step": 5260,
"train_speed(iter/s)": 1.514916
},
{
"acc": 0.81311512,
"epoch": 3.6210453920220083,
"grad_norm": 2.1064698696136475,
"learning_rate": 1.9403341557449614e-05,
"loss": 0.61463804,
"memory(GiB)": 67.62,
"step": 5265,
"train_speed(iter/s)": 1.48572
},
{
"acc": 0.79921217,
"epoch": 3.624484181568088,
"grad_norm": 1.9443074464797974,
"learning_rate": 1.9313496270874065e-05,
"loss": 0.67477508,
"memory(GiB)": 67.62,
"step": 5270,
"train_speed(iter/s)": 1.458339
},
{
"acc": 0.81266232,
"epoch": 3.6279229711141676,
"grad_norm": 1.8594951629638672,
"learning_rate": 1.9223809767511622e-05,
"loss": 0.62132969,
"memory(GiB)": 67.62,
"step": 5275,
"train_speed(iter/s)": 1.430819
},
{
"acc": 0.81770267,
"epoch": 3.6313617606602477,
"grad_norm": 1.726508617401123,
"learning_rate": 1.913428251135751e-05,
"loss": 0.59776912,
"memory(GiB)": 67.62,
"step": 5280,
"train_speed(iter/s)": 1.404985
},
{
"acc": 0.81523685,
"epoch": 3.6348005502063274,
"grad_norm": 1.8356785774230957,
"learning_rate": 1.904491496558308e-05,
"loss": 0.62854185,
"memory(GiB)": 67.62,
"step": 5285,
"train_speed(iter/s)": 1.381204
},
{
"acc": 0.81304836,
"epoch": 3.638239339752407,
"grad_norm": 2.129279136657715,
"learning_rate": 1.8955707592533422e-05,
"loss": 0.62155433,
"memory(GiB)": 67.62,
"step": 5290,
"train_speed(iter/s)": 1.359057
},
{
"acc": 0.80884018,
"epoch": 3.6416781292984868,
"grad_norm": 1.8221231698989868,
"learning_rate": 1.8866660853724986e-05,
"loss": 0.63217707,
"memory(GiB)": 67.62,
"step": 5295,
"train_speed(iter/s)": 1.33386
},
{
"acc": 0.81211977,
"epoch": 3.6451169188445665,
"grad_norm": 1.8867233991622925,
"learning_rate": 1.8777775209843136e-05,
"loss": 0.62917542,
"memory(GiB)": 67.62,
"step": 5300,
"train_speed(iter/s)": 1.313671
},
{
"epoch": 3.6451169188445665,
"eval_acc": 0.7788275520763036,
"eval_loss": 0.791822075843811,
"eval_runtime": 1053.4964,
"eval_samples_per_second": 4.066,
"eval_steps_per_second": 0.073,
"step": 5300
},
{
"acc": 0.81195221,
"epoch": 3.6485557083906466,
"grad_norm": 1.8668956756591797,
"learning_rate": 1.868905112073983e-05,
"loss": 0.63313217,
"memory(GiB)": 67.62,
"step": 5305,
"train_speed(iter/s)": 1.028242
},
{
"acc": 0.80295448,
"epoch": 3.6519944979367263,
"grad_norm": 1.8957765102386475,
"learning_rate": 1.8600489045431255e-05,
"loss": 0.6641448,
"memory(GiB)": 67.62,
"step": 5310,
"train_speed(iter/s)": 1.011268
},
{
"acc": 0.81735973,
"epoch": 3.655433287482806,
"grad_norm": 1.843002438545227,
"learning_rate": 1.851208944209535e-05,
"loss": 0.60693998,
"memory(GiB)": 67.62,
"step": 5315,
"train_speed(iter/s)": 0.999134
},
{
"acc": 0.80702572,
"epoch": 3.658872077028886,
"grad_norm": 1.8155903816223145,
"learning_rate": 1.8423852768069548e-05,
"loss": 0.65699286,
"memory(GiB)": 67.62,
"step": 5320,
"train_speed(iter/s)": 0.984903
},
{
"acc": 0.80685482,
"epoch": 3.6623108665749657,
"grad_norm": 2.5197625160217285,
"learning_rate": 1.8335779479848343e-05,
"loss": 0.64485803,
"memory(GiB)": 67.62,
"step": 5325,
"train_speed(iter/s)": 0.97156
},
{
"acc": 0.80205936,
"epoch": 3.6657496561210454,
"grad_norm": 2.0389351844787598,
"learning_rate": 1.8247870033080946e-05,
"loss": 0.66550064,
"memory(GiB)": 67.62,
"step": 5330,
"train_speed(iter/s)": 0.958682
},
{
"acc": 0.79223623,
"epoch": 3.669188445667125,
"grad_norm": 1.8488144874572754,
"learning_rate": 1.8160124882568932e-05,
"loss": 0.69218178,
"memory(GiB)": 67.62,
"step": 5335,
"train_speed(iter/s)": 0.946767
},
{
"acc": 0.80992165,
"epoch": 3.6726272352132048,
"grad_norm": 1.701180338859558,
"learning_rate": 1.8072544482263918e-05,
"loss": 0.63368897,
"memory(GiB)": 67.62,
"step": 5340,
"train_speed(iter/s)": 0.934657
},
{
"acc": 0.81273346,
"epoch": 3.676066024759285,
"grad_norm": 1.870936632156372,
"learning_rate": 1.798512928526514e-05,
"loss": 0.62342134,
"memory(GiB)": 67.62,
"step": 5345,
"train_speed(iter/s)": 0.922237
},
{
"acc": 0.81911898,
"epoch": 3.6795048143053646,
"grad_norm": 2.1875438690185547,
"learning_rate": 1.789787974381717e-05,
"loss": 0.60667896,
"memory(GiB)": 67.62,
"step": 5350,
"train_speed(iter/s)": 0.910869
},
{
"acc": 0.81839104,
"epoch": 3.6829436038514443,
"grad_norm": 1.9975168704986572,
"learning_rate": 1.7810796309307553e-05,
"loss": 0.61631479,
"memory(GiB)": 67.62,
"step": 5355,
"train_speed(iter/s)": 0.898788
},
{
"acc": 0.79996266,
"epoch": 3.686382393397524,
"grad_norm": 2.1356396675109863,
"learning_rate": 1.7723879432264454e-05,
"loss": 0.65718513,
"memory(GiB)": 67.62,
"step": 5360,
"train_speed(iter/s)": 0.888306
},
{
"acc": 0.81604223,
"epoch": 3.6898211829436036,
"grad_norm": 1.8731410503387451,
"learning_rate": 1.763712956235441e-05,
"loss": 0.62172794,
"memory(GiB)": 67.62,
"step": 5365,
"train_speed(iter/s)": 0.877305
},
{
"acc": 0.80391541,
"epoch": 3.6932599724896837,
"grad_norm": 2.0950632095336914,
"learning_rate": 1.7550547148379887e-05,
"loss": 0.66051216,
"memory(GiB)": 67.62,
"step": 5370,
"train_speed(iter/s)": 0.867559
},
{
"acc": 0.81647606,
"epoch": 3.6966987620357634,
"grad_norm": 1.7469427585601807,
"learning_rate": 1.7464132638277024e-05,
"loss": 0.61341143,
"memory(GiB)": 67.62,
"step": 5375,
"train_speed(iter/s)": 0.85733
},
{
"acc": 0.81001339,
"epoch": 3.700137551581843,
"grad_norm": 1.9832128286361694,
"learning_rate": 1.737788647911332e-05,
"loss": 0.63573427,
"memory(GiB)": 67.62,
"step": 5380,
"train_speed(iter/s)": 0.846179
},
{
"acc": 0.81297035,
"epoch": 3.703576341127923,
"grad_norm": 1.993898868560791,
"learning_rate": 1.72918091170853e-05,
"loss": 0.64141645,
"memory(GiB)": 67.62,
"step": 5385,
"train_speed(iter/s)": 0.837314
},
{
"acc": 0.81126728,
"epoch": 3.707015130674003,
"grad_norm": 1.8106107711791992,
"learning_rate": 1.72059009975162e-05,
"loss": 0.63114452,
"memory(GiB)": 67.62,
"step": 5390,
"train_speed(iter/s)": 0.827419
},
{
"acc": 0.80908537,
"epoch": 3.7104539202200826,
"grad_norm": 2.117880344390869,
"learning_rate": 1.71201625648537e-05,
"loss": 0.64524364,
"memory(GiB)": 67.62,
"step": 5395,
"train_speed(iter/s)": 0.818052
},
{
"acc": 0.80611687,
"epoch": 3.7138927097661623,
"grad_norm": 1.86283278465271,
"learning_rate": 1.7034594262667588e-05,
"loss": 0.65121384,
"memory(GiB)": 67.62,
"step": 5400,
"train_speed(iter/s)": 0.809997
},
{
"epoch": 3.7138927097661623,
"eval_acc": 0.7794068025374544,
"eval_loss": 0.7907042503356934,
"eval_runtime": 1176.8109,
"eval_samples_per_second": 3.639,
"eval_steps_per_second": 0.065,
"step": 5400
},
{
"acc": 0.80929089,
"epoch": 3.717331499312242,
"grad_norm": 2.175724983215332,
"learning_rate": 1.6949196533647456e-05,
"loss": 0.63896065,
"memory(GiB)": 67.62,
"step": 5405,
"train_speed(iter/s)": 0.682919
},
{
"acc": 0.80504618,
"epoch": 3.720770288858322,
"grad_norm": 1.7912895679473877,
"learning_rate": 1.6863969819600486e-05,
"loss": 0.6515821,
"memory(GiB)": 67.62,
"step": 5410,
"train_speed(iter/s)": 0.677418
},
{
"acc": 0.82038784,
"epoch": 3.7242090784044017,
"grad_norm": 1.9168109893798828,
"learning_rate": 1.6778914561449068e-05,
"loss": 0.60445056,
"memory(GiB)": 67.62,
"step": 5415,
"train_speed(iter/s)": 0.672424
},
{
"acc": 0.80365715,
"epoch": 3.7276478679504814,
"grad_norm": 2.0032663345336914,
"learning_rate": 1.669403119922857e-05,
"loss": 0.65206861,
"memory(GiB)": 67.62,
"step": 5420,
"train_speed(iter/s)": 0.666879
},
{
"acc": 0.81271191,
"epoch": 3.731086657496561,
"grad_norm": 1.7971467971801758,
"learning_rate": 1.660932017208504e-05,
"loss": 0.63001757,
"memory(GiB)": 67.62,
"step": 5425,
"train_speed(iter/s)": 0.661349
},
{
"acc": 0.80627918,
"epoch": 3.7345254470426408,
"grad_norm": 2.1404869556427,
"learning_rate": 1.6524781918272988e-05,
"loss": 0.65701981,
"memory(GiB)": 67.62,
"step": 5430,
"train_speed(iter/s)": 0.656674
},
{
"acc": 0.81756916,
"epoch": 3.737964236588721,
"grad_norm": 1.9488438367843628,
"learning_rate": 1.6440416875153035e-05,
"loss": 0.62909493,
"memory(GiB)": 67.62,
"step": 5435,
"train_speed(iter/s)": 0.651758
},
{
"acc": 0.81080599,
"epoch": 3.7414030261348006,
"grad_norm": 1.9031460285186768,
"learning_rate": 1.6356225479189706e-05,
"loss": 0.64159656,
"memory(GiB)": 67.62,
"step": 5440,
"train_speed(iter/s)": 0.647121
},
{
"acc": 0.80497589,
"epoch": 3.7448418156808803,
"grad_norm": 1.9063955545425415,
"learning_rate": 1.6272208165949165e-05,
"loss": 0.66333132,
"memory(GiB)": 67.62,
"step": 5445,
"train_speed(iter/s)": 0.641945
},
{
"acc": 0.81537628,
"epoch": 3.7482806052269604,
"grad_norm": 1.9544923305511475,
"learning_rate": 1.6188365370096938e-05,
"loss": 0.60649881,
"memory(GiB)": 67.62,
"step": 5450,
"train_speed(iter/s)": 0.637092
},
{
"acc": 0.81485357,
"epoch": 3.7517193947730396,
"grad_norm": 1.7963929176330566,
"learning_rate": 1.61046975253957e-05,
"loss": 0.62127781,
"memory(GiB)": 67.62,
"step": 5455,
"train_speed(iter/s)": 0.632455
},
{
"acc": 0.80606298,
"epoch": 3.7551581843191197,
"grad_norm": 1.91194748878479,
"learning_rate": 1.6021205064703e-05,
"loss": 0.6456295,
"memory(GiB)": 67.62,
"step": 5460,
"train_speed(iter/s)": 0.627638
},
{
"acc": 0.80508499,
"epoch": 3.7585969738651994,
"grad_norm": 1.6945174932479858,
"learning_rate": 1.593788841996904e-05,
"loss": 0.64310069,
"memory(GiB)": 67.62,
"step": 5465,
"train_speed(iter/s)": 0.622963
},
{
"acc": 0.82610073,
"epoch": 3.762035763411279,
"grad_norm": 1.6806504726409912,
"learning_rate": 1.5854748022234422e-05,
"loss": 0.57846365,
"memory(GiB)": 67.62,
"step": 5470,
"train_speed(iter/s)": 0.619076
},
{
"acc": 0.80571289,
"epoch": 3.7654745529573592,
"grad_norm": 1.9896758794784546,
"learning_rate": 1.5771784301627968e-05,
"loss": 0.64995089,
"memory(GiB)": 67.62,
"step": 5475,
"train_speed(iter/s)": 0.614402
},
{
"acc": 0.81692181,
"epoch": 3.768913342503439,
"grad_norm": 1.7746247053146362,
"learning_rate": 1.5688997687364408e-05,
"loss": 0.61731248,
"memory(GiB)": 67.62,
"step": 5480,
"train_speed(iter/s)": 0.610536
},
{
"acc": 0.79758596,
"epoch": 3.7723521320495186,
"grad_norm": 1.9613304138183594,
"learning_rate": 1.560638860774223e-05,
"loss": 0.66896119,
"memory(GiB)": 67.62,
"step": 5485,
"train_speed(iter/s)": 0.605861
},
{
"acc": 0.80816298,
"epoch": 3.7757909215955983,
"grad_norm": 1.7979682683944702,
"learning_rate": 1.552395749014145e-05,
"loss": 0.64903908,
"memory(GiB)": 67.62,
"step": 5490,
"train_speed(iter/s)": 0.601267
},
{
"acc": 0.81845226,
"epoch": 3.779229711141678,
"grad_norm": 1.610510230064392,
"learning_rate": 1.5441704761021365e-05,
"loss": 0.61122522,
"memory(GiB)": 67.62,
"step": 5495,
"train_speed(iter/s)": 0.59685
},
{
"acc": 0.80607834,
"epoch": 3.782668500687758,
"grad_norm": 1.8088189363479614,
"learning_rate": 1.535963084591842e-05,
"loss": 0.6456028,
"memory(GiB)": 67.62,
"step": 5500,
"train_speed(iter/s)": 0.59234
},
{
"epoch": 3.782668500687758,
"eval_acc": 0.7795867638457732,
"eval_loss": 0.7851858735084534,
"eval_runtime": 1107.2216,
"eval_samples_per_second": 3.868,
"eval_steps_per_second": 0.07,
"step": 5500
},
{
"acc": 0.80722027,
"epoch": 3.7861072902338377,
"grad_norm": 1.7930651903152466,
"learning_rate": 1.527773616944393e-05,
"loss": 0.65197091,
"memory(GiB)": 67.62,
"step": 5505,
"train_speed(iter/s)": 0.525895
},
{
"acc": 0.80907288,
"epoch": 3.7895460797799174,
"grad_norm": 1.873205542564392,
"learning_rate": 1.519602115528191e-05,
"loss": 0.63936815,
"memory(GiB)": 67.62,
"step": 5510,
"train_speed(iter/s)": 0.522195
},
{
"acc": 0.81462736,
"epoch": 3.792984869325997,
"grad_norm": 2.1219732761383057,
"learning_rate": 1.5114486226186914e-05,
"loss": 0.63517313,
"memory(GiB)": 67.62,
"step": 5515,
"train_speed(iter/s)": 0.518863
},
{
"acc": 0.81379719,
"epoch": 3.796423658872077,
"grad_norm": 1.8798179626464844,
"learning_rate": 1.5033131803981795e-05,
"loss": 0.6165091,
"memory(GiB)": 67.62,
"step": 5520,
"train_speed(iter/s)": 0.516156
},
{
"acc": 0.80504332,
"epoch": 3.799862448418157,
"grad_norm": 2.1897356510162354,
"learning_rate": 1.495195830955555e-05,
"loss": 0.65493903,
"memory(GiB)": 67.62,
"step": 5525,
"train_speed(iter/s)": 0.512721
},
{
"acc": 0.79971151,
"epoch": 3.8033012379642366,
"grad_norm": 2.3374557495117188,
"learning_rate": 1.4870966162861185e-05,
"loss": 0.66825953,
"memory(GiB)": 67.62,
"step": 5530,
"train_speed(iter/s)": 0.509778
},
{
"acc": 0.81023417,
"epoch": 3.8067400275103163,
"grad_norm": 2.0296730995178223,
"learning_rate": 1.4790155782913446e-05,
"loss": 0.6293088,
"memory(GiB)": 67.62,
"step": 5535,
"train_speed(iter/s)": 0.506899
},
{
"acc": 0.81744757,
"epoch": 3.8101788170563964,
"grad_norm": 2.1950666904449463,
"learning_rate": 1.4709527587786729e-05,
"loss": 0.60644913,
"memory(GiB)": 67.62,
"step": 5540,
"train_speed(iter/s)": 0.504436
},
{
"acc": 0.80740032,
"epoch": 3.813617606602476,
"grad_norm": 1.870073676109314,
"learning_rate": 1.4629081994612883e-05,
"loss": 0.65674248,
"memory(GiB)": 67.62,
"step": 5545,
"train_speed(iter/s)": 0.501407
},
{
"acc": 0.82541618,
"epoch": 3.8170563961485557,
"grad_norm": 1.814864993095398,
"learning_rate": 1.4548819419579082e-05,
"loss": 0.59056787,
"memory(GiB)": 67.62,
"step": 5550,
"train_speed(iter/s)": 0.49869
},
{
"acc": 0.79932752,
"epoch": 3.8204951856946354,
"grad_norm": 2.118622303009033,
"learning_rate": 1.4468740277925627e-05,
"loss": 0.67586517,
"memory(GiB)": 67.62,
"step": 5555,
"train_speed(iter/s)": 0.496257
},
{
"acc": 0.80807095,
"epoch": 3.823933975240715,
"grad_norm": 2.1060431003570557,
"learning_rate": 1.4388844983943837e-05,
"loss": 0.64639549,
"memory(GiB)": 67.62,
"step": 5560,
"train_speed(iter/s)": 0.493446
},
{
"acc": 0.80921745,
"epoch": 3.8273727647867952,
"grad_norm": 1.9443578720092773,
"learning_rate": 1.430913395097388e-05,
"loss": 0.63667898,
"memory(GiB)": 67.62,
"step": 5565,
"train_speed(iter/s)": 0.490752
},
{
"acc": 0.82145481,
"epoch": 3.830811554332875,
"grad_norm": 2.1125001907348633,
"learning_rate": 1.4229607591402635e-05,
"loss": 0.58884673,
"memory(GiB)": 67.62,
"step": 5570,
"train_speed(iter/s)": 0.487797
},
{
"acc": 0.82449379,
"epoch": 3.8342503438789546,
"grad_norm": 1.7490825653076172,
"learning_rate": 1.4150266316661623e-05,
"loss": 0.60519004,
"memory(GiB)": 67.62,
"step": 5575,
"train_speed(iter/s)": 0.485346
},
{
"acc": 0.81672592,
"epoch": 3.8376891334250343,
"grad_norm": 1.6957894563674927,
"learning_rate": 1.407111053722477e-05,
"loss": 0.61075163,
"memory(GiB)": 67.62,
"step": 5580,
"train_speed(iter/s)": 0.482577
},
{
"acc": 0.79658046,
"epoch": 3.841127922971114,
"grad_norm": 2.5719101428985596,
"learning_rate": 1.3992140662606357e-05,
"loss": 0.67197566,
"memory(GiB)": 67.62,
"step": 5585,
"train_speed(iter/s)": 0.480134
},
{
"acc": 0.82320576,
"epoch": 3.844566712517194,
"grad_norm": 2.0491995811462402,
"learning_rate": 1.3913357101358865e-05,
"loss": 0.59475327,
"memory(GiB)": 67.62,
"step": 5590,
"train_speed(iter/s)": 0.477933
},
{
"acc": 0.81174135,
"epoch": 3.8480055020632737,
"grad_norm": 1.8027801513671875,
"learning_rate": 1.3834760261070908e-05,
"loss": 0.63737803,
"memory(GiB)": 67.62,
"step": 5595,
"train_speed(iter/s)": 0.475151
},
{
"acc": 0.81277132,
"epoch": 3.8514442916093534,
"grad_norm": 2.0076797008514404,
"learning_rate": 1.3756350548365069e-05,
"loss": 0.64119816,
"memory(GiB)": 67.62,
"step": 5600,
"train_speed(iter/s)": 0.472763
},
{
"epoch": 3.8514442916093534,
"eval_acc": 0.7802222522157736,
"eval_loss": 0.7851279377937317,
"eval_runtime": 1153.3756,
"eval_samples_per_second": 3.713,
"eval_steps_per_second": 0.067,
"step": 5600
},
{
"acc": 0.79907894,
"epoch": 3.8548830811554335,
"grad_norm": 2.066263437271118,
"learning_rate": 1.3678128368895824e-05,
"loss": 0.66954241,
"memory(GiB)": 67.62,
"step": 5605,
"train_speed(iter/s)": 0.428917
},
{
"acc": 0.81123543,
"epoch": 3.8583218707015132,
"grad_norm": 1.8212432861328125,
"learning_rate": 1.3600094127347462e-05,
"loss": 0.64494739,
"memory(GiB)": 67.62,
"step": 5610,
"train_speed(iter/s)": 0.427046
},
{
"acc": 0.8146841,
"epoch": 3.861760660247593,
"grad_norm": 1.99728524684906,
"learning_rate": 1.3522248227431972e-05,
"loss": 0.61559277,
"memory(GiB)": 67.62,
"step": 5615,
"train_speed(iter/s)": 0.42489
},
{
"acc": 0.8208971,
"epoch": 3.8651994497936726,
"grad_norm": 2.206382989883423,
"learning_rate": 1.3444591071886931e-05,
"loss": 0.61427069,
"memory(GiB)": 67.62,
"step": 5620,
"train_speed(iter/s)": 0.423148
},
{
"acc": 0.81454735,
"epoch": 3.8686382393397523,
"grad_norm": 1.812099575996399,
"learning_rate": 1.3367123062473446e-05,
"loss": 0.62899446,
"memory(GiB)": 67.62,
"step": 5625,
"train_speed(iter/s)": 0.421338
},
{
"acc": 0.81133175,
"epoch": 3.8720770288858324,
"grad_norm": 1.8373388051986694,
"learning_rate": 1.328984459997408e-05,
"loss": 0.63102517,
"memory(GiB)": 67.62,
"step": 5630,
"train_speed(iter/s)": 0.419334
},
{
"acc": 0.80838804,
"epoch": 3.875515818431912,
"grad_norm": 1.7026041746139526,
"learning_rate": 1.3212756084190767e-05,
"loss": 0.63373623,
"memory(GiB)": 67.62,
"step": 5635,
"train_speed(iter/s)": 0.417529
},
{
"acc": 0.81233072,
"epoch": 3.8789546079779917,
"grad_norm": 1.80439293384552,
"learning_rate": 1.313585791394274e-05,
"loss": 0.63350501,
"memory(GiB)": 67.62,
"step": 5640,
"train_speed(iter/s)": 0.415882
},
{
"acc": 0.80420494,
"epoch": 3.8823933975240714,
"grad_norm": 1.835792064666748,
"learning_rate": 1.3059150487064497e-05,
"loss": 0.64182324,
"memory(GiB)": 67.62,
"step": 5645,
"train_speed(iter/s)": 0.413931
},
{
"acc": 0.82478485,
"epoch": 3.885832187070151,
"grad_norm": 2.1019296646118164,
"learning_rate": 1.2982634200403704e-05,
"loss": 0.57977004,
"memory(GiB)": 67.62,
"step": 5650,
"train_speed(iter/s)": 0.41225
},
{
"acc": 0.82099762,
"epoch": 3.8892709766162312,
"grad_norm": 1.7367315292358398,
"learning_rate": 1.2906309449819154e-05,
"loss": 0.60107656,
"memory(GiB)": 67.62,
"step": 5655,
"train_speed(iter/s)": 0.410363
},
{
"acc": 0.81159325,
"epoch": 3.892709766162311,
"grad_norm": 2.0694830417633057,
"learning_rate": 1.2830176630178729e-05,
"loss": 0.61608582,
"memory(GiB)": 67.62,
"step": 5660,
"train_speed(iter/s)": 0.408617
},
{
"acc": 0.81860466,
"epoch": 3.8961485557083906,
"grad_norm": 1.9818027019500732,
"learning_rate": 1.2754236135357367e-05,
"loss": 0.60277052,
"memory(GiB)": 67.62,
"step": 5665,
"train_speed(iter/s)": 0.40679
},
{
"acc": 0.8118084,
"epoch": 3.8995873452544703,
"grad_norm": 1.893306016921997,
"learning_rate": 1.2678488358234992e-05,
"loss": 0.64575768,
"memory(GiB)": 67.62,
"step": 5670,
"train_speed(iter/s)": 0.405098
},
{
"acc": 0.80856295,
"epoch": 3.90302613480055,
"grad_norm": 1.9855684041976929,
"learning_rate": 1.2602933690694502e-05,
"loss": 0.65475564,
"memory(GiB)": 67.62,
"step": 5675,
"train_speed(iter/s)": 0.403391
},
{
"acc": 0.82089319,
"epoch": 3.90646492434663,
"grad_norm": 1.8527436256408691,
"learning_rate": 1.2527572523619729e-05,
"loss": 0.59858413,
"memory(GiB)": 67.62,
"step": 5680,
"train_speed(iter/s)": 0.401656
},
{
"acc": 0.81135502,
"epoch": 3.9099037138927097,
"grad_norm": 1.8112705945968628,
"learning_rate": 1.245240524689345e-05,
"loss": 0.640869,
"memory(GiB)": 67.62,
"step": 5685,
"train_speed(iter/s)": 0.399915
},
{
"acc": 0.81638031,
"epoch": 3.9133425034387894,
"grad_norm": 1.88164222240448,
"learning_rate": 1.2377432249395323e-05,
"loss": 0.62925024,
"memory(GiB)": 67.62,
"step": 5690,
"train_speed(iter/s)": 0.398338
},
{
"acc": 0.81665897,
"epoch": 3.9167812929848695,
"grad_norm": 2.2220370769500732,
"learning_rate": 1.2302653918999902e-05,
"loss": 0.61042566,
"memory(GiB)": 67.62,
"step": 5695,
"train_speed(iter/s)": 0.396628
},
{
"acc": 0.81146564,
"epoch": 3.9202200825309492,
"grad_norm": 2.2208054065704346,
"learning_rate": 1.2228070642574637e-05,
"loss": 0.62549958,
"memory(GiB)": 67.62,
"step": 5700,
"train_speed(iter/s)": 0.394844
},
{
"epoch": 3.9202200825309492,
"eval_acc": 0.7811276825482522,
"eval_loss": 0.783173680305481,
"eval_runtime": 1145.264,
"eval_samples_per_second": 3.74,
"eval_steps_per_second": 0.067,
"step": 5700
},
{
"acc": 0.82111177,
"epoch": 3.923658872077029,
"grad_norm": 2.1147096157073975,
"learning_rate": 1.2153682805977849e-05,
"loss": 0.61029615,
"memory(GiB)": 67.62,
"step": 5705,
"train_speed(iter/s)": 0.364527
},
{
"acc": 0.81873646,
"epoch": 3.9270976616231086,
"grad_norm": 1.870378851890564,
"learning_rate": 1.2079490794056745e-05,
"loss": 0.60247025,
"memory(GiB)": 67.62,
"step": 5710,
"train_speed(iter/s)": 0.363318
},
{
"acc": 0.8050211,
"epoch": 3.9305364511691883,
"grad_norm": 2.061549663543701,
"learning_rate": 1.2005494990645446e-05,
"loss": 0.64639635,
"memory(GiB)": 67.62,
"step": 5715,
"train_speed(iter/s)": 0.361877
},
{
"acc": 0.80292168,
"epoch": 3.9339752407152684,
"grad_norm": 2.088428020477295,
"learning_rate": 1.1931695778562984e-05,
"loss": 0.66072493,
"memory(GiB)": 67.62,
"step": 5720,
"train_speed(iter/s)": 0.360551
},
{
"acc": 0.81333771,
"epoch": 3.937414030261348,
"grad_norm": 2.195223093032837,
"learning_rate": 1.1858093539611302e-05,
"loss": 0.62468419,
"memory(GiB)": 67.62,
"step": 5725,
"train_speed(iter/s)": 0.359367
},
{
"acc": 0.80784473,
"epoch": 3.9408528198074277,
"grad_norm": 2.1771881580352783,
"learning_rate": 1.1784688654573306e-05,
"loss": 0.6561008,
"memory(GiB)": 67.62,
"step": 5730,
"train_speed(iter/s)": 0.35807
},
{
"acc": 0.81136761,
"epoch": 3.9442916093535074,
"grad_norm": 1.9094853401184082,
"learning_rate": 1.1711481503210884e-05,
"loss": 0.63656788,
"memory(GiB)": 67.62,
"step": 5735,
"train_speed(iter/s)": 0.356785
},
{
"acc": 0.81901407,
"epoch": 3.947730398899587,
"grad_norm": 1.9423341751098633,
"learning_rate": 1.1638472464262948e-05,
"loss": 0.61632404,
"memory(GiB)": 67.62,
"step": 5740,
"train_speed(iter/s)": 0.355531
},
{
"acc": 0.81649647,
"epoch": 3.9511691884456672,
"grad_norm": 1.8900690078735352,
"learning_rate": 1.1565661915443475e-05,
"loss": 0.61735368,
"memory(GiB)": 67.62,
"step": 5745,
"train_speed(iter/s)": 0.354181
},
{
"acc": 0.80203295,
"epoch": 3.954607977991747,
"grad_norm": 1.9980183839797974,
"learning_rate": 1.1493050233439526e-05,
"loss": 0.66276655,
"memory(GiB)": 67.62,
"step": 5750,
"train_speed(iter/s)": 0.352959
},
{
"acc": 0.81161861,
"epoch": 3.9580467675378266,
"grad_norm": 1.8814200162887573,
"learning_rate": 1.1420637793909362e-05,
"loss": 0.64876308,
"memory(GiB)": 67.62,
"step": 5755,
"train_speed(iter/s)": 0.351641
},
{
"acc": 0.80822277,
"epoch": 3.9614855570839067,
"grad_norm": 2.157858371734619,
"learning_rate": 1.1348424971480429e-05,
"loss": 0.64273562,
"memory(GiB)": 67.62,
"step": 5760,
"train_speed(iter/s)": 0.350261
},
{
"acc": 0.81014862,
"epoch": 3.9649243466299864,
"grad_norm": 1.8866498470306396,
"learning_rate": 1.1276412139747452e-05,
"loss": 0.63060379,
"memory(GiB)": 67.62,
"step": 5765,
"train_speed(iter/s)": 0.34897
},
{
"acc": 0.80797882,
"epoch": 3.968363136176066,
"grad_norm": 2.011620283126831,
"learning_rate": 1.1204599671270494e-05,
"loss": 0.64154892,
"memory(GiB)": 67.62,
"step": 5770,
"train_speed(iter/s)": 0.347683
},
{
"acc": 0.81029825,
"epoch": 3.9718019257221457,
"grad_norm": 2.1030616760253906,
"learning_rate": 1.1132987937573052e-05,
"loss": 0.62338347,
"memory(GiB)": 67.62,
"step": 5775,
"train_speed(iter/s)": 0.346438
},
{
"acc": 0.7949429,
"epoch": 3.9752407152682254,
"grad_norm": 2.054006338119507,
"learning_rate": 1.1061577309140098e-05,
"loss": 0.70458865,
"memory(GiB)": 67.62,
"step": 5780,
"train_speed(iter/s)": 0.345233
},
{
"acc": 0.8113884,
"epoch": 3.9786795048143055,
"grad_norm": 2.072899103164673,
"learning_rate": 1.0990368155416202e-05,
"loss": 0.63724394,
"memory(GiB)": 67.62,
"step": 5785,
"train_speed(iter/s)": 0.344106
},
{
"acc": 0.81157551,
"epoch": 3.9821182943603852,
"grad_norm": 1.9557698965072632,
"learning_rate": 1.091936084480358e-05,
"loss": 0.62347059,
"memory(GiB)": 67.62,
"step": 5790,
"train_speed(iter/s)": 0.342935
},
{
"acc": 0.81167564,
"epoch": 3.985557083906465,
"grad_norm": 1.9136029481887817,
"learning_rate": 1.0848555744660215e-05,
"loss": 0.61960039,
"memory(GiB)": 67.62,
"step": 5795,
"train_speed(iter/s)": 0.341839
},
{
"acc": 0.83220634,
"epoch": 3.9889958734525446,
"grad_norm": 1.9021817445755005,
"learning_rate": 1.0777953221297932e-05,
"loss": 0.56068201,
"memory(GiB)": 67.62,
"step": 5800,
"train_speed(iter/s)": 0.340892
},
{
"epoch": 3.9889958734525446,
"eval_acc": 0.7817744185000225,
"eval_loss": 0.7804912328720093,
"eval_runtime": 1111.4903,
"eval_samples_per_second": 3.853,
"eval_steps_per_second": 0.069,
"step": 5800
},
{
"acc": 0.80734158,
"epoch": 3.9924346629986243,
"grad_norm": 2.02614688873291,
"learning_rate": 1.0707553639980585e-05,
"loss": 0.64991465,
"memory(GiB)": 67.62,
"step": 5805,
"train_speed(iter/s)": 0.319008
},
{
"acc": 0.82151909,
"epoch": 3.9958734525447044,
"grad_norm": 2.07773494720459,
"learning_rate": 1.0637357364922026e-05,
"loss": 0.58141608,
"memory(GiB)": 67.62,
"step": 5810,
"train_speed(iter/s)": 0.318131
},
{
"acc": 0.81823015,
"epoch": 3.999312242090784,
"grad_norm": 1.7081282138824463,
"learning_rate": 1.0567364759284327e-05,
"loss": 0.61670866,
"memory(GiB)": 67.62,
"step": 5815,
"train_speed(iter/s)": 0.317189
},
{
"acc": 0.82516012,
"epoch": 4.002751031636864,
"grad_norm": 1.9197478294372559,
"learning_rate": 1.0497576185175877e-05,
"loss": 0.57296357,
"memory(GiB)": 67.62,
"step": 5820,
"train_speed(iter/s)": 0.315614
},
{
"acc": 0.82848129,
"epoch": 4.006189821182944,
"grad_norm": 1.7602168321609497,
"learning_rate": 1.042799200364949e-05,
"loss": 0.57674851,
"memory(GiB)": 67.62,
"step": 5825,
"train_speed(iter/s)": 0.314514
},
{
"acc": 0.8312006,
"epoch": 4.009628610729023,
"grad_norm": 1.904069423675537,
"learning_rate": 1.0358612574700576e-05,
"loss": 0.58140912,
"memory(GiB)": 67.62,
"step": 5830,
"train_speed(iter/s)": 0.313493
},
{
"acc": 0.83973274,
"epoch": 4.013067400275103,
"grad_norm": 1.8969364166259766,
"learning_rate": 1.0289438257265218e-05,
"loss": 0.5239769,
"memory(GiB)": 67.62,
"step": 5835,
"train_speed(iter/s)": 0.3126
},
{
"acc": 0.82053461,
"epoch": 4.016506189821183,
"grad_norm": 2.004246950149536,
"learning_rate": 1.0220469409218385e-05,
"loss": 0.58951969,
"memory(GiB)": 67.62,
"step": 5840,
"train_speed(iter/s)": 0.311738
},
{
"acc": 0.82730808,
"epoch": 4.019944979367263,
"grad_norm": 2.120168685913086,
"learning_rate": 1.0151706387371993e-05,
"loss": 0.57174788,
"memory(GiB)": 67.62,
"step": 5845,
"train_speed(iter/s)": 0.310754
},
{
"acc": 0.81913013,
"epoch": 4.023383768913343,
"grad_norm": 2.083112955093384,
"learning_rate": 1.008314954747319e-05,
"loss": 0.60139389,
"memory(GiB)": 67.62,
"step": 5850,
"train_speed(iter/s)": 0.309795
},
{
"acc": 0.82976856,
"epoch": 4.026822558459422,
"grad_norm": 1.9144500494003296,
"learning_rate": 1.0014799244202362e-05,
"loss": 0.56441569,
"memory(GiB)": 67.62,
"step": 5855,
"train_speed(iter/s)": 0.308991
},
{
"acc": 0.82570667,
"epoch": 4.030261348005502,
"grad_norm": 1.9201669692993164,
"learning_rate": 9.94665583117142e-06,
"loss": 0.58550615,
"memory(GiB)": 67.62,
"step": 5860,
"train_speed(iter/s)": 0.308024
},
{
"acc": 0.82395906,
"epoch": 4.033700137551582,
"grad_norm": 2.058741807937622,
"learning_rate": 9.878719660921893e-06,
"loss": 0.59208636,
"memory(GiB)": 67.62,
"step": 5865,
"train_speed(iter/s)": 0.30702
},
{
"acc": 0.82681818,
"epoch": 4.037138927097661,
"grad_norm": 2.0860073566436768,
"learning_rate": 9.810991084923154e-06,
"loss": 0.57163272,
"memory(GiB)": 67.62,
"step": 5870,
"train_speed(iter/s)": 0.306194
},
{
"acc": 0.81524105,
"epoch": 4.0405777166437415,
"grad_norm": 1.9567036628723145,
"learning_rate": 9.743470453570575e-06,
"loss": 0.62305789,
"memory(GiB)": 67.62,
"step": 5875,
"train_speed(iter/s)": 0.305279
},
{
"acc": 0.81588497,
"epoch": 4.044016506189821,
"grad_norm": 2.048231601715088,
"learning_rate": 9.676158116183729e-06,
"loss": 0.60361052,
"memory(GiB)": 67.62,
"step": 5880,
"train_speed(iter/s)": 0.304326
},
{
"acc": 0.82715149,
"epoch": 4.047455295735901,
"grad_norm": 1.918243408203125,
"learning_rate": 9.609054421004562e-06,
"loss": 0.56623569,
"memory(GiB)": 67.62,
"step": 5885,
"train_speed(iter/s)": 0.303437
},
{
"acc": 0.82016706,
"epoch": 4.050894085281981,
"grad_norm": 2.212838888168335,
"learning_rate": 9.542159715195614e-06,
"loss": 0.60472922,
"memory(GiB)": 67.62,
"step": 5890,
"train_speed(iter/s)": 0.302387
},
{
"acc": 0.82020359,
"epoch": 4.05433287482806,
"grad_norm": 2.029686450958252,
"learning_rate": 9.475474344838204e-06,
"loss": 0.59589596,
"memory(GiB)": 67.62,
"step": 5895,
"train_speed(iter/s)": 0.301428
},
{
"acc": 0.82250319,
"epoch": 4.05777166437414,
"grad_norm": 2.0857136249542236,
"learning_rate": 9.408998654930675e-06,
"loss": 0.59207001,
"memory(GiB)": 67.62,
"step": 5900,
"train_speed(iter/s)": 0.300628
},
{
"epoch": 4.05777166437414,
"eval_acc": 0.7809758401943582,
"eval_loss": 0.7950036525726318,
"eval_runtime": 1140.5258,
"eval_samples_per_second": 3.755,
"eval_steps_per_second": 0.068,
"step": 5900
},
{
"acc": 0.83261538,
"epoch": 4.0612104539202205,
"grad_norm": 2.129284143447876,
"learning_rate": 9.342732989386557e-06,
"loss": 0.54631634,
"memory(GiB)": 67.62,
"step": 5905,
"train_speed(iter/s)": 0.283416
},
{
"acc": 0.82259159,
"epoch": 4.0646492434663,
"grad_norm": 2.0771372318267822,
"learning_rate": 9.27667769103282e-06,
"loss": 0.59988642,
"memory(GiB)": 67.62,
"step": 5910,
"train_speed(iter/s)": 0.282658
},
{
"acc": 0.82938833,
"epoch": 4.06808803301238,
"grad_norm": 2.0288455486297607,
"learning_rate": 9.210833101608094e-06,
"loss": 0.56707897,
"memory(GiB)": 67.62,
"step": 5915,
"train_speed(iter/s)": 0.281964
},
{
"acc": 0.81752338,
"epoch": 4.071526822558459,
"grad_norm": 2.1337034702301025,
"learning_rate": 9.145199561760913e-06,
"loss": 0.58798003,
"memory(GiB)": 67.62,
"step": 5920,
"train_speed(iter/s)": 0.281194
},
{
"acc": 0.83025227,
"epoch": 4.074965612104539,
"grad_norm": 1.9078054428100586,
"learning_rate": 9.079777411047923e-06,
"loss": 0.55221009,
"memory(GiB)": 67.62,
"step": 5925,
"train_speed(iter/s)": 0.280374
},
{
"acc": 0.82038937,
"epoch": 4.078404401650619,
"grad_norm": 2.1154861450195312,
"learning_rate": 9.014566987932155e-06,
"loss": 0.58884945,
"memory(GiB)": 67.62,
"step": 5930,
"train_speed(iter/s)": 0.279665
},
{
"acc": 0.83256464,
"epoch": 4.081843191196699,
"grad_norm": 2.46669602394104,
"learning_rate": 8.949568629781233e-06,
"loss": 0.55993681,
"memory(GiB)": 67.62,
"step": 5935,
"train_speed(iter/s)": 0.279022
},
{
"acc": 0.82162399,
"epoch": 4.085281980742779,
"grad_norm": 2.2108795642852783,
"learning_rate": 8.884782672865745e-06,
"loss": 0.58439035,
"memory(GiB)": 67.62,
"step": 5940,
"train_speed(iter/s)": 0.278353
},
{
"acc": 0.81261024,
"epoch": 4.088720770288858,
"grad_norm": 2.3239004611968994,
"learning_rate": 8.820209452357312e-06,
"loss": 0.62102919,
"memory(GiB)": 67.62,
"step": 5945,
"train_speed(iter/s)": 0.277507
},
{
"acc": 0.82187653,
"epoch": 4.092159559834938,
"grad_norm": 2.306704521179199,
"learning_rate": 8.755849302327025e-06,
"loss": 0.58051348,
"memory(GiB)": 67.62,
"step": 5950,
"train_speed(iter/s)": 0.276835
},
{
"acc": 0.83303099,
"epoch": 4.095598349381018,
"grad_norm": 2.3323071002960205,
"learning_rate": 8.691702555743604e-06,
"loss": 0.54123106,
"memory(GiB)": 67.62,
"step": 5955,
"train_speed(iter/s)": 0.27621
},
{
"acc": 0.82155704,
"epoch": 4.099037138927097,
"grad_norm": 2.2443792819976807,
"learning_rate": 8.627769544471766e-06,
"loss": 0.57790089,
"memory(GiB)": 67.62,
"step": 5960,
"train_speed(iter/s)": 0.275577
},
{
"acc": 0.81777382,
"epoch": 4.1024759284731775,
"grad_norm": 2.0098752975463867,
"learning_rate": 8.564050599270423e-06,
"loss": 0.60635762,
"memory(GiB)": 67.62,
"step": 5965,
"train_speed(iter/s)": 0.27489
},
{
"acc": 0.82945662,
"epoch": 4.105914718019257,
"grad_norm": 2.6297407150268555,
"learning_rate": 8.50054604979104e-06,
"loss": 0.55736432,
"memory(GiB)": 67.62,
"step": 5970,
"train_speed(iter/s)": 0.274231
},
{
"acc": 0.83819923,
"epoch": 4.109353507565337,
"grad_norm": 2.027495861053467,
"learning_rate": 8.43725622457589e-06,
"loss": 0.53537874,
"memory(GiB)": 67.62,
"step": 5975,
"train_speed(iter/s)": 0.273579
},
{
"acc": 0.83728676,
"epoch": 4.112792297111417,
"grad_norm": 1.9991952180862427,
"learning_rate": 8.37418145105636e-06,
"loss": 0.52903852,
"memory(GiB)": 67.62,
"step": 5980,
"train_speed(iter/s)": 0.272886
},
{
"acc": 0.83919382,
"epoch": 4.116231086657496,
"grad_norm": 1.941271424293518,
"learning_rate": 8.311322055551258e-06,
"loss": 0.54152002,
"memory(GiB)": 67.62,
"step": 5985,
"train_speed(iter/s)": 0.272292
},
{
"acc": 0.82639074,
"epoch": 4.119669876203576,
"grad_norm": 2.0080490112304688,
"learning_rate": 8.248678363265168e-06,
"loss": 0.58616934,
"memory(GiB)": 67.62,
"step": 5990,
"train_speed(iter/s)": 0.271655
},
{
"acc": 0.82685022,
"epoch": 4.1231086657496565,
"grad_norm": 2.1029014587402344,
"learning_rate": 8.186250698286685e-06,
"loss": 0.57365303,
"memory(GiB)": 67.62,
"step": 5995,
"train_speed(iter/s)": 0.271018
},
{
"acc": 0.83401289,
"epoch": 4.126547455295736,
"grad_norm": 2.381568431854248,
"learning_rate": 8.124039383586785e-06,
"loss": 0.54990234,
"memory(GiB)": 67.62,
"step": 6000,
"train_speed(iter/s)": 0.27041
},
{
"epoch": 4.126547455295736,
"eval_acc": 0.7808971071219688,
"eval_loss": 0.7977337837219238,
"eval_runtime": 1150.4843,
"eval_samples_per_second": 3.723,
"eval_steps_per_second": 0.067,
"step": 6000
},
{
"acc": 0.82955971,
"epoch": 4.129986244841816,
"grad_norm": 2.2076478004455566,
"learning_rate": 8.062044741017174e-06,
"loss": 0.56549349,
"memory(GiB)": 67.62,
"step": 6005,
"train_speed(iter/s)": 0.256539
},
{
"acc": 0.82928619,
"epoch": 4.133425034387895,
"grad_norm": 2.240816116333008,
"learning_rate": 8.00026709130858e-06,
"loss": 0.56595135,
"memory(GiB)": 67.62,
"step": 6010,
"train_speed(iter/s)": 0.256006
},
{
"acc": 0.81660957,
"epoch": 4.136863823933975,
"grad_norm": 2.1177453994750977,
"learning_rate": 7.938706754069125e-06,
"loss": 0.60902424,
"memory(GiB)": 67.62,
"step": 6015,
"train_speed(iter/s)": 0.255365
},
{
"acc": 0.81723537,
"epoch": 4.140302613480055,
"grad_norm": 2.291558265686035,
"learning_rate": 7.877364047782646e-06,
"loss": 0.59432869,
"memory(GiB)": 67.62,
"step": 6020,
"train_speed(iter/s)": 0.254797
},
{
"acc": 0.83183041,
"epoch": 4.143741403026135,
"grad_norm": 2.1598074436187744,
"learning_rate": 7.816239289807078e-06,
"loss": 0.56827602,
"memory(GiB)": 67.62,
"step": 6025,
"train_speed(iter/s)": 0.254252
},
{
"acc": 0.82450991,
"epoch": 4.147180192572215,
"grad_norm": 2.316070556640625,
"learning_rate": 7.755332796372783e-06,
"loss": 0.5860589,
"memory(GiB)": 67.62,
"step": 6030,
"train_speed(iter/s)": 0.253753
},
{
"acc": 0.83219881,
"epoch": 4.150618982118294,
"grad_norm": 2.0400826930999756,
"learning_rate": 7.694644882580929e-06,
"loss": 0.56074944,
"memory(GiB)": 67.62,
"step": 6035,
"train_speed(iter/s)": 0.253206
},
{
"acc": 0.82405052,
"epoch": 4.154057771664374,
"grad_norm": 2.167229652404785,
"learning_rate": 7.634175862401859e-06,
"loss": 0.5924716,
"memory(GiB)": 67.62,
"step": 6040,
"train_speed(iter/s)": 0.252704
},
{
"acc": 0.83036137,
"epoch": 4.157496561210454,
"grad_norm": 2.0544652938842773,
"learning_rate": 7.5739260486734785e-06,
"loss": 0.56387725,
"memory(GiB)": 67.62,
"step": 6045,
"train_speed(iter/s)": 0.252135
},
{
"acc": 0.82638521,
"epoch": 4.160935350756533,
"grad_norm": 2.0944511890411377,
"learning_rate": 7.5138957530996e-06,
"loss": 0.58068042,
"memory(GiB)": 67.62,
"step": 6050,
"train_speed(iter/s)": 0.251533
},
{
"acc": 0.82557564,
"epoch": 4.1643741403026135,
"grad_norm": 2.206922769546509,
"learning_rate": 7.454085286248365e-06,
"loss": 0.57935457,
"memory(GiB)": 67.62,
"step": 6055,
"train_speed(iter/s)": 0.250929
},
{
"acc": 0.82684364,
"epoch": 4.167812929848694,
"grad_norm": 2.0836057662963867,
"learning_rate": 7.394494957550617e-06,
"loss": 0.57276134,
"memory(GiB)": 67.62,
"step": 6060,
"train_speed(iter/s)": 0.250399
},
{
"acc": 0.82708397,
"epoch": 4.171251719394773,
"grad_norm": 2.394265651702881,
"learning_rate": 7.335125075298327e-06,
"loss": 0.56799402,
"memory(GiB)": 67.62,
"step": 6065,
"train_speed(iter/s)": 0.249822
},
{
"acc": 0.82027712,
"epoch": 4.174690508940853,
"grad_norm": 2.20003080368042,
"learning_rate": 7.2759759466429625e-06,
"loss": 0.59135399,
"memory(GiB)": 67.62,
"step": 6070,
"train_speed(iter/s)": 0.249261
},
{
"acc": 0.83627338,
"epoch": 4.178129298486932,
"grad_norm": 2.0617763996124268,
"learning_rate": 7.217047877593917e-06,
"loss": 0.542978,
"memory(GiB)": 67.62,
"step": 6075,
"train_speed(iter/s)": 0.248741
},
{
"acc": 0.83200588,
"epoch": 4.181568088033012,
"grad_norm": 2.2494707107543945,
"learning_rate": 7.158341173016954e-06,
"loss": 0.54484763,
"memory(GiB)": 67.62,
"step": 6080,
"train_speed(iter/s)": 0.248292
},
{
"acc": 0.83537827,
"epoch": 4.1850068775790925,
"grad_norm": 2.177746295928955,
"learning_rate": 7.099856136632578e-06,
"loss": 0.54962234,
"memory(GiB)": 67.62,
"step": 6085,
"train_speed(iter/s)": 0.247751
},
{
"acc": 0.83314114,
"epoch": 4.188445667125172,
"grad_norm": 1.8806217908859253,
"learning_rate": 7.041593071014495e-06,
"loss": 0.55333209,
"memory(GiB)": 67.62,
"step": 6090,
"train_speed(iter/s)": 0.247191
},
{
"acc": 0.83060188,
"epoch": 4.191884456671252,
"grad_norm": 2.284046173095703,
"learning_rate": 6.983552277588039e-06,
"loss": 0.55391922,
"memory(GiB)": 67.62,
"step": 6095,
"train_speed(iter/s)": 0.246671
},
{
"acc": 0.82202473,
"epoch": 4.195323246217331,
"grad_norm": 2.113684892654419,
"learning_rate": 6.925734056628606e-06,
"loss": 0.59055824,
"memory(GiB)": 67.62,
"step": 6100,
"train_speed(iter/s)": 0.246148
},
{
"epoch": 4.195323246217331,
"eval_acc": 0.7810377018940927,
"eval_loss": 0.7978992462158203,
"eval_runtime": 1151.0505,
"eval_samples_per_second": 3.721,
"eval_steps_per_second": 0.067,
"step": 6100
},
{
"acc": 0.82853069,
"epoch": 4.198762035763411,
"grad_norm": 1.9292495250701904,
"learning_rate": 6.8681387072601215e-06,
"loss": 0.56650033,
"memory(GiB)": 67.62,
"step": 6105,
"train_speed(iter/s)": 0.234762
},
{
"acc": 0.83146677,
"epoch": 4.202200825309491,
"grad_norm": 2.457911729812622,
"learning_rate": 6.8107665274534755e-06,
"loss": 0.56592517,
"memory(GiB)": 67.62,
"step": 6110,
"train_speed(iter/s)": 0.234306
},
{
"acc": 0.81799488,
"epoch": 4.205639614855571,
"grad_norm": 2.1064655780792236,
"learning_rate": 6.753617814024982e-06,
"loss": 0.59414425,
"memory(GiB)": 67.62,
"step": 6115,
"train_speed(iter/s)": 0.233775
},
{
"acc": 0.83699923,
"epoch": 4.209078404401651,
"grad_norm": 2.15045166015625,
"learning_rate": 6.696692862634848e-06,
"loss": 0.53455338,
"memory(GiB)": 67.62,
"step": 6120,
"train_speed(iter/s)": 0.233246
},
{
"acc": 0.82855034,
"epoch": 4.212517193947731,
"grad_norm": 1.9256818294525146,
"learning_rate": 6.639991967785629e-06,
"loss": 0.57589531,
"memory(GiB)": 67.62,
"step": 6125,
"train_speed(iter/s)": 0.232841
},
{
"acc": 0.82569561,
"epoch": 4.21595598349381,
"grad_norm": 2.2568438053131104,
"learning_rate": 6.583515422820755e-06,
"loss": 0.59608107,
"memory(GiB)": 67.62,
"step": 6130,
"train_speed(iter/s)": 0.232428
},
{
"acc": 0.83448133,
"epoch": 4.21939477303989,
"grad_norm": 2.2241194248199463,
"learning_rate": 6.527263519922942e-06,
"loss": 0.53996773,
"memory(GiB)": 67.62,
"step": 6135,
"train_speed(iter/s)": 0.232016
},
{
"acc": 0.82402668,
"epoch": 4.222833562585969,
"grad_norm": 2.152508020401001,
"learning_rate": 6.471236550112733e-06,
"loss": 0.5897275,
"memory(GiB)": 67.62,
"step": 6140,
"train_speed(iter/s)": 0.231577
},
{
"acc": 0.82092781,
"epoch": 4.2262723521320495,
"grad_norm": 2.7539846897125244,
"learning_rate": 6.415434803246959e-06,
"loss": 0.60109167,
"memory(GiB)": 67.62,
"step": 6145,
"train_speed(iter/s)": 0.231131
},
{
"acc": 0.82336702,
"epoch": 4.22971114167813,
"grad_norm": 2.2428319454193115,
"learning_rate": 6.359858568017257e-06,
"loss": 0.5810329,
"memory(GiB)": 67.62,
"step": 6150,
"train_speed(iter/s)": 0.230762
},
{
"acc": 0.8375886,
"epoch": 4.233149931224209,
"grad_norm": 2.108989715576172,
"learning_rate": 6.304508131948601e-06,
"loss": 0.54037862,
"memory(GiB)": 67.62,
"step": 6155,
"train_speed(iter/s)": 0.230346
},
{
"acc": 0.82534332,
"epoch": 4.236588720770289,
"grad_norm": 2.159034252166748,
"learning_rate": 6.249383781397765e-06,
"loss": 0.58905783,
"memory(GiB)": 67.62,
"step": 6160,
"train_speed(iter/s)": 0.229862
},
{
"acc": 0.82394867,
"epoch": 4.240027510316368,
"grad_norm": 2.191835880279541,
"learning_rate": 6.194485801551856e-06,
"loss": 0.57035618,
"memory(GiB)": 67.62,
"step": 6165,
"train_speed(iter/s)": 0.229394
},
{
"acc": 0.83958015,
"epoch": 4.243466299862448,
"grad_norm": 1.9931029081344604,
"learning_rate": 6.139814476426854e-06,
"loss": 0.53320942,
"memory(GiB)": 67.62,
"step": 6170,
"train_speed(iter/s)": 0.22895
},
{
"acc": 0.81791973,
"epoch": 4.2469050894085285,
"grad_norm": 2.611358404159546,
"learning_rate": 6.085370088866157e-06,
"loss": 0.61060858,
"memory(GiB)": 67.62,
"step": 6175,
"train_speed(iter/s)": 0.22846
},
{
"acc": 0.81913891,
"epoch": 4.250343878954608,
"grad_norm": 2.0629124641418457,
"learning_rate": 6.031152920539071e-06,
"loss": 0.59518094,
"memory(GiB)": 67.62,
"step": 6180,
"train_speed(iter/s)": 0.228031
},
{
"acc": 0.83380852,
"epoch": 4.253782668500688,
"grad_norm": 2.2911267280578613,
"learning_rate": 5.977163251939388e-06,
"loss": 0.55708628,
"memory(GiB)": 67.62,
"step": 6185,
"train_speed(iter/s)": 0.227608
},
{
"acc": 0.83007746,
"epoch": 4.257221458046768,
"grad_norm": 2.3003599643707275,
"learning_rate": 5.9234013623839155e-06,
"loss": 0.56224914,
"memory(GiB)": 67.62,
"step": 6190,
"train_speed(iter/s)": 0.227229
},
{
"acc": 0.82731237,
"epoch": 4.260660247592847,
"grad_norm": 2.23395037651062,
"learning_rate": 5.869867530011054e-06,
"loss": 0.57990241,
"memory(GiB)": 67.62,
"step": 6195,
"train_speed(iter/s)": 0.226782
},
{
"acc": 0.82513866,
"epoch": 4.264099037138927,
"grad_norm": 1.8877415657043457,
"learning_rate": 5.816562031779334e-06,
"loss": 0.58530903,
"memory(GiB)": 67.62,
"step": 6200,
"train_speed(iter/s)": 0.226378
},
{
"epoch": 4.264099037138927,
"eval_acc": 0.7814088720925001,
"eval_loss": 0.796574592590332,
"eval_runtime": 1138.6928,
"eval_samples_per_second": 3.761,
"eval_steps_per_second": 0.068,
"step": 6200
},
{
"acc": 0.83331938,
"epoch": 4.267537826685007,
"grad_norm": 2.007477283477783,
"learning_rate": 5.7634851434660045e-06,
"loss": 0.55948911,
"memory(GiB)": 67.62,
"step": 6205,
"train_speed(iter/s)": 0.21694
},
{
"acc": 0.83156748,
"epoch": 4.270976616231087,
"grad_norm": 2.2435107231140137,
"learning_rate": 5.7106371396655885e-06,
"loss": 0.55306296,
"memory(GiB)": 67.62,
"step": 6210,
"train_speed(iter/s)": 0.21658
},
{
"acc": 0.82246685,
"epoch": 4.274415405777167,
"grad_norm": 2.471839427947998,
"learning_rate": 5.658018293788461e-06,
"loss": 0.58456354,
"memory(GiB)": 67.62,
"step": 6215,
"train_speed(iter/s)": 0.216188
},
{
"acc": 0.8260498,
"epoch": 4.277854195323246,
"grad_norm": 2.342773675918579,
"learning_rate": 5.6056288780594584e-06,
"loss": 0.58758726,
"memory(GiB)": 67.62,
"step": 6220,
"train_speed(iter/s)": 0.215836
},
{
"acc": 0.83068848,
"epoch": 4.281292984869326,
"grad_norm": 2.36448073387146,
"learning_rate": 5.553469163516459e-06,
"loss": 0.55812101,
"memory(GiB)": 67.62,
"step": 6225,
"train_speed(iter/s)": 0.21546
},
{
"acc": 0.8118145,
"epoch": 4.284731774415405,
"grad_norm": 2.0966968536376953,
"learning_rate": 5.501539420008957e-06,
"loss": 0.62151508,
"memory(GiB)": 67.62,
"step": 6230,
"train_speed(iter/s)": 0.215114
},
{
"acc": 0.82315483,
"epoch": 4.2881705639614855,
"grad_norm": 2.090514898300171,
"learning_rate": 5.449839916196701e-06,
"loss": 0.59569468,
"memory(GiB)": 67.62,
"step": 6235,
"train_speed(iter/s)": 0.214737
},
{
"acc": 0.82968979,
"epoch": 4.291609353507566,
"grad_norm": 2.4561944007873535,
"learning_rate": 5.398370919548289e-06,
"loss": 0.56410408,
"memory(GiB)": 67.62,
"step": 6240,
"train_speed(iter/s)": 0.214364
},
{
"acc": 0.82265596,
"epoch": 4.295048143053645,
"grad_norm": 2.0787575244903564,
"learning_rate": 5.3471326963397644e-06,
"loss": 0.59666047,
"memory(GiB)": 67.62,
"step": 6245,
"train_speed(iter/s)": 0.213976
},
{
"acc": 0.83164139,
"epoch": 4.298486932599725,
"grad_norm": 1.96835458278656,
"learning_rate": 5.296125511653292e-06,
"loss": 0.56099758,
"memory(GiB)": 67.62,
"step": 6250,
"train_speed(iter/s)": 0.213621
},
{
"acc": 0.82760611,
"epoch": 4.301925722145804,
"grad_norm": 2.032607078552246,
"learning_rate": 5.245349629375726e-06,
"loss": 0.56520452,
"memory(GiB)": 67.62,
"step": 6255,
"train_speed(iter/s)": 0.213246
},
{
"acc": 0.81889114,
"epoch": 4.305364511691884,
"grad_norm": 2.076733112335205,
"learning_rate": 5.194805312197261e-06,
"loss": 0.60234947,
"memory(GiB)": 67.62,
"step": 6260,
"train_speed(iter/s)": 0.212879
},
{
"acc": 0.83639603,
"epoch": 4.3088033012379645,
"grad_norm": 2.0413177013397217,
"learning_rate": 5.144492821610151e-06,
"loss": 0.53537364,
"memory(GiB)": 67.62,
"step": 6265,
"train_speed(iter/s)": 0.212585
},
{
"acc": 0.84348145,
"epoch": 4.312242090784044,
"grad_norm": 2.1440134048461914,
"learning_rate": 5.094412417907226e-06,
"loss": 0.52636375,
"memory(GiB)": 67.62,
"step": 6270,
"train_speed(iter/s)": 0.21228
},
{
"acc": 0.81755209,
"epoch": 4.315680880330124,
"grad_norm": 2.337132692337036,
"learning_rate": 5.0445643601806165e-06,
"loss": 0.60215778,
"memory(GiB)": 67.62,
"step": 6275,
"train_speed(iter/s)": 0.211939
},
{
"acc": 0.81957273,
"epoch": 4.319119669876203,
"grad_norm": 2.3544983863830566,
"learning_rate": 4.994948906320421e-06,
"loss": 0.62419033,
"memory(GiB)": 67.62,
"step": 6280,
"train_speed(iter/s)": 0.211611
},
{
"acc": 0.83044434,
"epoch": 4.322558459422283,
"grad_norm": 2.0763583183288574,
"learning_rate": 4.945566313013359e-06,
"loss": 0.56670027,
"memory(GiB)": 67.62,
"step": 6285,
"train_speed(iter/s)": 0.211304
},
{
"acc": 0.83016624,
"epoch": 4.325997248968363,
"grad_norm": 2.207101583480835,
"learning_rate": 4.896416835741426e-06,
"loss": 0.57944641,
"memory(GiB)": 67.62,
"step": 6290,
"train_speed(iter/s)": 0.210967
},
{
"acc": 0.82729073,
"epoch": 4.329436038514443,
"grad_norm": 2.1743686199188232,
"learning_rate": 4.847500728780591e-06,
"loss": 0.57582512,
"memory(GiB)": 67.62,
"step": 6295,
"train_speed(iter/s)": 0.210593
},
{
"acc": 0.83406305,
"epoch": 4.332874828060523,
"grad_norm": 2.1914258003234863,
"learning_rate": 4.798818245199488e-06,
"loss": 0.56798325,
"memory(GiB)": 67.62,
"step": 6300,
"train_speed(iter/s)": 0.210291
},
{
"epoch": 4.332874828060523,
"eval_acc": 0.7814426148378099,
"eval_loss": 0.7952266335487366,
"eval_runtime": 1113.0563,
"eval_samples_per_second": 3.848,
"eval_steps_per_second": 0.069,
"step": 6300
},
{
"acc": 0.81289082,
"epoch": 4.336313617606603,
"grad_norm": 1.9904134273529053,
"learning_rate": 4.7503696368580756e-06,
"loss": 0.62703791,
"memory(GiB)": 67.62,
"step": 6305,
"train_speed(iter/s)": 0.202425
},
{
"acc": 0.82088013,
"epoch": 4.339752407152682,
"grad_norm": 2.3138110637664795,
"learning_rate": 4.702155154406356e-06,
"loss": 0.59575286,
"memory(GiB)": 67.62,
"step": 6310,
"train_speed(iter/s)": 0.202102
},
{
"acc": 0.82686548,
"epoch": 4.343191196698762,
"grad_norm": 2.3361921310424805,
"learning_rate": 4.654175047283105e-06,
"loss": 0.58184552,
"memory(GiB)": 67.62,
"step": 6315,
"train_speed(iter/s)": 0.201819
},
{
"acc": 0.81809053,
"epoch": 4.346629986244841,
"grad_norm": 2.237659215927124,
"learning_rate": 4.606429563714522e-06,
"loss": 0.61091933,
"memory(GiB)": 67.62,
"step": 6320,
"train_speed(iter/s)": 0.201524
},
{
"acc": 0.83638992,
"epoch": 4.3500687757909215,
"grad_norm": 2.163444995880127,
"learning_rate": 4.558918950712983e-06,
"loss": 0.53875408,
"memory(GiB)": 67.62,
"step": 6325,
"train_speed(iter/s)": 0.201286
},
{
"acc": 0.83066168,
"epoch": 4.353507565337002,
"grad_norm": 2.1895644664764404,
"learning_rate": 4.511643454075753e-06,
"loss": 0.54859762,
"memory(GiB)": 67.62,
"step": 6330,
"train_speed(iter/s)": 0.201
},
{
"acc": 0.82753067,
"epoch": 4.356946354883081,
"grad_norm": 2.3949623107910156,
"learning_rate": 4.464603318383724e-06,
"loss": 0.57942715,
"memory(GiB)": 67.62,
"step": 6335,
"train_speed(iter/s)": 0.20074
},
{
"acc": 0.8256155,
"epoch": 4.360385144429161,
"grad_norm": 2.2843456268310547,
"learning_rate": 4.417798787000139e-06,
"loss": 0.5838841,
"memory(GiB)": 67.62,
"step": 6340,
"train_speed(iter/s)": 0.200436
},
{
"acc": 0.82477741,
"epoch": 4.36382393397524,
"grad_norm": 2.1748905181884766,
"learning_rate": 4.371230102069333e-06,
"loss": 0.57569537,
"memory(GiB)": 67.62,
"step": 6345,
"train_speed(iter/s)": 0.200165
},
{
"acc": 0.82552452,
"epoch": 4.36726272352132,
"grad_norm": 2.2806589603424072,
"learning_rate": 4.324897504515494e-06,
"loss": 0.5679925,
"memory(GiB)": 67.62,
"step": 6350,
"train_speed(iter/s)": 0.199857
},
{
"acc": 0.82228546,
"epoch": 4.3707015130674005,
"grad_norm": 2.5865187644958496,
"learning_rate": 4.278801234041395e-06,
"loss": 0.60699501,
"memory(GiB)": 67.62,
"step": 6355,
"train_speed(iter/s)": 0.199561
},
{
"acc": 0.83120518,
"epoch": 4.37414030261348,
"grad_norm": 2.1603238582611084,
"learning_rate": 4.2329415291271675e-06,
"loss": 0.56461072,
"memory(GiB)": 67.62,
"step": 6360,
"train_speed(iter/s)": 0.199251
},
{
"acc": 0.82535934,
"epoch": 4.37757909215956,
"grad_norm": 2.120961904525757,
"learning_rate": 4.18731862702908e-06,
"loss": 0.57014971,
"memory(GiB)": 67.62,
"step": 6365,
"train_speed(iter/s)": 0.199009
},
{
"acc": 0.82253723,
"epoch": 4.38101788170564,
"grad_norm": 2.091716766357422,
"learning_rate": 4.141932763778269e-06,
"loss": 0.58944392,
"memory(GiB)": 67.62,
"step": 6370,
"train_speed(iter/s)": 0.198744
},
{
"acc": 0.82127199,
"epoch": 4.384456671251719,
"grad_norm": 2.529238700866699,
"learning_rate": 4.09678417417958e-06,
"loss": 0.60495977,
"memory(GiB)": 67.62,
"step": 6375,
"train_speed(iter/s)": 0.19843
},
{
"acc": 0.82691174,
"epoch": 4.387895460797799,
"grad_norm": 2.0297234058380127,
"learning_rate": 4.051873091810289e-06,
"loss": 0.57716408,
"memory(GiB)": 67.62,
"step": 6380,
"train_speed(iter/s)": 0.198164
},
{
"acc": 0.82726593,
"epoch": 4.391334250343879,
"grad_norm": 2.1247737407684326,
"learning_rate": 4.007199749018933e-06,
"loss": 0.56230278,
"memory(GiB)": 67.62,
"step": 6385,
"train_speed(iter/s)": 0.197892
},
{
"acc": 0.83000584,
"epoch": 4.394773039889959,
"grad_norm": 2.1872763633728027,
"learning_rate": 3.962764376924093e-06,
"loss": 0.57364516,
"memory(GiB)": 67.62,
"step": 6390,
"train_speed(iter/s)": 0.197621
},
{
"acc": 0.80675488,
"epoch": 4.398211829436039,
"grad_norm": 2.4632184505462646,
"learning_rate": 3.918567205413209e-06,
"loss": 0.63493814,
"memory(GiB)": 67.62,
"step": 6395,
"train_speed(iter/s)": 0.197376
},
{
"acc": 0.83054581,
"epoch": 4.401650618982118,
"grad_norm": 2.1453042030334473,
"learning_rate": 3.8746084631413774e-06,
"loss": 0.55714712,
"memory(GiB)": 67.62,
"step": 6400,
"train_speed(iter/s)": 0.197063
},
{
"epoch": 4.401650618982118,
"eval_acc": 0.7813807531380753,
"eval_loss": 0.7940236926078796,
"eval_runtime": 1132.2427,
"eval_samples_per_second": 3.783,
"eval_steps_per_second": 0.068,
"step": 6400
},
{
"acc": 0.82402639,
"epoch": 4.405089408528198,
"grad_norm": 2.5457465648651123,
"learning_rate": 3.830888377530191e-06,
"loss": 0.58401513,
"memory(GiB)": 67.62,
"step": 6405,
"train_speed(iter/s)": 0.190185
},
{
"acc": 0.8252965,
"epoch": 4.408528198074277,
"grad_norm": 2.4412484169006348,
"learning_rate": 3.787407174766534e-06,
"loss": 0.57594061,
"memory(GiB)": 67.62,
"step": 6410,
"train_speed(iter/s)": 0.189944
},
{
"acc": 0.82564621,
"epoch": 4.4119669876203575,
"grad_norm": 2.4891350269317627,
"learning_rate": 3.7441650798014204e-06,
"loss": 0.58461208,
"memory(GiB)": 67.62,
"step": 6415,
"train_speed(iter/s)": 0.189741
},
{
"acc": 0.82523041,
"epoch": 4.415405777166438,
"grad_norm": 2.297450065612793,
"learning_rate": 3.7011623163488466e-06,
"loss": 0.56609049,
"memory(GiB)": 67.62,
"step": 6420,
"train_speed(iter/s)": 0.18951
},
{
"acc": 0.82219734,
"epoch": 4.418844566712517,
"grad_norm": 2.200800657272339,
"learning_rate": 3.6583991068846157e-06,
"loss": 0.59716201,
"memory(GiB)": 67.62,
"step": 6425,
"train_speed(iter/s)": 0.189276
},
{
"acc": 0.83268661,
"epoch": 4.422283356258597,
"grad_norm": 2.185145378112793,
"learning_rate": 3.61587567264519e-06,
"loss": 0.56204829,
"memory(GiB)": 67.62,
"step": 6430,
"train_speed(iter/s)": 0.188997
},
{
"acc": 0.82503653,
"epoch": 4.425722145804677,
"grad_norm": 2.043168067932129,
"learning_rate": 3.5735922336265567e-06,
"loss": 0.5881556,
"memory(GiB)": 67.62,
"step": 6435,
"train_speed(iter/s)": 0.188753
},
{
"acc": 0.81975737,
"epoch": 4.429160935350756,
"grad_norm": 2.160871982574463,
"learning_rate": 3.5315490085830724e-06,
"loss": 0.6149045,
"memory(GiB)": 67.62,
"step": 6440,
"train_speed(iter/s)": 0.188516
},
{
"acc": 0.82297249,
"epoch": 4.4325997248968365,
"grad_norm": 2.2979509830474854,
"learning_rate": 3.489746215026349e-06,
"loss": 0.58171053,
"memory(GiB)": 67.62,
"step": 6445,
"train_speed(iter/s)": 0.188237
},
{
"acc": 0.81737309,
"epoch": 4.436038514442916,
"grad_norm": 2.3226141929626465,
"learning_rate": 3.4481840692241092e-06,
"loss": 0.61316481,
"memory(GiB)": 67.62,
"step": 6450,
"train_speed(iter/s)": 0.18799
},
{
"acc": 0.82329559,
"epoch": 4.439477303988996,
"grad_norm": 2.2420105934143066,
"learning_rate": 3.4068627861991034e-06,
"loss": 0.60935397,
"memory(GiB)": 67.62,
"step": 6455,
"train_speed(iter/s)": 0.187782
},
{
"acc": 0.82957897,
"epoch": 4.442916093535076,
"grad_norm": 2.281442403793335,
"learning_rate": 3.365782579727948e-06,
"loss": 0.58194571,
"memory(GiB)": 67.62,
"step": 6460,
"train_speed(iter/s)": 0.187574
},
{
"acc": 0.82551146,
"epoch": 4.446354883081155,
"grad_norm": 2.1205482482910156,
"learning_rate": 3.3249436623400493e-06,
"loss": 0.57835684,
"memory(GiB)": 67.62,
"step": 6465,
"train_speed(iter/s)": 0.187326
},
{
"acc": 0.82712269,
"epoch": 4.449793672627235,
"grad_norm": 2.2721188068389893,
"learning_rate": 3.284346245316513e-06,
"loss": 0.57927489,
"memory(GiB)": 67.62,
"step": 6470,
"train_speed(iter/s)": 0.187079
},
{
"acc": 0.82159977,
"epoch": 4.453232462173315,
"grad_norm": 2.2417726516723633,
"learning_rate": 3.24399053868902e-06,
"loss": 0.57816648,
"memory(GiB)": 67.62,
"step": 6475,
"train_speed(iter/s)": 0.186865
},
{
"acc": 0.83896151,
"epoch": 4.456671251719395,
"grad_norm": 2.33647084236145,
"learning_rate": 3.203876751238749e-06,
"loss": 0.53038335,
"memory(GiB)": 67.62,
"step": 6480,
"train_speed(iter/s)": 0.186653
},
{
"acc": 0.83427839,
"epoch": 4.460110041265475,
"grad_norm": 2.063394069671631,
"learning_rate": 3.1640050904953505e-06,
"loss": 0.56539698,
"memory(GiB)": 67.62,
"step": 6485,
"train_speed(iter/s)": 0.186435
},
{
"acc": 0.82990141,
"epoch": 4.463548830811554,
"grad_norm": 2.2717719078063965,
"learning_rate": 3.1243757627357668e-06,
"loss": 0.55906305,
"memory(GiB)": 67.62,
"step": 6490,
"train_speed(iter/s)": 0.186164
},
{
"acc": 0.82575073,
"epoch": 4.466987620357634,
"grad_norm": 2.5037717819213867,
"learning_rate": 3.0849889729832654e-06,
"loss": 0.57216806,
"memory(GiB)": 67.62,
"step": 6495,
"train_speed(iter/s)": 0.185961
},
{
"acc": 0.82585573,
"epoch": 4.470426409903714,
"grad_norm": 2.19950795173645,
"learning_rate": 3.045844925006326e-06,
"loss": 0.57823243,
"memory(GiB)": 67.62,
"step": 6500,
"train_speed(iter/s)": 0.185733
},
{
"epoch": 4.470426409903714,
"eval_acc": 0.7815775858190489,
"eval_loss": 0.7929303646087646,
"eval_runtime": 1155.4394,
"eval_samples_per_second": 3.707,
"eval_steps_per_second": 0.067,
"step": 6500
},
{
"acc": 0.81569099,
"epoch": 4.4738651994497936,
"grad_norm": 2.3302502632141113,
"learning_rate": 3.0069438213175954e-06,
"loss": 0.61277876,
"memory(GiB)": 67.62,
"step": 6505,
"train_speed(iter/s)": 0.179594
},
{
"acc": 0.82125263,
"epoch": 4.477303988995874,
"grad_norm": 2.0833966732025146,
"learning_rate": 2.968285863172848e-06,
"loss": 0.59841776,
"memory(GiB)": 67.62,
"step": 6510,
"train_speed(iter/s)": 0.179377
},
{
"acc": 0.81880264,
"epoch": 4.480742778541953,
"grad_norm": 2.5484683513641357,
"learning_rate": 2.929871250569924e-06,
"loss": 0.59419332,
"memory(GiB)": 67.62,
"step": 6515,
"train_speed(iter/s)": 0.179133
},
{
"acc": 0.82751369,
"epoch": 4.484181568088033,
"grad_norm": 2.393644332885742,
"learning_rate": 2.891700182247734e-06,
"loss": 0.57184334,
"memory(GiB)": 67.62,
"step": 6520,
"train_speed(iter/s)": 0.17891
},
{
"acc": 0.81892633,
"epoch": 4.487620357634113,
"grad_norm": 2.44018292427063,
"learning_rate": 2.8537728556851844e-06,
"loss": 0.61149454,
"memory(GiB)": 67.62,
"step": 6525,
"train_speed(iter/s)": 0.178678
},
{
"acc": 0.82124205,
"epoch": 4.491059147180192,
"grad_norm": 2.0875890254974365,
"learning_rate": 2.8160894671001892e-06,
"loss": 0.5891263,
"memory(GiB)": 67.62,
"step": 6530,
"train_speed(iter/s)": 0.178449
},
{
"acc": 0.82609577,
"epoch": 4.4944979367262725,
"grad_norm": 2.057404041290283,
"learning_rate": 2.778650211448648e-06,
"loss": 0.56262321,
"memory(GiB)": 67.62,
"step": 6535,
"train_speed(iter/s)": 0.178257
},
{
"acc": 0.83202305,
"epoch": 4.497936726272352,
"grad_norm": 2.3149304389953613,
"learning_rate": 2.741455282423418e-06,
"loss": 0.55560713,
"memory(GiB)": 67.62,
"step": 6540,
"train_speed(iter/s)": 0.178079
},
{
"acc": 0.83527908,
"epoch": 4.501375515818432,
"grad_norm": 2.2315163612365723,
"learning_rate": 2.7045048724533295e-06,
"loss": 0.54867306,
"memory(GiB)": 67.62,
"step": 6545,
"train_speed(iter/s)": 0.177882
},
{
"acc": 0.82490063,
"epoch": 4.504814305364512,
"grad_norm": 2.0971333980560303,
"learning_rate": 2.667799172702211e-06,
"loss": 0.58073626,
"memory(GiB)": 67.62,
"step": 6550,
"train_speed(iter/s)": 0.177654
},
{
"acc": 0.82666264,
"epoch": 4.508253094910591,
"grad_norm": 2.328887701034546,
"learning_rate": 2.6313383730678536e-06,
"loss": 0.58351974,
"memory(GiB)": 67.62,
"step": 6555,
"train_speed(iter/s)": 0.177423
},
{
"acc": 0.81643009,
"epoch": 4.511691884456671,
"grad_norm": 2.3826959133148193,
"learning_rate": 2.5951226621810548e-06,
"loss": 0.60832229,
"memory(GiB)": 67.62,
"step": 6560,
"train_speed(iter/s)": 0.17721
},
{
"acc": 0.83378086,
"epoch": 4.5151306740027515,
"grad_norm": 2.135087490081787,
"learning_rate": 2.5591522274046416e-06,
"loss": 0.56533546,
"memory(GiB)": 67.62,
"step": 6565,
"train_speed(iter/s)": 0.177032
},
{
"acc": 0.83013229,
"epoch": 4.518569463548831,
"grad_norm": 2.335890054702759,
"learning_rate": 2.523427254832501e-06,
"loss": 0.55983028,
"memory(GiB)": 67.62,
"step": 6570,
"train_speed(iter/s)": 0.176797
},
{
"acc": 0.82724657,
"epoch": 4.522008253094911,
"grad_norm": 2.3773765563964844,
"learning_rate": 2.487947929288618e-06,
"loss": 0.57505946,
"memory(GiB)": 67.62,
"step": 6575,
"train_speed(iter/s)": 0.176609
},
{
"acc": 0.82321806,
"epoch": 4.52544704264099,
"grad_norm": 2.1447110176086426,
"learning_rate": 2.4527144343261097e-06,
"loss": 0.58117051,
"memory(GiB)": 67.62,
"step": 6580,
"train_speed(iter/s)": 0.176429
},
{
"acc": 0.81534252,
"epoch": 4.52888583218707,
"grad_norm": 2.3002796173095703,
"learning_rate": 2.417726952226283e-06,
"loss": 0.59847307,
"memory(GiB)": 67.62,
"step": 6585,
"train_speed(iter/s)": 0.176205
},
{
"acc": 0.83123646,
"epoch": 4.53232462173315,
"grad_norm": 2.134842872619629,
"learning_rate": 2.382985663997712e-06,
"loss": 0.56259084,
"memory(GiB)": 67.62,
"step": 6590,
"train_speed(iter/s)": 0.175983
},
{
"acc": 0.82430344,
"epoch": 4.5357634112792296,
"grad_norm": 2.316795825958252,
"learning_rate": 2.348490749375251e-06,
"loss": 0.57970629,
"memory(GiB)": 67.62,
"step": 6595,
"train_speed(iter/s)": 0.1758
},
{
"acc": 0.83597136,
"epoch": 4.53920220082531,
"grad_norm": 2.263073444366455,
"learning_rate": 2.3142423868191563e-06,
"loss": 0.54895492,
"memory(GiB)": 67.62,
"step": 6600,
"train_speed(iter/s)": 0.175618
},
{
"epoch": 4.53920220082531,
"eval_acc": 0.7819375084356863,
"eval_loss": 0.7933745980262756,
"eval_runtime": 1098.756,
"eval_samples_per_second": 3.898,
"eval_steps_per_second": 0.07,
"step": 6600
},
{
"acc": 0.832055,
"epoch": 4.542640990371389,
"grad_norm": 2.175189971923828,
"learning_rate": 2.2802407535141275e-06,
"loss": 0.56409612,
"memory(GiB)": 67.62,
"step": 6605,
"train_speed(iter/s)": 0.170456
},
{
"acc": 0.82646189,
"epoch": 4.546079779917469,
"grad_norm": 2.112194776535034,
"learning_rate": 2.246486025368418e-06,
"loss": 0.56891632,
"memory(GiB)": 67.62,
"step": 6610,
"train_speed(iter/s)": 0.170302
},
{
"acc": 0.82066345,
"epoch": 4.549518569463549,
"grad_norm": 2.304631233215332,
"learning_rate": 2.212978377012892e-06,
"loss": 0.60033989,
"memory(GiB)": 67.62,
"step": 6615,
"train_speed(iter/s)": 0.170106
},
{
"acc": 0.83084068,
"epoch": 4.552957359009628,
"grad_norm": 2.2651240825653076,
"learning_rate": 2.179717981800164e-06,
"loss": 0.55889602,
"memory(GiB)": 67.62,
"step": 6620,
"train_speed(iter/s)": 0.169961
},
{
"acc": 0.82767801,
"epoch": 4.5563961485557085,
"grad_norm": 2.218092918395996,
"learning_rate": 2.1467050118036613e-06,
"loss": 0.58023634,
"memory(GiB)": 67.62,
"step": 6625,
"train_speed(iter/s)": 0.1698
},
{
"acc": 0.82311954,
"epoch": 4.559834938101789,
"grad_norm": 2.081865072250366,
"learning_rate": 2.1139396378167637e-06,
"loss": 0.58637218,
"memory(GiB)": 67.62,
"step": 6630,
"train_speed(iter/s)": 0.169621
},
{
"acc": 0.82979736,
"epoch": 4.563273727647868,
"grad_norm": 2.2547144889831543,
"learning_rate": 2.08142202935188e-06,
"loss": 0.55914106,
"memory(GiB)": 67.62,
"step": 6635,
"train_speed(iter/s)": 0.169453
},
{
"acc": 0.82038078,
"epoch": 4.566712517193948,
"grad_norm": 2.181720495223999,
"learning_rate": 2.0491523546396466e-06,
"loss": 0.59662962,
"memory(GiB)": 67.62,
"step": 6640,
"train_speed(iter/s)": 0.169266
},
{
"acc": 0.8245801,
"epoch": 4.570151306740027,
"grad_norm": 2.330573558807373,
"learning_rate": 2.01713078062797e-06,
"loss": 0.58751688,
"memory(GiB)": 67.62,
"step": 6645,
"train_speed(iter/s)": 0.169123
},
{
"acc": 0.83173065,
"epoch": 4.573590096286107,
"grad_norm": 2.1227643489837646,
"learning_rate": 1.9853574729812123e-06,
"loss": 0.54269109,
"memory(GiB)": 67.62,
"step": 6650,
"train_speed(iter/s)": 0.168939
},
{
"acc": 0.83502407,
"epoch": 4.577028885832187,
"grad_norm": 2.232192277908325,
"learning_rate": 1.953832596079319e-06,
"loss": 0.5437376,
"memory(GiB)": 67.62,
"step": 6655,
"train_speed(iter/s)": 0.168764
},
{
"acc": 0.83107376,
"epoch": 4.580467675378267,
"grad_norm": 2.2640929222106934,
"learning_rate": 1.9225563130169875e-06,
"loss": 0.54885445,
"memory(GiB)": 67.62,
"step": 6660,
"train_speed(iter/s)": 0.168622
},
{
"acc": 0.83116302,
"epoch": 4.583906464924347,
"grad_norm": 2.4255106449127197,
"learning_rate": 1.8915287856027996e-06,
"loss": 0.57933769,
"memory(GiB)": 67.62,
"step": 6665,
"train_speed(iter/s)": 0.168435
},
{
"acc": 0.83079157,
"epoch": 4.587345254470426,
"grad_norm": 2.252610445022583,
"learning_rate": 1.8607501743583902e-06,
"loss": 0.57562494,
"memory(GiB)": 67.62,
"step": 6670,
"train_speed(iter/s)": 0.168263
},
{
"acc": 0.82178955,
"epoch": 4.590784044016506,
"grad_norm": 2.378258466720581,
"learning_rate": 1.8302206385176258e-06,
"loss": 0.59762077,
"memory(GiB)": 67.62,
"step": 6675,
"train_speed(iter/s)": 0.168089
},
{
"acc": 0.83059864,
"epoch": 4.594222833562586,
"grad_norm": 2.4089572429656982,
"learning_rate": 1.7999403360257766e-06,
"loss": 0.57908206,
"memory(GiB)": 67.62,
"step": 6680,
"train_speed(iter/s)": 0.167941
},
{
"acc": 0.82545843,
"epoch": 4.5976616231086656,
"grad_norm": 2.556912660598755,
"learning_rate": 1.7699094235386956e-06,
"loss": 0.5731123,
"memory(GiB)": 67.62,
"step": 6685,
"train_speed(iter/s)": 0.167785
},
{
"acc": 0.83477535,
"epoch": 4.601100412654746,
"grad_norm": 2.144914150238037,
"learning_rate": 1.7401280564220138e-06,
"loss": 0.54660711,
"memory(GiB)": 67.62,
"step": 6690,
"train_speed(iter/s)": 0.167611
},
{
"acc": 0.82835121,
"epoch": 4.604539202200826,
"grad_norm": 2.0818796157836914,
"learning_rate": 1.7105963887503236e-06,
"loss": 0.57266307,
"memory(GiB)": 67.62,
"step": 6695,
"train_speed(iter/s)": 0.167452
},
{
"acc": 0.82310772,
"epoch": 4.607977991746905,
"grad_norm": 2.37752366065979,
"learning_rate": 1.6813145733064094e-06,
"loss": 0.5846642,
"memory(GiB)": 67.62,
"step": 6700,
"train_speed(iter/s)": 0.167295
},
{
"epoch": 4.607977991746905,
"eval_acc": 0.7817238043820579,
"eval_loss": 0.7931625843048096,
"eval_runtime": 1146.6195,
"eval_samples_per_second": 3.735,
"eval_steps_per_second": 0.067,
"step": 6700
},
{
"acc": 0.82920761,
"epoch": 4.611416781292985,
"grad_norm": 2.6180896759033203,
"learning_rate": 1.6522827615804277e-06,
"loss": 0.55708656,
"memory(GiB)": 67.62,
"step": 6705,
"train_speed(iter/s)": 0.162482
},
{
"acc": 0.82743568,
"epoch": 4.614855570839064,
"grad_norm": 2.1857407093048096,
"learning_rate": 1.6235011037691344e-06,
"loss": 0.58240447,
"memory(GiB)": 67.62,
"step": 6710,
"train_speed(iter/s)": 0.16232
},
{
"acc": 0.81718578,
"epoch": 4.6182943603851445,
"grad_norm": 2.2875170707702637,
"learning_rate": 1.5949697487751052e-06,
"loss": 0.61164322,
"memory(GiB)": 67.62,
"step": 6715,
"train_speed(iter/s)": 0.162187
},
{
"acc": 0.82232466,
"epoch": 4.621733149931224,
"grad_norm": 2.1736197471618652,
"learning_rate": 1.5666888442059804e-06,
"loss": 0.58460808,
"memory(GiB)": 67.62,
"step": 6720,
"train_speed(iter/s)": 0.162055
},
{
"acc": 0.82449484,
"epoch": 4.625171939477304,
"grad_norm": 2.126422643661499,
"learning_rate": 1.538658536373673e-06,
"loss": 0.57822762,
"memory(GiB)": 67.62,
"step": 6725,
"train_speed(iter/s)": 0.161946
},
{
"acc": 0.82507849,
"epoch": 4.628610729023384,
"grad_norm": 2.2693231105804443,
"learning_rate": 1.5108789702936455e-06,
"loss": 0.57952757,
"memory(GiB)": 67.62,
"step": 6730,
"train_speed(iter/s)": 0.161804
},
{
"acc": 0.8332633,
"epoch": 4.632049518569463,
"grad_norm": 2.1562063694000244,
"learning_rate": 1.4833502896841289e-06,
"loss": 0.55239053,
"memory(GiB)": 67.62,
"step": 6735,
"train_speed(iter/s)": 0.161675
},
{
"acc": 0.82784958,
"epoch": 4.635488308115543,
"grad_norm": 2.1214349269866943,
"learning_rate": 1.456072636965399e-06,
"loss": 0.5708005,
"memory(GiB)": 67.62,
"step": 6740,
"train_speed(iter/s)": 0.161537
},
{
"acc": 0.8265028,
"epoch": 4.6389270976616235,
"grad_norm": 2.2998435497283936,
"learning_rate": 1.4290461532590343e-06,
"loss": 0.58597693,
"memory(GiB)": 67.62,
"step": 6745,
"train_speed(iter/s)": 0.161423
},
{
"acc": 0.82159843,
"epoch": 4.642365887207703,
"grad_norm": 2.096148729324341,
"learning_rate": 1.4022709783871718e-06,
"loss": 0.60574317,
"memory(GiB)": 67.62,
"step": 6750,
"train_speed(iter/s)": 0.161279
},
{
"acc": 0.82948322,
"epoch": 4.645804676753783,
"grad_norm": 1.9622774124145508,
"learning_rate": 1.375747250871807e-06,
"loss": 0.57297769,
"memory(GiB)": 67.62,
"step": 6755,
"train_speed(iter/s)": 0.161116
},
{
"acc": 0.82648077,
"epoch": 4.649243466299862,
"grad_norm": 2.2610554695129395,
"learning_rate": 1.3494751079340738e-06,
"loss": 0.56792774,
"memory(GiB)": 67.62,
"step": 6760,
"train_speed(iter/s)": 0.16096
},
{
"acc": 0.82656231,
"epoch": 4.652682255845942,
"grad_norm": 2.134491205215454,
"learning_rate": 1.3234546854935154e-06,
"loss": 0.56553001,
"memory(GiB)": 67.62,
"step": 6765,
"train_speed(iter/s)": 0.160816
},
{
"acc": 0.83355551,
"epoch": 4.656121045392022,
"grad_norm": 2.2637131214141846,
"learning_rate": 1.2976861181673923e-06,
"loss": 0.55729747,
"memory(GiB)": 67.62,
"step": 6770,
"train_speed(iter/s)": 0.160707
},
{
"acc": 0.83004456,
"epoch": 4.6595598349381016,
"grad_norm": 2.241671323776245,
"learning_rate": 1.2721695392699869e-06,
"loss": 0.55024014,
"memory(GiB)": 67.62,
"step": 6775,
"train_speed(iter/s)": 0.16056
},
{
"acc": 0.82354479,
"epoch": 4.662998624484182,
"grad_norm": 2.196913480758667,
"learning_rate": 1.2469050808119282e-06,
"loss": 0.57635975,
"memory(GiB)": 67.62,
"step": 6780,
"train_speed(iter/s)": 0.160424
},
{
"acc": 0.814569,
"epoch": 4.666437414030261,
"grad_norm": 2.4140119552612305,
"learning_rate": 1.221892873499479e-06,
"loss": 0.61613665,
"memory(GiB)": 67.62,
"step": 6785,
"train_speed(iter/s)": 0.160253
},
{
"acc": 0.83262882,
"epoch": 4.669876203576341,
"grad_norm": 2.239264726638794,
"learning_rate": 1.1971330467338833e-06,
"loss": 0.55864224,
"memory(GiB)": 67.62,
"step": 6790,
"train_speed(iter/s)": 0.1601
},
{
"acc": 0.82022276,
"epoch": 4.673314993122421,
"grad_norm": 2.135786771774292,
"learning_rate": 1.172625728610676e-06,
"loss": 0.58857613,
"memory(GiB)": 67.62,
"step": 6795,
"train_speed(iter/s)": 0.15997
},
{
"acc": 0.83236532,
"epoch": 4.6767537826685,
"grad_norm": 1.979997992515564,
"learning_rate": 1.1483710459190515e-06,
"loss": 0.56562681,
"memory(GiB)": 67.62,
"step": 6800,
"train_speed(iter/s)": 0.159825
},
{
"epoch": 4.6767537826685,
"eval_acc": 0.7819093894812615,
"eval_loss": 0.7931298613548279,
"eval_runtime": 1157.2244,
"eval_samples_per_second": 3.701,
"eval_steps_per_second": 0.067,
"step": 6800
},
{
"acc": 0.8271327,
"epoch": 4.6801925722145805,
"grad_norm": 2.349480152130127,
"learning_rate": 1.1243691241411644e-06,
"loss": 0.58665218,
"memory(GiB)": 67.62,
"step": 6805,
"train_speed(iter/s)": 0.155462
},
{
"acc": 0.83063755,
"epoch": 4.683631361760661,
"grad_norm": 2.1535379886627197,
"learning_rate": 1.1006200874515338e-06,
"loss": 0.55733638,
"memory(GiB)": 67.62,
"step": 6810,
"train_speed(iter/s)": 0.155314
},
{
"acc": 0.81677713,
"epoch": 4.68707015130674,
"grad_norm": 2.1077511310577393,
"learning_rate": 1.0771240587163464e-06,
"loss": 0.60006194,
"memory(GiB)": 67.62,
"step": 6815,
"train_speed(iter/s)": 0.155164
},
{
"acc": 0.83417349,
"epoch": 4.69050894085282,
"grad_norm": 2.45220685005188,
"learning_rate": 1.0538811594928607e-06,
"loss": 0.53521776,
"memory(GiB)": 67.62,
"step": 6820,
"train_speed(iter/s)": 0.155057
},
{
"acc": 0.82799282,
"epoch": 4.693947730398899,
"grad_norm": 2.1742374897003174,
"learning_rate": 1.0308915100287642e-06,
"loss": 0.56440144,
"memory(GiB)": 67.62,
"step": 6825,
"train_speed(iter/s)": 0.154917
},
{
"acc": 0.83087101,
"epoch": 4.697386519944979,
"grad_norm": 2.1993463039398193,
"learning_rate": 1.0081552292615454e-06,
"loss": 0.5529726,
"memory(GiB)": 67.62,
"step": 6830,
"train_speed(iter/s)": 0.154819
},
{
"acc": 0.83782015,
"epoch": 4.7008253094910595,
"grad_norm": 2.260230541229248,
"learning_rate": 9.856724348178841e-07,
"loss": 0.53974109,
"memory(GiB)": 67.62,
"step": 6835,
"train_speed(iter/s)": 0.154699
},
{
"acc": 0.83221836,
"epoch": 4.704264099037139,
"grad_norm": 2.035860061645508,
"learning_rate": 9.634432430130399e-07,
"loss": 0.54515915,
"memory(GiB)": 67.62,
"step": 6840,
"train_speed(iter/s)": 0.154586
},
{
"acc": 0.82770882,
"epoch": 4.707702888583219,
"grad_norm": 2.026685953140259,
"learning_rate": 9.414677688502594e-07,
"loss": 0.5836278,
"memory(GiB)": 67.62,
"step": 6845,
"train_speed(iter/s)": 0.154451
},
{
"acc": 0.82769499,
"epoch": 4.711141678129298,
"grad_norm": 2.1812551021575928,
"learning_rate": 9.1974612602017e-07,
"loss": 0.57010379,
"memory(GiB)": 67.62,
"step": 6850,
"train_speed(iter/s)": 0.154322
},
{
"acc": 0.81980133,
"epoch": 4.714580467675378,
"grad_norm": 2.3447399139404297,
"learning_rate": 8.982784269002089e-07,
"loss": 0.59749265,
"memory(GiB)": 67.62,
"step": 6855,
"train_speed(iter/s)": 0.154209
},
{
"acc": 0.83671551,
"epoch": 4.718019257221458,
"grad_norm": 2.019040107727051,
"learning_rate": 8.770647825540072e-07,
"loss": 0.5339366,
"memory(GiB)": 67.62,
"step": 6860,
"train_speed(iter/s)": 0.154098
},
{
"acc": 0.83358383,
"epoch": 4.7214580467675376,
"grad_norm": 2.4504003524780273,
"learning_rate": 8.561053027308616e-07,
"loss": 0.54877663,
"memory(GiB)": 67.62,
"step": 6865,
"train_speed(iter/s)": 0.153941
},
{
"acc": 0.82203579,
"epoch": 4.724896836313618,
"grad_norm": 2.2956948280334473,
"learning_rate": 8.354000958651198e-07,
"loss": 0.58671484,
"memory(GiB)": 67.62,
"step": 6870,
"train_speed(iter/s)": 0.153816
},
{
"acc": 0.82069569,
"epoch": 4.728335625859698,
"grad_norm": 2.3851406574249268,
"learning_rate": 8.149492690756679e-07,
"loss": 0.58018303,
"memory(GiB)": 67.62,
"step": 6875,
"train_speed(iter/s)": 0.153716
},
{
"acc": 0.82189007,
"epoch": 4.731774415405777,
"grad_norm": 2.3761680126190186,
"learning_rate": 7.947529281653329e-07,
"loss": 0.5802557,
"memory(GiB)": 67.62,
"step": 6880,
"train_speed(iter/s)": 0.153572
},
{
"acc": 0.8201951,
"epoch": 4.735213204951857,
"grad_norm": 2.3680715560913086,
"learning_rate": 7.748111776203488e-07,
"loss": 0.5941371,
"memory(GiB)": 67.62,
"step": 6885,
"train_speed(iter/s)": 0.153396
},
{
"acc": 0.83601265,
"epoch": 4.738651994497936,
"grad_norm": 2.2949132919311523,
"learning_rate": 7.551241206098402e-07,
"loss": 0.54753556,
"memory(GiB)": 67.62,
"step": 6890,
"train_speed(iter/s)": 0.153255
},
{
"acc": 0.82891521,
"epoch": 4.7420907840440165,
"grad_norm": 2.6076362133026123,
"learning_rate": 7.356918589852512e-07,
"loss": 0.56754522,
"memory(GiB)": 67.62,
"step": 6895,
"train_speed(iter/s)": 0.153143
},
{
"acc": 0.82609663,
"epoch": 4.745529573590097,
"grad_norm": 2.297222852706909,
"learning_rate": 7.165144932798456e-07,
"loss": 0.56647487,
"memory(GiB)": 67.62,
"step": 6900,
"train_speed(iter/s)": 0.153005
},
{
"epoch": 4.745529573590097,
"eval_acc": 0.7816394475187834,
"eval_loss": 0.7943344116210938,
"eval_runtime": 1104.7871,
"eval_samples_per_second": 3.877,
"eval_steps_per_second": 0.07,
"step": 6900
},
{
"acc": 0.83013258,
"epoch": 4.748968363136176,
"grad_norm": 2.427417755126953,
"learning_rate": 6.975921227081685e-07,
"loss": 0.55977812,
"memory(GiB)": 67.62,
"step": 6905,
"train_speed(iter/s)": 0.149232
},
{
"acc": 0.8199255,
"epoch": 4.752407152682256,
"grad_norm": 2.2759101390838623,
"learning_rate": 6.789248451655523e-07,
"loss": 0.58387136,
"memory(GiB)": 67.62,
"step": 6910,
"train_speed(iter/s)": 0.149131
},
{
"acc": 0.82206144,
"epoch": 4.755845942228335,
"grad_norm": 2.231541395187378,
"learning_rate": 6.605127572275894e-07,
"loss": 0.59709778,
"memory(GiB)": 67.62,
"step": 6915,
"train_speed(iter/s)": 0.149024
},
{
"acc": 0.8175106,
"epoch": 4.759284731774415,
"grad_norm": 2.4362175464630127,
"learning_rate": 6.423559541496492e-07,
"loss": 0.6127625,
"memory(GiB)": 67.62,
"step": 6920,
"train_speed(iter/s)": 0.148912
},
{
"acc": 0.83411427,
"epoch": 4.7627235213204955,
"grad_norm": 2.0732574462890625,
"learning_rate": 6.244545298663843e-07,
"loss": 0.54563398,
"memory(GiB)": 67.62,
"step": 6925,
"train_speed(iter/s)": 0.148809
},
{
"acc": 0.8238575,
"epoch": 4.766162310866575,
"grad_norm": 2.174506187438965,
"learning_rate": 6.068085769912308e-07,
"loss": 0.58828888,
"memory(GiB)": 67.62,
"step": 6930,
"train_speed(iter/s)": 0.148728
},
{
"acc": 0.82762337,
"epoch": 4.769601100412655,
"grad_norm": 2.551449775695801,
"learning_rate": 5.894181868159313e-07,
"loss": 0.57614126,
"memory(GiB)": 67.62,
"step": 6935,
"train_speed(iter/s)": 0.148607
},
{
"acc": 0.82847862,
"epoch": 4.773039889958735,
"grad_norm": 2.242396354675293,
"learning_rate": 5.722834493100845e-07,
"loss": 0.58625593,
"memory(GiB)": 67.62,
"step": 6940,
"train_speed(iter/s)": 0.148523
},
{
"acc": 0.83427067,
"epoch": 4.776478679504814,
"grad_norm": 2.2920279502868652,
"learning_rate": 5.554044531206463e-07,
"loss": 0.55577106,
"memory(GiB)": 67.62,
"step": 6945,
"train_speed(iter/s)": 0.148434
},
{
"acc": 0.82505064,
"epoch": 4.779917469050894,
"grad_norm": 2.4490933418273926,
"learning_rate": 5.387812855715081e-07,
"loss": 0.57476597,
"memory(GiB)": 67.62,
"step": 6950,
"train_speed(iter/s)": 0.148301
},
{
"acc": 0.82180548,
"epoch": 4.7833562585969736,
"grad_norm": 2.4874212741851807,
"learning_rate": 5.224140326630133e-07,
"loss": 0.59430389,
"memory(GiB)": 67.62,
"step": 6955,
"train_speed(iter/s)": 0.148156
},
{
"acc": 0.81490593,
"epoch": 4.786795048143054,
"grad_norm": 2.1581063270568848,
"learning_rate": 5.063027790715248e-07,
"loss": 0.60423484,
"memory(GiB)": 67.62,
"step": 6960,
"train_speed(iter/s)": 0.148057
},
{
"acc": 0.82663193,
"epoch": 4.790233837689134,
"grad_norm": 2.1210756301879883,
"learning_rate": 4.904476081489975e-07,
"loss": 0.56228495,
"memory(GiB)": 67.62,
"step": 6965,
"train_speed(iter/s)": 0.147956
},
{
"acc": 0.83753424,
"epoch": 4.793672627235213,
"grad_norm": 2.065978527069092,
"learning_rate": 4.7484860192252317e-07,
"loss": 0.53960943,
"memory(GiB)": 67.62,
"step": 6970,
"train_speed(iter/s)": 0.147817
},
{
"acc": 0.83375235,
"epoch": 4.797111416781293,
"grad_norm": 2.488433837890625,
"learning_rate": 4.595058410939305e-07,
"loss": 0.55561361,
"memory(GiB)": 67.62,
"step": 6975,
"train_speed(iter/s)": 0.147709
},
{
"acc": 0.82256441,
"epoch": 4.800550206327372,
"grad_norm": 2.134580135345459,
"learning_rate": 4.4441940503934173e-07,
"loss": 0.59003773,
"memory(GiB)": 67.62,
"step": 6980,
"train_speed(iter/s)": 0.147603
},
{
"acc": 0.82549543,
"epoch": 4.8039889958734525,
"grad_norm": 2.2374000549316406,
"learning_rate": 4.295893718088e-07,
"loss": 0.57104192,
"memory(GiB)": 67.62,
"step": 6985,
"train_speed(iter/s)": 0.147515
},
{
"acc": 0.83225937,
"epoch": 4.807427785419533,
"grad_norm": 1.948536992073059,
"learning_rate": 4.150158181258259e-07,
"loss": 0.55912457,
"memory(GiB)": 67.62,
"step": 6990,
"train_speed(iter/s)": 0.147431
},
{
"acc": 0.81516037,
"epoch": 4.810866574965612,
"grad_norm": 2.495556354522705,
"learning_rate": 4.0069881938703406e-07,
"loss": 0.59933119,
"memory(GiB)": 67.62,
"step": 6995,
"train_speed(iter/s)": 0.147319
},
{
"acc": 0.82695866,
"epoch": 4.814305364511692,
"grad_norm": 2.299910545349121,
"learning_rate": 3.866384496617616e-07,
"loss": 0.58013859,
"memory(GiB)": 67.62,
"step": 7000,
"train_speed(iter/s)": 0.147213
},
{
"epoch": 4.814305364511692,
"eval_acc": 0.7819543798083413,
"eval_loss": 0.793637216091156,
"eval_runtime": 1150.3821,
"eval_samples_per_second": 3.723,
"eval_steps_per_second": 0.067,
"step": 7000
},
{
"acc": 0.82657938,
"epoch": 4.817744154057772,
"grad_norm": 2.2365365028381348,
"learning_rate": 3.7283478169165165e-07,
"loss": 0.59894753,
"memory(GiB)": 67.62,
"step": 7005,
"train_speed(iter/s)": 0.143635
},
{
"acc": 0.82827587,
"epoch": 4.821182943603851,
"grad_norm": 2.287341833114624,
"learning_rate": 3.592878868903036e-07,
"loss": 0.56538892,
"memory(GiB)": 67.62,
"step": 7010,
"train_speed(iter/s)": 0.143557
},
{
"acc": 0.82892952,
"epoch": 4.8246217331499315,
"grad_norm": 2.37528920173645,
"learning_rate": 3.459978353429071e-07,
"loss": 0.56618586,
"memory(GiB)": 67.62,
"step": 7015,
"train_speed(iter/s)": 0.143484
},
{
"acc": 0.82298727,
"epoch": 4.828060522696011,
"grad_norm": 2.1354215145111084,
"learning_rate": 3.3296469580584186e-07,
"loss": 0.58705649,
"memory(GiB)": 67.62,
"step": 7020,
"train_speed(iter/s)": 0.143374
},
{
"acc": 0.81914625,
"epoch": 4.831499312242091,
"grad_norm": 2.2157156467437744,
"learning_rate": 3.201885357063674e-07,
"loss": 0.60606232,
"memory(GiB)": 67.62,
"step": 7025,
"train_speed(iter/s)": 0.143299
},
{
"acc": 0.82481365,
"epoch": 4.83493810178817,
"grad_norm": 2.350295066833496,
"learning_rate": 3.076694211422452e-07,
"loss": 0.59341784,
"memory(GiB)": 67.62,
"step": 7030,
"train_speed(iter/s)": 0.14321
},
{
"acc": 0.8327177,
"epoch": 4.83837689133425,
"grad_norm": 2.483370304107666,
"learning_rate": 2.954074168814115e-07,
"loss": 0.57141585,
"memory(GiB)": 67.62,
"step": 7035,
"train_speed(iter/s)": 0.1431
},
{
"acc": 0.82273092,
"epoch": 4.84181568088033,
"grad_norm": 2.237597942352295,
"learning_rate": 2.8340258636162734e-07,
"loss": 0.59980655,
"memory(GiB)": 67.62,
"step": 7040,
"train_speed(iter/s)": 0.142991
},
{
"acc": 0.81792231,
"epoch": 4.8452544704264096,
"grad_norm": 2.165174961090088,
"learning_rate": 2.716549916901624e-07,
"loss": 0.59414587,
"memory(GiB)": 67.62,
"step": 7045,
"train_speed(iter/s)": 0.142869
},
{
"acc": 0.83234692,
"epoch": 4.84869325997249,
"grad_norm": 2.2582786083221436,
"learning_rate": 2.601646936434731e-07,
"loss": 0.56242762,
"memory(GiB)": 67.62,
"step": 7050,
"train_speed(iter/s)": 0.142752
},
{
"acc": 0.82207642,
"epoch": 4.85213204951857,
"grad_norm": 2.386744737625122,
"learning_rate": 2.4893175166689693e-07,
"loss": 0.5899931,
"memory(GiB)": 67.62,
"step": 7055,
"train_speed(iter/s)": 0.142651
},
{
"acc": 0.83866978,
"epoch": 4.855570839064649,
"grad_norm": 2.4052698612213135,
"learning_rate": 2.3795622387430887e-07,
"loss": 0.52610168,
"memory(GiB)": 67.62,
"step": 7060,
"train_speed(iter/s)": 0.14255
},
{
"acc": 0.81752338,
"epoch": 4.859009628610729,
"grad_norm": 2.0953776836395264,
"learning_rate": 2.272381670478657e-07,
"loss": 0.60933762,
"memory(GiB)": 67.62,
"step": 7065,
"train_speed(iter/s)": 0.142448
},
{
"acc": 0.82258358,
"epoch": 4.862448418156809,
"grad_norm": 2.4188003540039062,
"learning_rate": 2.1677763663768406e-07,
"loss": 0.58760223,
"memory(GiB)": 67.62,
"step": 7070,
"train_speed(iter/s)": 0.142309
},
{
"acc": 0.82635889,
"epoch": 4.8658872077028885,
"grad_norm": 2.2215888500213623,
"learning_rate": 2.0657468676155762e-07,
"loss": 0.58528147,
"memory(GiB)": 67.62,
"step": 7075,
"train_speed(iter/s)": 0.142201
},
{
"acc": 0.84418049,
"epoch": 4.869325997248969,
"grad_norm": 2.0003366470336914,
"learning_rate": 1.9662937020469589e-07,
"loss": 0.51888628,
"memory(GiB)": 67.62,
"step": 7080,
"train_speed(iter/s)": 0.142127
},
{
"acc": 0.83303547,
"epoch": 4.872764786795048,
"grad_norm": 2.1851377487182617,
"learning_rate": 1.8694173841941928e-07,
"loss": 0.55756779,
"memory(GiB)": 67.62,
"step": 7085,
"train_speed(iter/s)": 0.142023
},
{
"acc": 0.82351046,
"epoch": 4.876203576341128,
"grad_norm": 2.3260505199432373,
"learning_rate": 1.775118415249201e-07,
"loss": 0.58764186,
"memory(GiB)": 67.62,
"step": 7090,
"train_speed(iter/s)": 0.141935
},
{
"acc": 0.82887058,
"epoch": 4.879642365887207,
"grad_norm": 2.2045719623565674,
"learning_rate": 1.6833972830699635e-07,
"loss": 0.56427956,
"memory(GiB)": 67.62,
"step": 7095,
"train_speed(iter/s)": 0.141825
},
{
"acc": 0.81958294,
"epoch": 4.883081155433287,
"grad_norm": 2.245159149169922,
"learning_rate": 1.5942544621777965e-07,
"loss": 0.60630999,
"memory(GiB)": 67.62,
"step": 7100,
"train_speed(iter/s)": 0.141715
},
{
"epoch": 4.883081155433287,
"eval_acc": 0.7818137850362172,
"eval_loss": 0.7931898832321167,
"eval_runtime": 1152.8663,
"eval_samples_per_second": 3.715,
"eval_steps_per_second": 0.067,
"step": 7100
},
{
"acc": 0.83190765,
"epoch": 4.8865199449793675,
"grad_norm": 2.2760040760040283,
"learning_rate": 1.507690413755244e-07,
"loss": 0.56932721,
"memory(GiB)": 67.62,
"step": 7105,
"train_speed(iter/s)": 0.138438
},
{
"acc": 0.82073574,
"epoch": 4.889958734525447,
"grad_norm": 2.2572543621063232,
"learning_rate": 1.423705585643412e-07,
"loss": 0.59770269,
"memory(GiB)": 67.62,
"step": 7110,
"train_speed(iter/s)": 0.138349
},
{
"acc": 0.82008648,
"epoch": 4.893397524071527,
"grad_norm": 2.431645631790161,
"learning_rate": 1.342300412339805e-07,
"loss": 0.60884895,
"memory(GiB)": 67.62,
"step": 7115,
"train_speed(iter/s)": 0.138261
},
{
"acc": 0.83563404,
"epoch": 4.896836313617607,
"grad_norm": 2.210167646408081,
"learning_rate": 1.2634753149959394e-07,
"loss": 0.55552473,
"memory(GiB)": 67.62,
"step": 7120,
"train_speed(iter/s)": 0.138175
},
{
"acc": 0.83866234,
"epoch": 4.900275103163686,
"grad_norm": 2.1584184169769287,
"learning_rate": 1.1872307014153448e-07,
"loss": 0.5373682,
"memory(GiB)": 67.62,
"step": 7125,
"train_speed(iter/s)": 0.138058
},
{
"acc": 0.83324118,
"epoch": 4.903713892709766,
"grad_norm": 2.51465106010437,
"learning_rate": 1.1135669660512879e-07,
"loss": 0.54701567,
"memory(GiB)": 67.62,
"step": 7130,
"train_speed(iter/s)": 0.137984
},
{
"acc": 0.84189644,
"epoch": 4.9071526822558456,
"grad_norm": 2.2430858612060547,
"learning_rate": 1.0424844900048863e-07,
"loss": 0.52747626,
"memory(GiB)": 67.62,
"step": 7135,
"train_speed(iter/s)": 0.137875
},
{
"acc": 0.82826939,
"epoch": 4.910591471801926,
"grad_norm": 2.468977451324463,
"learning_rate": 9.739836410229431e-08,
"loss": 0.56382651,
"memory(GiB)": 67.62,
"step": 7140,
"train_speed(iter/s)": 0.137804
},
{
"acc": 0.82850714,
"epoch": 4.914030261348006,
"grad_norm": 2.1959378719329834,
"learning_rate": 9.080647734961705e-08,
"loss": 0.5642982,
"memory(GiB)": 67.62,
"step": 7145,
"train_speed(iter/s)": 0.137706
},
{
"acc": 0.80990505,
"epoch": 4.917469050894085,
"grad_norm": 2.1937224864959717,
"learning_rate": 8.447282284574144e-08,
"loss": 0.64270401,
"memory(GiB)": 67.62,
"step": 7150,
"train_speed(iter/s)": 0.137623
},
{
"acc": 0.83124857,
"epoch": 4.920907840440165,
"grad_norm": 2.274343967437744,
"learning_rate": 7.839743335798222e-08,
"loss": 0.58021183,
"memory(GiB)": 67.62,
"step": 7155,
"train_speed(iter/s)": 0.137544
},
{
"acc": 0.83879738,
"epoch": 4.924346629986244,
"grad_norm": 2.0642943382263184,
"learning_rate": 7.258034031750108e-08,
"loss": 0.55038834,
"memory(GiB)": 67.62,
"step": 7160,
"train_speed(iter/s)": 0.137462
},
{
"acc": 0.82772274,
"epoch": 4.9277854195323245,
"grad_norm": 2.236903429031372,
"learning_rate": 6.702157381916804e-08,
"loss": 0.57812862,
"memory(GiB)": 67.62,
"step": 7165,
"train_speed(iter/s)": 0.137368
},
{
"acc": 0.81524467,
"epoch": 4.931224209078405,
"grad_norm": 2.3664135932922363,
"learning_rate": 6.172116262139473e-08,
"loss": 0.59173594,
"memory(GiB)": 67.62,
"step": 7170,
"train_speed(iter/s)": 0.137273
},
{
"acc": 0.84409065,
"epoch": 4.934662998624484,
"grad_norm": 1.8912343978881836,
"learning_rate": 5.66791341459791e-08,
"loss": 0.51706591,
"memory(GiB)": 67.62,
"step": 7175,
"train_speed(iter/s)": 0.13718
},
{
"acc": 0.82883434,
"epoch": 4.938101788170564,
"grad_norm": 2.3647637367248535,
"learning_rate": 5.189551447797223e-08,
"loss": 0.57346845,
"memory(GiB)": 67.62,
"step": 7180,
"train_speed(iter/s)": 0.137112
},
{
"acc": 0.82722406,
"epoch": 4.941540577716644,
"grad_norm": 2.390969753265381,
"learning_rate": 4.7370328365550553e-08,
"loss": 0.58734665,
"memory(GiB)": 67.62,
"step": 7185,
"train_speed(iter/s)": 0.137029
},
{
"acc": 0.83363981,
"epoch": 4.944979367262723,
"grad_norm": 2.3286654949188232,
"learning_rate": 4.3103599219855e-08,
"loss": 0.55134306,
"memory(GiB)": 67.62,
"step": 7190,
"train_speed(iter/s)": 0.13694
},
{
"acc": 0.8398654,
"epoch": 4.9484181568088035,
"grad_norm": 1.95890474319458,
"learning_rate": 3.909534911492433e-08,
"loss": 0.52122355,
"memory(GiB)": 67.62,
"step": 7195,
"train_speed(iter/s)": 0.136882
},
{
"acc": 0.82601204,
"epoch": 4.951856946354883,
"grad_norm": 2.7101948261260986,
"learning_rate": 3.534559878752308e-08,
"loss": 0.58264699,
"memory(GiB)": 67.62,
"step": 7200,
"train_speed(iter/s)": 0.13679
},
{
"epoch": 4.951856946354883,
"eval_acc": 0.7817575471273677,
"eval_loss": 0.7930530309677124,
"eval_runtime": 1110.3461,
"eval_samples_per_second": 3.857,
"eval_steps_per_second": 0.069,
"step": 7200
},
{
"acc": 0.83350286,
"epoch": 4.955295735900963,
"grad_norm": 2.281674861907959,
"learning_rate": 3.185436763708053e-08,
"loss": 0.55087848,
"memory(GiB)": 67.62,
"step": 7205,
"train_speed(iter/s)": 0.13388
},
{
"acc": 0.83256226,
"epoch": 4.958734525447043,
"grad_norm": 2.406829357147217,
"learning_rate": 2.862167372556297e-08,
"loss": 0.55789819,
"memory(GiB)": 67.62,
"step": 7210,
"train_speed(iter/s)": 0.133821
},
{
"acc": 0.81454487,
"epoch": 4.962173314993122,
"grad_norm": 2.2203316688537598,
"learning_rate": 2.564753377737945e-08,
"loss": 0.60484362,
"memory(GiB)": 67.62,
"step": 7215,
"train_speed(iter/s)": 0.133748
},
{
"acc": 0.82130527,
"epoch": 4.965612104539202,
"grad_norm": 2.130246162414551,
"learning_rate": 2.2931963179320628e-08,
"loss": 0.59843221,
"memory(GiB)": 67.62,
"step": 7220,
"train_speed(iter/s)": 0.133637
},
{
"acc": 0.83102131,
"epoch": 4.9690508940852816,
"grad_norm": 2.521017551422119,
"learning_rate": 2.04749759804478e-08,
"loss": 0.55911312,
"memory(GiB)": 67.62,
"step": 7225,
"train_speed(iter/s)": 0.133538
},
{
"acc": 0.81951447,
"epoch": 4.972489683631362,
"grad_norm": 2.495345115661621,
"learning_rate": 1.8276584892048502e-08,
"loss": 0.59946508,
"memory(GiB)": 67.62,
"step": 7230,
"train_speed(iter/s)": 0.133454
},
{
"acc": 0.83500395,
"epoch": 4.975928473177442,
"grad_norm": 2.169851541519165,
"learning_rate": 1.6336801287547673e-08,
"loss": 0.55714474,
"memory(GiB)": 67.62,
"step": 7235,
"train_speed(iter/s)": 0.133371
},
{
"acc": 0.83105001,
"epoch": 4.979367262723521,
"grad_norm": 1.9003541469573975,
"learning_rate": 1.4655635202457724e-08,
"loss": 0.56020293,
"memory(GiB)": 67.62,
"step": 7240,
"train_speed(iter/s)": 0.133301
},
{
"acc": 0.82036457,
"epoch": 4.982806052269601,
"grad_norm": 2.2826859951019287,
"learning_rate": 1.3233095334339681e-08,
"loss": 0.5854476,
"memory(GiB)": 67.62,
"step": 7245,
"train_speed(iter/s)": 0.133207
},
{
"acc": 0.82185326,
"epoch": 4.986244841815681,
"grad_norm": 2.5508041381835938,
"learning_rate": 1.2069189042725465e-08,
"loss": 0.58682165,
"memory(GiB)": 67.62,
"step": 7250,
"train_speed(iter/s)": 0.133133
},
{
"acc": 0.83299255,
"epoch": 4.9896836313617605,
"grad_norm": 2.2958316802978516,
"learning_rate": 1.1163922349123454e-08,
"loss": 0.54637289,
"memory(GiB)": 67.62,
"step": 7255,
"train_speed(iter/s)": 0.133052
},
{
"acc": 0.81462727,
"epoch": 4.993122420907841,
"grad_norm": 2.2949371337890625,
"learning_rate": 1.051729993694077e-08,
"loss": 0.60125666,
"memory(GiB)": 67.62,
"step": 7260,
"train_speed(iter/s)": 0.132965
},
{
"acc": 0.83669167,
"epoch": 4.99656121045392,
"grad_norm": 2.335374593734741,
"learning_rate": 1.0129325151499931e-08,
"loss": 0.51913919,
"memory(GiB)": 67.62,
"step": 7265,
"train_speed(iter/s)": 0.132889
},
{
"acc": 0.83688688,
"epoch": 5.0,
"grad_norm": 2.2776167392730713,
"learning_rate": 1e-08,
"loss": 0.54480848,
"memory(GiB)": 67.62,
"step": 7270,
"train_speed(iter/s)": 0.132804
},
{
"epoch": 5.0,
"eval_acc": 0.781712556800288,
"eval_loss": 0.793134868144989,
"eval_runtime": 1106.3573,
"eval_samples_per_second": 3.871,
"eval_steps_per_second": 0.07,
"step": 7270
}
],
"logging_steps": 5,
"max_steps": 7270,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.939618530083786e+19,
"train_batch_size": 14,
"trial_name": null,
"trial_params": null
}