{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 38553, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 5.181347150259067e-07, "loss": 1.6299, "step": 20, "vit_lr": 1.0362694300518134e-07 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 1.0362694300518134e-06, "loss": 1.4909, "step": 40, "vit_lr": 2.0725388601036267e-07 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 1.5544041450777204e-06, "loss": 1.2831, "step": 60, "vit_lr": 3.1088082901554404e-07 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 2.072538860103627e-06, "loss": 1.1203, "step": 80, "vit_lr": 4.1450777202072535e-07 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 2.5906735751295338e-06, "loss": 1.0687, "step": 100, "vit_lr": 5.181347150259067e-07 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 3.1088082901554407e-06, "loss": 1.0234, "step": 120, "vit_lr": 6.217616580310881e-07 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 3.6269430051813476e-06, "loss": 1.0083, "step": 140, "vit_lr": 7.253886010362694e-07 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 4.145077720207254e-06, "loss": 0.9992, "step": 160, "vit_lr": 8.290155440414507e-07 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 4.663212435233161e-06, "loss": 0.9716, "step": 180, "vit_lr": 9.326424870466321e-07 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 5.1813471502590676e-06, "loss": 0.9793, "step": 200, "vit_lr": 1.0362694300518134e-06 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 5.699481865284975e-06, "loss": 0.9656, "step": 220, "vit_lr": 1.139896373056995e-06 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 6.217616580310881e-06, "loss": 0.9689, "step": 240, "vit_lr": 1.2435233160621762e-06 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 6.735751295336788e-06, "loss": 0.9411, "step": 260, "vit_lr": 1.3471502590673574e-06 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 7.253886010362695e-06, "loss": 0.9614, "step": 280, "vit_lr": 1.4507772020725389e-06 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 7.772020725388602e-06, "loss": 0.9435, "step": 300, "vit_lr": 1.5544041450777201e-06 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 8.290155440414507e-06, "loss": 0.9414, "step": 320, "vit_lr": 1.6580310880829014e-06 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 8.808290155440415e-06, "loss": 0.9312, "step": 340, "vit_lr": 1.7616580310880829e-06 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 9.326424870466322e-06, "loss": 0.9321, "step": 360, "vit_lr": 1.8652849740932641e-06 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 9.844559585492228e-06, "loss": 0.909, "step": 380, "vit_lr": 1.9689119170984454e-06 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 9.999996680140323e-06, "loss": 0.9261, "step": 400, "vit_lr": 1.9999993360280644e-06 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 9.999980419613738e-06, "loss": 0.9092, "step": 420, "vit_lr": 1.9999960839227473e-06 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 9.999950608694114e-06, "loss": 0.9067, "step": 440, "vit_lr": 1.9999901217388228e-06 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 9.999907247462242e-06, "loss": 0.9253, "step": 460, "vit_lr": 1.999981449492448e-06 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 9.999850336035633e-06, "loss": 0.9183, "step": 480, "vit_lr": 1.999970067207126e-06 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 9.999779874568523e-06, "loss": 0.9041, "step": 500, "vit_lr": 1.9999559749137043e-06 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 9.999695863251868e-06, "loss": 0.9082, "step": 520, "vit_lr": 1.9999391726503734e-06 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 9.99959830231335e-06, "loss": 0.9104, "step": 540, "vit_lr": 1.9999196604626696e-06 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 9.99948719201736e-06, "loss": 0.8835, "step": 560, "vit_lr": 1.9998974384034718e-06 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 9.999362532665025e-06, "loss": 0.8812, "step": 580, "vit_lr": 1.9998725065330045e-06 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 9.999224324594179e-06, "loss": 0.8997, "step": 600, "vit_lr": 1.9998448649188355e-06 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 9.999072568179378e-06, "loss": 0.8913, "step": 620, "vit_lr": 1.999814513635875e-06 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 9.998907263831895e-06, "loss": 0.8841, "step": 640, "vit_lr": 1.999781452766379e-06 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 9.998728411999724e-06, "loss": 0.8868, "step": 660, "vit_lr": 1.9997456823999444e-06 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 9.998536013167564e-06, "loss": 0.8964, "step": 680, "vit_lr": 1.9997072026335126e-06 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 9.998330067856834e-06, "loss": 0.8807, "step": 700, "vit_lr": 1.9996660135713664e-06 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 9.998110576625665e-06, "loss": 0.8953, "step": 720, "vit_lr": 1.999622115325133e-06 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 9.997877540068902e-06, "loss": 0.8928, "step": 740, "vit_lr": 1.99957550801378e-06 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 9.997630958818087e-06, "loss": 0.8706, "step": 760, "vit_lr": 1.999526191763617e-06 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 9.997370833541484e-06, "loss": 0.871, "step": 780, "vit_lr": 1.9994741667082968e-06 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 9.997097164944052e-06, "loss": 0.8791, "step": 800, "vit_lr": 1.9994194329888102e-06 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 9.996809953767457e-06, "loss": 0.8741, "step": 820, "vit_lr": 1.999361990753491e-06 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.99650920079007e-06, "loss": 0.8865, "step": 840, "vit_lr": 1.9993018401580135e-06 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.996194906826954e-06, "loss": 0.8506, "step": 860, "vit_lr": 1.9992389813653906e-06 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.995867072729878e-06, "loss": 0.8918, "step": 880, "vit_lr": 1.999173414545975e-06 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.995525699387298e-06, "loss": 0.8659, "step": 900, "vit_lr": 1.9991051398774594e-06 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.995170787724371e-06, "loss": 0.8722, "step": 920, "vit_lr": 1.999034157544874e-06 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.994802338702934e-06, "loss": 0.8756, "step": 940, "vit_lr": 1.9989604677405864e-06 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.994420353321518e-06, "loss": 0.8723, "step": 960, "vit_lr": 1.9988840706643034e-06 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 9.994024832615342e-06, "loss": 0.871, "step": 980, "vit_lr": 1.998804966523068e-06 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 9.993615777656297e-06, "loss": 0.8787, "step": 1000, "vit_lr": 1.9987231555312593e-06 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 9.993193189552962e-06, "loss": 0.8649, "step": 1020, "vit_lr": 1.998638637910592e-06 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 9.992757069450584e-06, "loss": 0.8456, "step": 1040, "vit_lr": 1.9985514138901165e-06 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 9.99230741853109e-06, "loss": 0.8655, "step": 1060, "vit_lr": 1.998461483706218e-06 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 9.991844238013077e-06, "loss": 0.8541, "step": 1080, "vit_lr": 1.998368847602615e-06 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 9.991367529151801e-06, "loss": 0.8503, "step": 1100, "vit_lr": 1.99827350583036e-06 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 9.990877293239188e-06, "loss": 0.8847, "step": 1120, "vit_lr": 1.9981754586478374e-06 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 9.99037353160382e-06, "loss": 0.8435, "step": 1140, "vit_lr": 1.998074706320764e-06 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 9.989856245610939e-06, "loss": 0.8597, "step": 1160, "vit_lr": 1.9979712491221874e-06 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 9.98932543666243e-06, "loss": 0.8644, "step": 1180, "vit_lr": 1.9978650873324855e-06 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 9.988781106196838e-06, "loss": 0.8683, "step": 1200, "vit_lr": 1.9977562212393673e-06 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 9.988223255689345e-06, "loss": 0.8486, "step": 1220, "vit_lr": 1.9976446511378686e-06 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 9.987651886651775e-06, "loss": 0.8566, "step": 1240, "vit_lr": 1.9975303773303546e-06 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 9.98706700063259e-06, "loss": 0.8523, "step": 1260, "vit_lr": 1.997413400126518e-06 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 9.986468599216885e-06, "loss": 0.8513, "step": 1280, "vit_lr": 1.997293719843377e-06 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 9.985856684026381e-06, "loss": 0.846, "step": 1300, "vit_lr": 1.997171336805276e-06 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 9.985231256719419e-06, "loss": 0.8512, "step": 1320, "vit_lr": 1.9970462513438834e-06 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 9.984592318990964e-06, "loss": 0.873, "step": 1340, "vit_lr": 1.9969184637981927e-06 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 9.983939872572596e-06, "loss": 0.8459, "step": 1360, "vit_lr": 1.996787974514519e-06 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 9.983273919232505e-06, "loss": 0.848, "step": 1380, "vit_lr": 1.996654783846501e-06 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 9.98259446077548e-06, "loss": 0.8653, "step": 1400, "vit_lr": 1.9965188921550953e-06 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 9.981901499042915e-06, "loss": 0.8691, "step": 1420, "vit_lr": 1.9963802998085826e-06 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 9.981195035912798e-06, "loss": 0.8392, "step": 1440, "vit_lr": 1.996239007182559e-06 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 9.980475073299707e-06, "loss": 0.8498, "step": 1460, "vit_lr": 1.9960950146599413e-06 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 9.979741613154807e-06, "loss": 0.8571, "step": 1480, "vit_lr": 1.995948322630961e-06 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 9.97899465746584e-06, "loss": 0.8655, "step": 1500, "vit_lr": 1.9957989314931676e-06 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 9.978234208257121e-06, "loss": 0.8386, "step": 1520, "vit_lr": 1.995646841651424e-06 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 9.977460267589534e-06, "loss": 0.8551, "step": 1540, "vit_lr": 1.9954920535179065e-06 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 9.976672837560531e-06, "loss": 0.8614, "step": 1560, "vit_lr": 1.995334567512106e-06 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 9.975871920304114e-06, "loss": 0.837, "step": 1580, "vit_lr": 1.9951743840608227e-06 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 9.975057517990843e-06, "loss": 0.8546, "step": 1600, "vit_lr": 1.995011503598168e-06 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 9.974229632827818e-06, "loss": 0.8462, "step": 1620, "vit_lr": 1.9948459265655634e-06 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 9.973388267058686e-06, "loss": 0.8478, "step": 1640, "vit_lr": 1.994677653411737e-06 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 9.972533422963619e-06, "loss": 0.8415, "step": 1660, "vit_lr": 1.994506684592724e-06 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 9.971665102859323e-06, "loss": 0.852, "step": 1680, "vit_lr": 1.994333020571864e-06 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 9.970783309099024e-06, "loss": 0.8588, "step": 1700, "vit_lr": 1.9941566618198044e-06 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 9.969888044072457e-06, "loss": 0.8321, "step": 1720, "vit_lr": 1.993977608814491e-06 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 9.968979310205876e-06, "loss": 0.8428, "step": 1740, "vit_lr": 1.993795862041175e-06 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 9.968057109962028e-06, "loss": 0.8432, "step": 1760, "vit_lr": 1.9936114219924055e-06 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 9.967121445840158e-06, "loss": 0.8443, "step": 1780, "vit_lr": 1.993424289168031e-06 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 9.966172320375999e-06, "loss": 0.8373, "step": 1800, "vit_lr": 1.9932344640751995e-06 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 9.965209736141766e-06, "loss": 0.8466, "step": 1820, "vit_lr": 1.993041947228353e-06 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 9.964233695746148e-06, "loss": 0.8503, "step": 1840, "vit_lr": 1.9928467391492295e-06 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 9.963244201834302e-06, "loss": 0.8279, "step": 1860, "vit_lr": 1.99264884036686e-06 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 9.962241257087845e-06, "loss": 0.8452, "step": 1880, "vit_lr": 1.992448251417569e-06 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 9.961224864224846e-06, "loss": 0.8438, "step": 1900, "vit_lr": 1.992244972844969e-06 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 9.960195025999822e-06, "loss": 0.8524, "step": 1920, "vit_lr": 1.9920390051999642e-06 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 9.959151745203724e-06, "loss": 0.8363, "step": 1940, "vit_lr": 1.9918303490407444e-06 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 9.958095024663936e-06, "loss": 0.8174, "step": 1960, "vit_lr": 1.991619004932787e-06 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 9.957024867244267e-06, "loss": 0.8336, "step": 1980, "vit_lr": 1.991404973448853e-06 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 9.955941275844939e-06, "loss": 0.8331, "step": 2000, "vit_lr": 1.9911882551689877e-06 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 9.95484425340258e-06, "loss": 0.8365, "step": 2020, "vit_lr": 1.990968850680516e-06 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 9.95373380289022e-06, "loss": 0.852, "step": 2040, "vit_lr": 1.9907467605780437e-06 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 9.952609927317278e-06, "loss": 0.834, "step": 2060, "vit_lr": 1.990521985463455e-06 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 9.951472629729556e-06, "loss": 0.8363, "step": 2080, "vit_lr": 1.990294525945911e-06 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 9.950321913209234e-06, "loss": 0.8275, "step": 2100, "vit_lr": 1.990064382641847e-06 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 9.949157780874856e-06, "loss": 0.8211, "step": 2120, "vit_lr": 1.989831556174971e-06 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 9.947980235881324e-06, "loss": 0.8248, "step": 2140, "vit_lr": 1.9895960471762647e-06 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 9.946789281419891e-06, "loss": 0.8402, "step": 2160, "vit_lr": 1.989357856283978e-06 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 9.945584920718147e-06, "loss": 0.8333, "step": 2180, "vit_lr": 1.989116984143629e-06 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 9.94436715704002e-06, "loss": 0.8322, "step": 2200, "vit_lr": 1.988873431408004e-06 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 9.943135993685759e-06, "loss": 0.8504, "step": 2220, "vit_lr": 1.9886271987371513e-06 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 9.941891433991924e-06, "loss": 0.8367, "step": 2240, "vit_lr": 1.9883782867983846e-06 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 9.940633481331381e-06, "loss": 0.8287, "step": 2260, "vit_lr": 1.9881266962662757e-06 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 9.9393621391133e-06, "loss": 0.829, "step": 2280, "vit_lr": 1.9878724278226597e-06 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 9.938077410783125e-06, "loss": 0.8246, "step": 2300, "vit_lr": 1.9876154821566247e-06 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 9.936779299822589e-06, "loss": 0.8303, "step": 2320, "vit_lr": 1.9873558599645176e-06 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 9.935467809749689e-06, "loss": 0.8295, "step": 2340, "vit_lr": 1.9870935619499375e-06 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 9.934142944118676e-06, "loss": 0.8172, "step": 2360, "vit_lr": 1.986828588823735e-06 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 9.932804706520058e-06, "loss": 0.8403, "step": 2380, "vit_lr": 1.9865609413040114e-06 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 9.931453100580576e-06, "loss": 0.8436, "step": 2400, "vit_lr": 1.986290620116115e-06 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 9.930088129963203e-06, "loss": 0.8299, "step": 2420, "vit_lr": 1.9860176259926405e-06 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 9.928709798367133e-06, "loss": 0.8259, "step": 2440, "vit_lr": 1.9857419596734265e-06 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 9.927318109527765e-06, "loss": 0.829, "step": 2460, "vit_lr": 1.9854636219055527e-06 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 9.925913067216701e-06, "loss": 0.8269, "step": 2480, "vit_lr": 1.98518261344334e-06 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 9.924494675241731e-06, "loss": 0.8315, "step": 2500, "vit_lr": 1.984898935048346e-06 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 9.923062937446826e-06, "loss": 0.8311, "step": 2520, "vit_lr": 1.984612587489365e-06 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 9.92161785771212e-06, "loss": 0.8524, "step": 2540, "vit_lr": 1.9843235715424237e-06 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 9.920159439953907e-06, "loss": 0.8394, "step": 2560, "vit_lr": 1.9840318879907813e-06 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 9.918687688124633e-06, "loss": 0.8252, "step": 2580, "vit_lr": 1.9837375376249264e-06 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 9.917202606212876e-06, "loss": 0.8361, "step": 2600, "vit_lr": 1.983440521242575e-06 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 9.915704198243338e-06, "loss": 0.8362, "step": 2620, "vit_lr": 1.9831408396486677e-06 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 9.91419246827684e-06, "loss": 0.818, "step": 2640, "vit_lr": 1.9828384936553677e-06 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 9.912667420410306e-06, "loss": 0.8258, "step": 2660, "vit_lr": 1.982533484082061e-06 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 9.91112905877675e-06, "loss": 0.8348, "step": 2680, "vit_lr": 1.9822258117553497e-06 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 9.909577387545271e-06, "loss": 0.8307, "step": 2700, "vit_lr": 1.981915477509054e-06 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 9.908012410921035e-06, "loss": 0.8313, "step": 2720, "vit_lr": 1.9816024821842065e-06 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 9.906434133145268e-06, "loss": 0.834, "step": 2740, "vit_lr": 1.9812868266290535e-06 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 9.904842558495245e-06, "loss": 0.821, "step": 2760, "vit_lr": 1.9809685116990486e-06 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 9.903237691284274e-06, "loss": 0.8235, "step": 2780, "vit_lr": 1.9806475382568547e-06 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 9.90161953586169e-06, "loss": 0.8153, "step": 2800, "vit_lr": 1.9803239071723377e-06 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 9.899988096612838e-06, "loss": 0.8221, "step": 2820, "vit_lr": 1.979997619322567e-06 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 9.898343377959061e-06, "loss": 0.8204, "step": 2840, "vit_lr": 1.979668675591812e-06 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 9.896685384357699e-06, "loss": 0.8334, "step": 2860, "vit_lr": 1.9793370768715394e-06 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 9.895014120302056e-06, "loss": 0.815, "step": 2880, "vit_lr": 1.979002824060411e-06 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 9.893329590321411e-06, "loss": 0.8356, "step": 2900, "vit_lr": 1.9786659180642822e-06 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 9.891631798980992e-06, "loss": 0.8234, "step": 2920, "vit_lr": 1.978326359796198e-06 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 9.889920750881961e-06, "loss": 0.8253, "step": 2940, "vit_lr": 1.977984150176392e-06 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 9.888196450661413e-06, "loss": 0.8352, "step": 2960, "vit_lr": 1.9776392901322823e-06 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 9.886458902992354e-06, "loss": 0.8143, "step": 2980, "vit_lr": 1.9772917805984706e-06 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 9.884708112583697e-06, "loss": 0.8238, "step": 3000, "vit_lr": 1.976941622516739e-06 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 9.882944084180235e-06, "loss": 0.8261, "step": 3020, "vit_lr": 1.976588816836047e-06 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 9.881166822562646e-06, "loss": 0.8186, "step": 3040, "vit_lr": 1.9762333645125288e-06 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 9.879376332547466e-06, "loss": 0.8071, "step": 3060, "vit_lr": 1.9758752665094927e-06 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 9.877572618987082e-06, "loss": 0.8166, "step": 3080, "vit_lr": 1.975514523797416e-06 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 9.87575568676972e-06, "loss": 0.8336, "step": 3100, "vit_lr": 1.975151137353944e-06 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 9.873925540819431e-06, "loss": 0.8276, "step": 3120, "vit_lr": 1.974785108163886e-06 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 9.872082186096068e-06, "loss": 0.8074, "step": 3140, "vit_lr": 1.9744164372192133e-06 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 9.870225627595292e-06, "loss": 0.8152, "step": 3160, "vit_lr": 1.974045125519058e-06 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 9.868355870348538e-06, "loss": 0.838, "step": 3180, "vit_lr": 1.9736711740697075e-06 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 9.866472919423018e-06, "loss": 0.8308, "step": 3200, "vit_lr": 1.9732945838846033e-06 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 9.864576779921695e-06, "loss": 0.8345, "step": 3220, "vit_lr": 1.9729153559843388e-06 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 9.86266745698328e-06, "loss": 0.8246, "step": 3240, "vit_lr": 1.9725334913966556e-06 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 9.860744955782202e-06, "loss": 0.8349, "step": 3260, "vit_lr": 1.9721489911564403e-06 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 9.858809281528616e-06, "loss": 0.8279, "step": 3280, "vit_lr": 1.971761856305723e-06 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 9.85686043946837e-06, "loss": 0.8248, "step": 3300, "vit_lr": 1.9713720878936735e-06 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 9.854898434883e-06, "loss": 0.8135, "step": 3320, "vit_lr": 1.9709796869766e-06 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 9.852923273089715e-06, "loss": 0.8073, "step": 3340, "vit_lr": 1.9705846546179426e-06 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 9.850934959441379e-06, "loss": 0.81, "step": 3360, "vit_lr": 1.9701869918882755e-06 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 9.848933499326497e-06, "loss": 0.8171, "step": 3380, "vit_lr": 1.969786699865299e-06 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 9.846918898169207e-06, "loss": 0.8176, "step": 3400, "vit_lr": 1.969383779633841e-06 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 9.844891161429255e-06, "loss": 0.8359, "step": 3420, "vit_lr": 1.9689782322858507e-06 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 9.842850294601993e-06, "loss": 0.8145, "step": 3440, "vit_lr": 1.9685700589203984e-06 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 9.84079630321835e-06, "loss": 0.814, "step": 3460, "vit_lr": 1.9681592606436697e-06 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 9.838729192844825e-06, "loss": 0.8331, "step": 3480, "vit_lr": 1.9677458385689646e-06 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 9.836648969083473e-06, "loss": 0.8076, "step": 3500, "vit_lr": 1.967329793816694e-06 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 9.834555637571883e-06, "loss": 0.7985, "step": 3520, "vit_lr": 1.9669111275143764e-06 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 9.832449203983174e-06, "loss": 0.8175, "step": 3540, "vit_lr": 1.9664898407966346e-06 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 9.830329674025969e-06, "loss": 0.8375, "step": 3560, "vit_lr": 1.9660659348051938e-06 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 9.82819705344438e-06, "loss": 0.8236, "step": 3580, "vit_lr": 1.9656394106888757e-06 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 9.826051348018e-06, "loss": 0.8192, "step": 3600, "vit_lr": 1.9652102696035996e-06 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 9.823892563561885e-06, "loss": 0.8158, "step": 3620, "vit_lr": 1.9647785127123765e-06 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 9.821720705926529e-06, "loss": 0.7945, "step": 3640, "vit_lr": 1.9643441411853057e-06 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 9.819535780997864e-06, "loss": 0.8208, "step": 3660, "vit_lr": 1.9639071561995725e-06 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 9.817337794697229e-06, "loss": 0.8246, "step": 3680, "vit_lr": 1.9634675589394457e-06 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 9.815126752981365e-06, "loss": 0.8131, "step": 3700, "vit_lr": 1.9630253505962726e-06 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 9.81290266184239e-06, "loss": 0.8323, "step": 3720, "vit_lr": 1.9625805323684777e-06 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 9.81066552730779e-06, "loss": 0.8205, "step": 3740, "vit_lr": 1.962133105461558e-06 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 9.8084153554404e-06, "loss": 0.8163, "step": 3760, "vit_lr": 1.96168307108808e-06 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 9.806152152338387e-06, "loss": 0.8273, "step": 3780, "vit_lr": 1.9612304304676774e-06 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 9.803875924135234e-06, "loss": 0.8085, "step": 3800, "vit_lr": 1.9607751848270465e-06 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 9.801586676999719e-06, "loss": 0.8084, "step": 3820, "vit_lr": 1.9603173353999435e-06 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 9.799284417135907e-06, "loss": 0.8038, "step": 3840, "vit_lr": 1.959856883427181e-06 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 9.796969150783132e-06, "loss": 0.8123, "step": 3860, "vit_lr": 1.959393830156626e-06 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 9.794640884215966e-06, "loss": 0.804, "step": 3880, "vit_lr": 1.958928176843193e-06 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 9.792299623744224e-06, "loss": 0.8095, "step": 3900, "vit_lr": 1.9584599247488443e-06 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 9.78994537571293e-06, "loss": 0.8045, "step": 3920, "vit_lr": 1.9579890751425857e-06 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 9.787578146502303e-06, "loss": 0.8196, "step": 3940, "vit_lr": 1.9575156293004605e-06 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 9.78519794252775e-06, "loss": 0.816, "step": 3960, "vit_lr": 1.9570395885055496e-06 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 9.78280477023983e-06, "loss": 0.8178, "step": 3980, "vit_lr": 1.956560954047966e-06 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 9.780398636124259e-06, "loss": 0.8205, "step": 4000, "vit_lr": 1.9560797272248517e-06 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 9.777979546701874e-06, "loss": 0.8044, "step": 4020, "vit_lr": 1.9555959093403745e-06 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 9.775547508528622e-06, "loss": 0.8082, "step": 4040, "vit_lr": 1.9551095017057243e-06 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 9.773102528195545e-06, "loss": 0.8262, "step": 4060, "vit_lr": 1.954620505639109e-06 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 9.770644612328755e-06, "loss": 0.7949, "step": 4080, "vit_lr": 1.954128922465751e-06 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 9.768173767589426e-06, "loss": 0.8074, "step": 4100, "vit_lr": 1.953634753517885e-06 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 9.765690000673767e-06, "loss": 0.8139, "step": 4120, "vit_lr": 1.953138000134753e-06 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 9.763193318313007e-06, "loss": 0.8153, "step": 4140, "vit_lr": 1.952638663662601e-06 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 9.76068372727338e-06, "loss": 0.8047, "step": 4160, "vit_lr": 1.9521367454546755e-06 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 9.758161234356098e-06, "loss": 0.8208, "step": 4180, "vit_lr": 1.9516322468712195e-06 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 9.755625846397346e-06, "loss": 0.8055, "step": 4200, "vit_lr": 1.9511251692794688e-06 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 9.753077570268247e-06, "loss": 0.8185, "step": 4220, "vit_lr": 1.950615514053649e-06 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 9.750516412874861e-06, "loss": 0.8337, "step": 4240, "vit_lr": 1.9501032825749723e-06 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 9.747942381158154e-06, "loss": 0.8152, "step": 4260, "vit_lr": 1.9495884762316305e-06 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 9.745355482093977e-06, "loss": 0.813, "step": 4280, "vit_lr": 1.949071096418795e-06 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 9.742755722693059e-06, "loss": 0.806, "step": 4300, "vit_lr": 1.9485511445386114e-06 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 9.740143110000978e-06, "loss": 0.828, "step": 4320, "vit_lr": 1.9480286220001958e-06 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 9.737517651098153e-06, "loss": 0.8105, "step": 4340, "vit_lr": 1.9475035302196304e-06 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 9.734879353099807e-06, "loss": 0.816, "step": 4360, "vit_lr": 1.946975870619961e-06 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 9.732228223155964e-06, "loss": 0.8128, "step": 4380, "vit_lr": 1.946445644631193e-06 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 9.729564268451427e-06, "loss": 0.8005, "step": 4400, "vit_lr": 1.945912853690285e-06 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 9.726887496205744e-06, "loss": 0.8, "step": 4420, "vit_lr": 1.9453774992411484e-06 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 9.72419791367321e-06, "loss": 0.8174, "step": 4440, "vit_lr": 1.9448395827346416e-06 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 9.721495528142832e-06, "loss": 0.811, "step": 4460, "vit_lr": 1.9442991056285664e-06 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 9.71878034693832e-06, "loss": 0.8019, "step": 4480, "vit_lr": 1.9437560693876634e-06 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 9.716052377418053e-06, "loss": 0.8066, "step": 4500, "vit_lr": 1.9432104754836104e-06 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 9.713311626975072e-06, "loss": 0.8303, "step": 4520, "vit_lr": 1.9426623253950143e-06 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 9.71055810303706e-06, "loss": 0.8044, "step": 4540, "vit_lr": 1.942111620607412e-06 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 9.707791813066307e-06, "loss": 0.8192, "step": 4560, "vit_lr": 1.9415583626132613e-06 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 9.70501276455971e-06, "loss": 0.8267, "step": 4580, "vit_lr": 1.9410025529119416e-06 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 9.702220965048735e-06, "loss": 0.8124, "step": 4600, "vit_lr": 1.9404441930097466e-06 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 9.699416422099407e-06, "loss": 0.8081, "step": 4620, "vit_lr": 1.939883284419881e-06 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 9.696599143312291e-06, "loss": 0.8108, "step": 4640, "vit_lr": 1.939319828662458e-06 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 9.693769136322462e-06, "loss": 0.8012, "step": 4660, "vit_lr": 1.9387538272644923e-06 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 9.690926408799493e-06, "loss": 0.809, "step": 4680, "vit_lr": 1.9381852817598985e-06 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 9.688070968447428e-06, "loss": 0.8239, "step": 4700, "vit_lr": 1.9376141936894853e-06 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 9.685202823004764e-06, "loss": 0.8068, "step": 4720, "vit_lr": 1.9370405646009525e-06 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 9.682321980244434e-06, "loss": 0.7931, "step": 4740, "vit_lr": 1.9364643960488865e-06 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 9.679428447973777e-06, "loss": 0.8254, "step": 4760, "vit_lr": 1.9358856895947552e-06 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 9.676522234034522e-06, "loss": 0.7999, "step": 4780, "vit_lr": 1.935304446806904e-06 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 9.673603346302771e-06, "loss": 0.8128, "step": 4800, "vit_lr": 1.934720669260554e-06 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 9.67067179268897e-06, "loss": 0.8047, "step": 4820, "vit_lr": 1.9341343585377937e-06 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 9.667727581137888e-06, "loss": 0.8111, "step": 4840, "vit_lr": 1.9335455162275776e-06 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 9.664770719628605e-06, "loss": 0.8193, "step": 4860, "vit_lr": 1.9329541439257208e-06 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 9.661801216174476e-06, "loss": 0.8219, "step": 4880, "vit_lr": 1.9323602432348947e-06 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 9.658819078823123e-06, "loss": 0.8145, "step": 4900, "vit_lr": 1.9317638157646243e-06 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 9.655824315656402e-06, "loss": 0.8043, "step": 4920, "vit_lr": 1.93116486313128e-06 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 9.652816934790388e-06, "loss": 0.8227, "step": 4940, "vit_lr": 1.9305633869580774e-06 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 9.649796944375355e-06, "loss": 0.8046, "step": 4960, "vit_lr": 1.929959388875071e-06 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 9.646764352595744e-06, "loss": 0.8195, "step": 4980, "vit_lr": 1.9293528705191482e-06 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 9.643719167670149e-06, "loss": 0.7984, "step": 5000, "vit_lr": 1.9287438335340296e-06 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 9.640661397851293e-06, "loss": 0.8335, "step": 5020, "vit_lr": 1.9281322795702583e-06 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 9.637591051426006e-06, "loss": 0.8123, "step": 5040, "vit_lr": 1.9275182102852006e-06 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 9.6345081367152e-06, "loss": 0.7952, "step": 5060, "vit_lr": 1.9269016273430397e-06 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 9.631412662073849e-06, "loss": 0.7999, "step": 5080, "vit_lr": 1.9262825324147694e-06 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 9.628304635890966e-06, "loss": 0.829, "step": 5100, "vit_lr": 1.925660927178193e-06 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 9.625184066589578e-06, "loss": 0.8079, "step": 5120, "vit_lr": 1.9250368133179154e-06 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 9.622050962626709e-06, "loss": 0.8114, "step": 5140, "vit_lr": 1.9244101925253416e-06 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 9.618905332493347e-06, "loss": 0.8108, "step": 5160, "vit_lr": 1.9237810664986693e-06 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 9.615747184714432e-06, "loss": 0.8092, "step": 5180, "vit_lr": 1.923149436942886e-06 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 9.612576527848826e-06, "loss": 0.8083, "step": 5200, "vit_lr": 1.9225153055697653e-06 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 9.609393370489293e-06, "loss": 0.8026, "step": 5220, "vit_lr": 1.9218786740978583e-06 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 9.60619772126247e-06, "loss": 0.8201, "step": 5240, "vit_lr": 1.921239544252494e-06 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 9.602989588828854e-06, "loss": 0.8101, "step": 5260, "vit_lr": 1.9205979177657706e-06 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 9.599768981882773e-06, "loss": 0.8138, "step": 5280, "vit_lr": 1.9199537963765544e-06 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 9.596535909152354e-06, "loss": 0.8038, "step": 5300, "vit_lr": 1.9193071818304706e-06 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 9.593290379399514e-06, "loss": 0.8097, "step": 5320, "vit_lr": 1.9186580758799025e-06 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 9.59003240141993e-06, "loss": 0.8061, "step": 5340, "vit_lr": 1.918006480283986e-06 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 9.586761984043013e-06, "loss": 0.8131, "step": 5360, "vit_lr": 1.9173523968086024e-06 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 9.583479136131884e-06, "loss": 0.7934, "step": 5380, "vit_lr": 1.9166958272263763e-06 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 9.580183866583354e-06, "loss": 0.797, "step": 5400, "vit_lr": 1.9160367733166707e-06 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 9.576876184327899e-06, "loss": 0.8014, "step": 5420, "vit_lr": 1.9153752368655794e-06 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 9.573556098329631e-06, "loss": 0.7895, "step": 5440, "vit_lr": 1.9147112196659257e-06 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 9.57022361758628e-06, "loss": 0.8151, "step": 5460, "vit_lr": 1.914044723517256e-06 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 9.566878751129168e-06, "loss": 0.8092, "step": 5480, "vit_lr": 1.913375750225833e-06 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 9.563521508023179e-06, "loss": 0.8022, "step": 5500, "vit_lr": 1.9127043016046356e-06 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 9.560151897366743e-06, "loss": 0.7962, "step": 5520, "vit_lr": 1.9120303794733485e-06 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 9.556769928291804e-06, "loss": 0.8207, "step": 5540, "vit_lr": 1.911353985658361e-06 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 9.553375609963801e-06, "loss": 0.8118, "step": 5560, "vit_lr": 1.9106751219927604e-06 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 9.54996895158164e-06, "loss": 0.8147, "step": 5580, "vit_lr": 1.909993790316328e-06 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 9.546549962377669e-06, "loss": 0.8009, "step": 5600, "vit_lr": 1.9093099924755335e-06 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 9.543118651617653e-06, "loss": 0.7966, "step": 5620, "vit_lr": 1.9086237303235303e-06 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 9.53967502860075e-06, "loss": 0.8126, "step": 5640, "vit_lr": 1.9079350057201498e-06 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 9.536219102659486e-06, "loss": 0.7974, "step": 5660, "vit_lr": 1.907243820531897e-06 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 9.532750883159729e-06, "loss": 0.8273, "step": 5680, "vit_lr": 1.9065501766319456e-06 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 9.529270379500664e-06, "loss": 0.8004, "step": 5700, "vit_lr": 1.9058540759001324e-06 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 9.525777601114764e-06, "loss": 0.8101, "step": 5720, "vit_lr": 1.9051555202229528e-06 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 9.522272557467774e-06, "loss": 0.7881, "step": 5740, "vit_lr": 1.9044545114935544e-06 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 9.518755258058672e-06, "loss": 0.8015, "step": 5760, "vit_lr": 1.903751051611734e-06 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 9.515225712419652e-06, "loss": 0.8033, "step": 5780, "vit_lr": 1.9030451424839301e-06 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 9.511683930116101e-06, "loss": 0.8107, "step": 5800, "vit_lr": 1.9023367860232197e-06 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 9.508129920746563e-06, "loss": 0.8083, "step": 5820, "vit_lr": 1.9016259841493124e-06 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 9.50456369394272e-06, "loss": 0.7959, "step": 5840, "vit_lr": 1.900912738788544e-06 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 9.50098525936937e-06, "loss": 0.8038, "step": 5860, "vit_lr": 1.9001970518738735e-06 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 9.49739462672438e-06, "loss": 0.8067, "step": 5880, "vit_lr": 1.899478925344876e-06 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 9.493791805738694e-06, "loss": 0.7944, "step": 5900, "vit_lr": 1.8987583611477386e-06 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 9.490176806176274e-06, "loss": 0.8088, "step": 5920, "vit_lr": 1.8980353612352546e-06 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 9.486549637834092e-06, "loss": 0.807, "step": 5940, "vit_lr": 1.8973099275668183e-06 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 9.4829103105421e-06, "loss": 0.7834, "step": 5960, "vit_lr": 1.8965820621084197e-06 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 9.479258834163196e-06, "loss": 0.8163, "step": 5980, "vit_lr": 1.8958517668326388e-06 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 9.475595218593207e-06, "loss": 0.7919, "step": 6000, "vit_lr": 1.8951190437186412e-06 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 9.471919473760862e-06, "loss": 0.8216, "step": 6020, "vit_lr": 1.8943838947521723e-06 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 9.468231609627756e-06, "loss": 0.7898, "step": 6040, "vit_lr": 1.8936463219255507e-06 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 9.464531636188326e-06, "loss": 0.7933, "step": 6060, "vit_lr": 1.8929063272376648e-06 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 9.460819563469835e-06, "loss": 0.8089, "step": 6080, "vit_lr": 1.8921639126939665e-06 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 9.457095401532327e-06, "loss": 0.7982, "step": 6100, "vit_lr": 1.8914190803064654e-06 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 9.453359160468618e-06, "loss": 0.7816, "step": 6120, "vit_lr": 1.8906718320937234e-06 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 9.449610850404251e-06, "loss": 0.8067, "step": 6140, "vit_lr": 1.8899221700808498e-06 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 9.44585048149748e-06, "loss": 0.7882, "step": 6160, "vit_lr": 1.8891700962994958e-06 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 9.442078063939245e-06, "loss": 0.7962, "step": 6180, "vit_lr": 1.8884156127878487e-06 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 9.438293607953129e-06, "loss": 0.7973, "step": 6200, "vit_lr": 1.8876587215906257e-06 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 9.434497123795349e-06, "loss": 0.8017, "step": 6220, "vit_lr": 1.8868994247590695e-06 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 9.430688621754713e-06, "loss": 0.7961, "step": 6240, "vit_lr": 1.8861377243509423e-06 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 9.426868112152602e-06, "loss": 0.8006, "step": 6260, "vit_lr": 1.8853736224305204e-06 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 9.423035605342941e-06, "loss": 0.7921, "step": 6280, "vit_lr": 1.8846071210685879e-06 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 9.419191111712163e-06, "loss": 0.785, "step": 6300, "vit_lr": 1.8838382223424324e-06 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 9.415334641679185e-06, "loss": 0.8109, "step": 6320, "vit_lr": 1.8830669283358368e-06 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 9.411466205695392e-06, "loss": 0.8108, "step": 6340, "vit_lr": 1.8822932411390783e-06 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 9.407585814244587e-06, "loss": 0.7929, "step": 6360, "vit_lr": 1.881517162848917e-06 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 9.403693477842975e-06, "loss": 0.8104, "step": 6380, "vit_lr": 1.8807386955685947e-06 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 9.399789207039138e-06, "loss": 0.7944, "step": 6400, "vit_lr": 1.8799578414078274e-06 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 9.395873012413997e-06, "loss": 0.8208, "step": 6420, "vit_lr": 1.8791746024827992e-06 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 9.39194490458079e-06, "loss": 0.8024, "step": 6440, "vit_lr": 1.8783889809161578e-06 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 9.388004894185038e-06, "loss": 0.7998, "step": 6460, "vit_lr": 1.8776009788370074e-06 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 9.384052991904524e-06, "loss": 0.7947, "step": 6480, "vit_lr": 1.8768105983809047e-06 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 9.380089208449252e-06, "loss": 0.7877, "step": 6500, "vit_lr": 1.87601784168985e-06 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 9.37611355456143e-06, "loss": 0.7842, "step": 6520, "vit_lr": 1.8752227109122855e-06 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 9.372126041015435e-06, "loss": 0.7939, "step": 6540, "vit_lr": 1.8744252082030867e-06 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 9.368126678617783e-06, "loss": 0.7876, "step": 6560, "vit_lr": 1.8736253357235566e-06 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 9.364115478207106e-06, "loss": 0.7894, "step": 6580, "vit_lr": 1.8728230956414211e-06 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 9.360092450654112e-06, "loss": 0.7877, "step": 6600, "vit_lr": 1.8720184901308223e-06 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 9.356057606861569e-06, "loss": 0.8084, "step": 6620, "vit_lr": 1.8712115213723135e-06 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 9.352010957764257e-06, "loss": 0.808, "step": 6640, "vit_lr": 1.8704021915528511e-06 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 9.347952514328961e-06, "loss": 0.7892, "step": 6660, "vit_lr": 1.869590502865792e-06 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 9.343882287554424e-06, "loss": 0.8027, "step": 6680, "vit_lr": 1.8687764575108846e-06 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 9.339800288471324e-06, "loss": 0.7994, "step": 6700, "vit_lr": 1.8679600576942644e-06 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 9.335706528142242e-06, "loss": 0.7893, "step": 6720, "vit_lr": 1.867141305628448e-06 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 9.331601017661631e-06, "loss": 0.8013, "step": 6740, "vit_lr": 1.866320203532326e-06 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 9.327483768155794e-06, "loss": 0.7902, "step": 6760, "vit_lr": 1.8654967536311587e-06 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 9.323354790782847e-06, "loss": 0.7887, "step": 6780, "vit_lr": 1.864670958156569e-06 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 9.319214096732681e-06, "loss": 0.7999, "step": 6800, "vit_lr": 1.8638428193465358e-06 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 9.315061697226951e-06, "loss": 0.8224, "step": 6820, "vit_lr": 1.8630123394453898e-06 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 9.310897603519026e-06, "loss": 0.7879, "step": 6840, "vit_lr": 1.8621795207038052e-06 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 9.306721826893976e-06, "loss": 0.7864, "step": 6860, "vit_lr": 1.861344365378795e-06 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 9.302534378668526e-06, "loss": 0.7923, "step": 6880, "vit_lr": 1.860506875733705e-06 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 9.298335270191035e-06, "loss": 0.8017, "step": 6900, "vit_lr": 1.8596670540382066e-06 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 9.294124512841458e-06, "loss": 0.7945, "step": 6920, "vit_lr": 1.8588249025682914e-06 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 9.289902118031328e-06, "loss": 0.7991, "step": 6940, "vit_lr": 1.8579804236062656e-06 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 9.285668097203711e-06, "loss": 0.7989, "step": 6960, "vit_lr": 1.8571336194407418e-06 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 9.281422461833179e-06, "loss": 0.7832, "step": 6980, "vit_lr": 1.8562844923666354e-06 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 9.277165223425784e-06, "loss": 0.8116, "step": 7000, "vit_lr": 1.8554330446851565e-06 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 9.27289639351902e-06, "loss": 0.8006, "step": 7020, "vit_lr": 1.8545792787038037e-06 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 9.268615983681799e-06, "loss": 0.7925, "step": 7040, "vit_lr": 1.8537231967363596e-06 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 9.264324005514414e-06, "loss": 0.7863, "step": 7060, "vit_lr": 1.8528648011028825e-06 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 9.260020470648508e-06, "loss": 0.8054, "step": 7080, "vit_lr": 1.8520040941297013e-06 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 9.255705390747043e-06, "loss": 0.786, "step": 7100, "vit_lr": 1.8511410781494084e-06 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 9.251378777504272e-06, "loss": 0.7918, "step": 7120, "vit_lr": 1.8502757555008543e-06 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 9.247040642645704e-06, "loss": 0.796, "step": 7140, "vit_lr": 1.8494081285291405e-06 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 9.24269099792807e-06, "loss": 0.8021, "step": 7160, "vit_lr": 1.8485381995856137e-06 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 9.238329855139293e-06, "loss": 0.7991, "step": 7180, "vit_lr": 1.8476659710278585e-06 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 9.233957226098465e-06, "loss": 0.785, "step": 7200, "vit_lr": 1.8467914452196926e-06 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 9.229573122655795e-06, "loss": 0.7868, "step": 7220, "vit_lr": 1.8459146245311588e-06 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 9.225177556692599e-06, "loss": 0.7931, "step": 7240, "vit_lr": 1.8450355113385193e-06 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 9.22077054012125e-06, "loss": 0.8098, "step": 7260, "vit_lr": 1.8441541080242495e-06 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 9.216352084885155e-06, "loss": 0.7872, "step": 7280, "vit_lr": 1.8432704169770307e-06 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 9.211922202958726e-06, "loss": 0.7887, "step": 7300, "vit_lr": 1.8423844405917449e-06 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 9.207480906347336e-06, "loss": 0.7917, "step": 7320, "vit_lr": 1.8414961812694667e-06 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 9.203028207087292e-06, "loss": 0.7927, "step": 7340, "vit_lr": 1.8406056414174583e-06 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 9.198564117245811e-06, "loss": 0.7774, "step": 7360, "vit_lr": 1.839712823449162e-06 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 9.194088648920976e-06, "loss": 0.7925, "step": 7380, "vit_lr": 1.8388177297841949e-06 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 9.1896018142417e-06, "loss": 0.7821, "step": 7400, "vit_lr": 1.8379203628483397e-06 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 9.185103625367709e-06, "loss": 0.792, "step": 7420, "vit_lr": 1.8370207250735414e-06 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 9.180594094489495e-06, "loss": 0.8115, "step": 7440, "vit_lr": 1.8361188188978987e-06 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 9.176073233828288e-06, "loss": 0.7916, "step": 7460, "vit_lr": 1.8352146467656572e-06 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 9.171541055636025e-06, "loss": 0.7844, "step": 7480, "vit_lr": 1.8343082111272049e-06 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 9.166997572195314e-06, "loss": 0.7716, "step": 7500, "vit_lr": 1.8333995144390623e-06 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 9.1624427958194e-06, "loss": 0.7802, "step": 7520, "vit_lr": 1.8324885591638796e-06 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 9.157876738852132e-06, "loss": 0.799, "step": 7540, "vit_lr": 1.831575347770426e-06 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 9.153299413667933e-06, "loss": 0.7742, "step": 7560, "vit_lr": 1.8306598827335865e-06 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 9.148710832671763e-06, "loss": 0.7864, "step": 7580, "vit_lr": 1.829742166534352e-06 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 9.144111008299084e-06, "loss": 0.7992, "step": 7600, "vit_lr": 1.8288222016598166e-06 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 9.139499953015834e-06, "loss": 0.7896, "step": 7620, "vit_lr": 1.8278999906031665e-06 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 9.134877679318379e-06, "loss": 0.7933, "step": 7640, "vit_lr": 1.8269755358636756e-06 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 9.130244199733498e-06, "loss": 0.8013, "step": 7660, "vit_lr": 1.8260488399466993e-06 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 9.125599526818332e-06, "loss": 0.8095, "step": 7680, "vit_lr": 1.8251199053636662e-06 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 9.120943673160358e-06, "loss": 0.7714, "step": 7700, "vit_lr": 1.8241887346320715e-06 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 9.116276651377358e-06, "loss": 0.7869, "step": 7720, "vit_lr": 1.8232553302754713e-06 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 9.111598474117375e-06, "loss": 0.7914, "step": 7740, "vit_lr": 1.8223196948234746e-06 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 9.106909154058687e-06, "loss": 0.7881, "step": 7760, "vit_lr": 1.821381830811737e-06 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 9.102208703909769e-06, "loss": 0.7719, "step": 7780, "vit_lr": 1.8204417407819537e-06 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 9.097497136409261e-06, "loss": 0.7938, "step": 7800, "vit_lr": 1.819499427281852e-06 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 9.092774464325932e-06, "loss": 0.7974, "step": 7820, "vit_lr": 1.8185548928651865e-06 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 9.088040700458645e-06, "loss": 0.7757, "step": 7840, "vit_lr": 1.8176081400917289e-06 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 9.08329585763632e-06, "loss": 0.7983, "step": 7860, "vit_lr": 1.816659171527264e-06 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 9.078539948717909e-06, "loss": 0.7793, "step": 7880, "vit_lr": 1.8157079897435813e-06 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 9.073772986592345e-06, "loss": 0.7776, "step": 7900, "vit_lr": 1.8147545973184686e-06 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 9.068994984178521e-06, "loss": 0.7918, "step": 7920, "vit_lr": 1.813798996835704e-06 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 9.064205954425252e-06, "loss": 0.7852, "step": 7940, "vit_lr": 1.81284119088505e-06 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 9.059405910311233e-06, "loss": 0.79, "step": 7960, "vit_lr": 1.8118811820622466e-06 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 9.054594864845018e-06, "loss": 0.7743, "step": 7980, "vit_lr": 1.8109189729690033e-06 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 9.049772831064963e-06, "loss": 0.7931, "step": 8000, "vit_lr": 1.8099545662129923e-06 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 9.044939822039211e-06, "loss": 0.7992, "step": 8020, "vit_lr": 1.808987964407842e-06 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 9.04009585086565e-06, "loss": 0.7822, "step": 8040, "vit_lr": 1.8080191701731297e-06 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 9.035240930671871e-06, "loss": 0.7849, "step": 8060, "vit_lr": 1.807048186134374e-06 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 9.030375074615145e-06, "loss": 0.7907, "step": 8080, "vit_lr": 1.806075014923029e-06 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 9.025498295882373e-06, "loss": 0.7776, "step": 8100, "vit_lr": 1.8050996591764743e-06 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 9.02061060769006e-06, "loss": 0.7793, "step": 8120, "vit_lr": 1.8041221215380115e-06 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 9.015712023284277e-06, "loss": 0.7932, "step": 8140, "vit_lr": 1.8031424046568552e-06 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 9.01080255594063e-06, "loss": 0.7924, "step": 8160, "vit_lr": 1.8021605111881257e-06 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 9.005882218964206e-06, "loss": 0.7788, "step": 8180, "vit_lr": 1.801176443792841e-06 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 9.000951025689562e-06, "loss": 0.7827, "step": 8200, "vit_lr": 1.800190205137912e-06 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 8.996008989480672e-06, "loss": 0.7873, "step": 8220, "vit_lr": 1.799201797896134e-06 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 8.99105612373089e-06, "loss": 0.7741, "step": 8240, "vit_lr": 1.7982112247461778e-06 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 8.98609244186293e-06, "loss": 0.7853, "step": 8260, "vit_lr": 1.7972184883725857e-06 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 8.981117957328812e-06, "loss": 0.7764, "step": 8280, "vit_lr": 1.796223591465762e-06 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 8.976132683609829e-06, "loss": 0.7967, "step": 8300, "vit_lr": 1.7952265367219655e-06 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 8.97113663421652e-06, "loss": 0.7648, "step": 8320, "vit_lr": 1.7942273268433036e-06 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 8.966129822688622e-06, "loss": 0.7814, "step": 8340, "vit_lr": 1.7932259645377243e-06 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 8.961112262595048e-06, "loss": 0.7885, "step": 8360, "vit_lr": 1.7922224525190094e-06 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 8.956083967533827e-06, "loss": 0.7805, "step": 8380, "vit_lr": 1.791216793506765e-06 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 8.951044951132088e-06, "loss": 0.7638, "step": 8400, "vit_lr": 1.7902089902264174e-06 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 8.945995227046018e-06, "loss": 0.7876, "step": 8420, "vit_lr": 1.7891990454092033e-06 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 8.940934808960816e-06, "loss": 0.7978, "step": 8440, "vit_lr": 1.788186961792163e-06 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 8.93586371059067e-06, "loss": 0.8009, "step": 8460, "vit_lr": 1.7871727421181338e-06 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 8.930781945678706e-06, "loss": 0.792, "step": 8480, "vit_lr": 1.786156389135741e-06 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 8.925689527996964e-06, "loss": 0.7795, "step": 8500, "vit_lr": 1.7851379055993925e-06 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 8.920586471346346e-06, "loss": 0.8004, "step": 8520, "vit_lr": 1.784117294269269e-06 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 8.915472789556595e-06, "loss": 0.7928, "step": 8540, "vit_lr": 1.7830945579113185e-06 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 8.910348496486242e-06, "loss": 0.7832, "step": 8560, "vit_lr": 1.7820696992972481e-06 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 8.905213606022578e-06, "loss": 0.7728, "step": 8580, "vit_lr": 1.7810427212045153e-06 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 8.900068132081617e-06, "loss": 0.7717, "step": 8600, "vit_lr": 1.7800136264163232e-06 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 8.89491208860805e-06, "loss": 0.7797, "step": 8620, "vit_lr": 1.77898241772161e-06 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 8.889745489575217e-06, "loss": 0.7811, "step": 8640, "vit_lr": 1.7779490979150432e-06 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 8.88456834898506e-06, "loss": 0.7882, "step": 8660, "vit_lr": 1.7769136697970119e-06 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 8.879380680868096e-06, "loss": 0.7902, "step": 8680, "vit_lr": 1.7758761361736187e-06 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 8.874182499283362e-06, "loss": 0.772, "step": 8700, "vit_lr": 1.774836499856672e-06 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 8.868973818318399e-06, "loss": 0.7794, "step": 8720, "vit_lr": 1.7737947636636793e-06 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 8.863754652089194e-06, "loss": 0.7841, "step": 8740, "vit_lr": 1.7727509304178387e-06 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 8.858525014740157e-06, "loss": 0.7838, "step": 8760, "vit_lr": 1.771705002948031e-06 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 8.853284920444068e-06, "loss": 0.7755, "step": 8780, "vit_lr": 1.7706569840888132e-06 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 8.848034383402052e-06, "loss": 0.7917, "step": 8800, "vit_lr": 1.7696068766804103e-06 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 8.842773417843534e-06, "loss": 0.7726, "step": 8820, "vit_lr": 1.7685546835687063e-06 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 8.837502038026196e-06, "loss": 0.7841, "step": 8840, "vit_lr": 1.7675004076052388e-06 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 8.83222025823595e-06, "loss": 0.777, "step": 8860, "vit_lr": 1.7664440516471899e-06 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 8.826928092786891e-06, "loss": 0.7816, "step": 8880, "vit_lr": 1.7653856185573782e-06 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 8.82162555602126e-06, "loss": 0.778, "step": 8900, "vit_lr": 1.764325111204252e-06 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 8.816312662309404e-06, "loss": 0.7809, "step": 8920, "vit_lr": 1.7632625324618807e-06 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 8.810989426049738e-06, "loss": 0.7873, "step": 8940, "vit_lr": 1.7621978852099476e-06 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 8.805655861668709e-06, "loss": 0.7741, "step": 8960, "vit_lr": 1.7611311723337415e-06 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 8.80031198362075e-06, "loss": 0.7876, "step": 8980, "vit_lr": 1.76006239672415e-06 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 8.79495780638825e-06, "loss": 0.7729, "step": 9000, "vit_lr": 1.7589915612776496e-06 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 8.789593344481504e-06, "loss": 0.7809, "step": 9020, "vit_lr": 1.7579186688963005e-06 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 8.784218612438686e-06, "loss": 0.7859, "step": 9040, "vit_lr": 1.7568437224877369e-06 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 8.778833624825795e-06, "loss": 0.7801, "step": 9060, "vit_lr": 1.7557667249651589e-06 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 8.773438396236633e-06, "loss": 0.7858, "step": 9080, "vit_lr": 1.7546876792473262e-06 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 8.768032941292746e-06, "loss": 0.7821, "step": 9100, "vit_lr": 1.753606588258549e-06 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 8.762617274643402e-06, "loss": 0.7801, "step": 9120, "vit_lr": 1.7525234549286803e-06 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 8.75719141096554e-06, "loss": 0.7952, "step": 9140, "vit_lr": 1.7514382821931077e-06 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 8.751755364963736e-06, "loss": 0.7698, "step": 9160, "vit_lr": 1.750351072992747e-06 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 8.746309151370157e-06, "loss": 0.7748, "step": 9180, "vit_lr": 1.7492618302740312e-06 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 8.74085278494453e-06, "loss": 0.7949, "step": 9200, "vit_lr": 1.7481705569889056e-06 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 8.735386280474094e-06, "loss": 0.7882, "step": 9220, "vit_lr": 1.7470772560948186e-06 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 8.729909652773563e-06, "loss": 0.7799, "step": 9240, "vit_lr": 1.7459819305547123e-06 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 8.724422916685089e-06, "loss": 0.7736, "step": 9260, "vit_lr": 1.7448845833370176e-06 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 8.718926087078214e-06, "loss": 0.78, "step": 9280, "vit_lr": 1.7437852174156424e-06 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 8.713419178849839e-06, "loss": 0.782, "step": 9300, "vit_lr": 1.7426838357699673e-06 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 8.707902206924173e-06, "loss": 0.7888, "step": 9320, "vit_lr": 1.7415804413848342e-06 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 8.702375186252706e-06, "loss": 0.7824, "step": 9340, "vit_lr": 1.7404750372505408e-06 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 8.696838131814154e-06, "loss": 0.7677, "step": 9360, "vit_lr": 1.7393676263628305e-06 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 8.69129105861443e-06, "loss": 0.7832, "step": 9380, "vit_lr": 1.738258211722886e-06 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 8.685733981686599e-06, "loss": 0.7726, "step": 9400, "vit_lr": 1.7371467963373196e-06 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 8.680166916090833e-06, "loss": 0.7929, "step": 9420, "vit_lr": 1.7360333832181663e-06 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 8.674589876914377e-06, "loss": 0.7801, "step": 9440, "vit_lr": 1.7349179753828752e-06 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 8.669002879271504e-06, "loss": 0.783, "step": 9460, "vit_lr": 1.7338005758543006e-06 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 8.66340593830348e-06, "loss": 0.7865, "step": 9480, "vit_lr": 1.7326811876606954e-06 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 8.657799069178508e-06, "loss": 0.7783, "step": 9500, "vit_lr": 1.7315598138357012e-06 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 8.652182287091708e-06, "loss": 0.7907, "step": 9520, "vit_lr": 1.7304364574183415e-06 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 8.646555607265059e-06, "loss": 0.7678, "step": 9540, "vit_lr": 1.7293111214530115e-06 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 8.640919044947367e-06, "loss": 0.7785, "step": 9560, "vit_lr": 1.7281838089894731e-06 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 8.635272615414216e-06, "loss": 0.7715, "step": 9580, "vit_lr": 1.727054523082843e-06 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 8.629616333967937e-06, "loss": 0.7632, "step": 9600, "vit_lr": 1.725923266793587e-06 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 8.623950215937556e-06, "loss": 0.7951, "step": 9620, "vit_lr": 1.7247900431875108e-06 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 8.618274276678756e-06, "loss": 0.775, "step": 9640, "vit_lr": 1.723654855335751e-06 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 8.612588531573842e-06, "loss": 0.7886, "step": 9660, "vit_lr": 1.7225177063147681e-06 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 8.606892996031691e-06, "loss": 0.7825, "step": 9680, "vit_lr": 1.7213785992063378e-06 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 8.601187685487709e-06, "loss": 0.7774, "step": 9700, "vit_lr": 1.7202375370975417e-06 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 8.5954726154038e-06, "loss": 0.7738, "step": 9720, "vit_lr": 1.7190945230807596e-06 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 8.589747801268311e-06, "loss": 0.773, "step": 9740, "vit_lr": 1.717949560253662e-06 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 8.584013258596003e-06, "loss": 0.7812, "step": 9760, "vit_lr": 1.7168026517192002e-06 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 8.578269002927994e-06, "loss": 0.7744, "step": 9780, "vit_lr": 1.7156538005855984e-06 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 8.572515049831732e-06, "loss": 0.7498, "step": 9800, "vit_lr": 1.714503009966346e-06 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 8.56675141490094e-06, "loss": 0.7731, "step": 9820, "vit_lr": 1.713350282980188e-06 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 8.560978113755588e-06, "loss": 0.778, "step": 9840, "vit_lr": 1.7121956227511174e-06 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 8.555195162041833e-06, "loss": 0.7759, "step": 9860, "vit_lr": 1.7110390324083666e-06 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 8.549402575431994e-06, "loss": 0.7621, "step": 9880, "vit_lr": 1.7098805150863985e-06 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 8.543600369624493e-06, "loss": 0.7841, "step": 9900, "vit_lr": 1.7087200739248983e-06 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 8.537788560343828e-06, "loss": 0.76, "step": 9920, "vit_lr": 1.7075577120687654e-06 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 8.53196716334052e-06, "loss": 0.7851, "step": 9940, "vit_lr": 1.7063934326681038e-06 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 8.526136194391074e-06, "loss": 0.7804, "step": 9960, "vit_lr": 1.7052272388782147e-06 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 8.520295669297939e-06, "loss": 0.7672, "step": 9980, "vit_lr": 1.7040591338595875e-06 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 8.514445603889455e-06, "loss": 0.795, "step": 10000, "vit_lr": 1.7028891207778909e-06 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 8.508586014019824e-06, "loss": 0.771, "step": 10020, "vit_lr": 1.7017172028039646e-06 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 8.502716915569054e-06, "loss": 0.7809, "step": 10040, "vit_lr": 1.7005433831138107e-06 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 8.49683832444293e-06, "loss": 0.762, "step": 10060, "vit_lr": 1.6993676648885858e-06 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 8.490950256572955e-06, "loss": 0.7826, "step": 10080, "vit_lr": 1.698190051314591e-06 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 8.485052727916322e-06, "loss": 0.7856, "step": 10100, "vit_lr": 1.697010545583264e-06 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 8.479145754455852e-06, "loss": 0.7771, "step": 10120, "vit_lr": 1.6958291508911704e-06 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 8.47322935219998e-06, "loss": 0.7779, "step": 10140, "vit_lr": 1.6946458704399957e-06 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 8.467303537182679e-06, "loss": 0.7831, "step": 10160, "vit_lr": 1.6934607074365354e-06 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 8.461368325463436e-06, "loss": 0.7561, "step": 10180, "vit_lr": 1.6922736650926867e-06 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 8.455423733127204e-06, "loss": 0.7718, "step": 10200, "vit_lr": 1.6910847466254406e-06 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 8.44946977628436e-06, "loss": 0.7739, "step": 10220, "vit_lr": 1.6898939552568717e-06 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 8.443506471070658e-06, "loss": 0.7967, "step": 10240, "vit_lr": 1.6887012942141313e-06 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 8.437533833647186e-06, "loss": 0.8039, "step": 10260, "vit_lr": 1.6875067667294372e-06 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 8.431551880200328e-06, "loss": 0.793, "step": 10280, "vit_lr": 1.6863103760400655e-06 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 8.425560626941707e-06, "loss": 0.7792, "step": 10300, "vit_lr": 1.6851121253883413e-06 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 8.419560090108158e-06, "loss": 0.7996, "step": 10320, "vit_lr": 1.6839120180216314e-06 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 8.413550285961667e-06, "loss": 0.7836, "step": 10340, "vit_lr": 1.6827100571923332e-06 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 8.407531230789343e-06, "loss": 0.7793, "step": 10360, "vit_lr": 1.6815062461578684e-06 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 8.401502940903359e-06, "loss": 0.7772, "step": 10380, "vit_lr": 1.6803005881806715e-06 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 8.395465432640923e-06, "loss": 0.7664, "step": 10400, "vit_lr": 1.6790930865281842e-06 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 8.389418722364218e-06, "loss": 0.767, "step": 10420, "vit_lr": 1.6778837444728434e-06 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 8.383362826460366e-06, "loss": 0.7824, "step": 10440, "vit_lr": 1.676672565292073e-06 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 8.377297761341392e-06, "loss": 0.782, "step": 10460, "vit_lr": 1.6754595522682781e-06 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 8.371223543444157e-06, "loss": 0.7944, "step": 10480, "vit_lr": 1.674244708688831e-06 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 8.365140189230337e-06, "loss": 0.7773, "step": 10500, "vit_lr": 1.673028037846067e-06 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 8.359047715186363e-06, "loss": 0.7748, "step": 10520, "vit_lr": 1.6718095430372724e-06 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 8.352946137823386e-06, "loss": 0.7838, "step": 10540, "vit_lr": 1.670589227564677e-06 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 8.346835473677221e-06, "loss": 0.7735, "step": 10560, "vit_lr": 1.669367094735444e-06 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 8.340715739308316e-06, "loss": 0.765, "step": 10580, "vit_lr": 1.668143147861663e-06 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 8.334586951301697e-06, "loss": 0.7869, "step": 10600, "vit_lr": 1.666917390260339e-06 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 8.328449126266924e-06, "loss": 0.7639, "step": 10620, "vit_lr": 1.6656898252533846e-06 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 8.322302280838053e-06, "loss": 0.7662, "step": 10640, "vit_lr": 1.6644604561676103e-06 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 8.316146431673581e-06, "loss": 0.789, "step": 10660, "vit_lr": 1.6632292863347162e-06 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 8.309981595456413e-06, "loss": 0.7662, "step": 10680, "vit_lr": 1.6619963190912824e-06 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 8.303807788893801e-06, "loss": 0.7785, "step": 10700, "vit_lr": 1.66076155777876e-06 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 8.297625028717312e-06, "loss": 0.7774, "step": 10720, "vit_lr": 1.6595250057434621e-06 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 8.29143333168278e-06, "loss": 0.8019, "step": 10740, "vit_lr": 1.6582866663365556e-06 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 8.285232714570253e-06, "loss": 0.7733, "step": 10760, "vit_lr": 1.6570465429140502e-06 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 8.27902319418396e-06, "loss": 0.7703, "step": 10780, "vit_lr": 1.6558046388367917e-06 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 8.272804787352253e-06, "loss": 0.7801, "step": 10800, "vit_lr": 1.6545609574704505e-06 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 8.26657751092757e-06, "loss": 0.7776, "step": 10820, "vit_lr": 1.6533155021855138e-06 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 8.260341381786388e-06, "loss": 0.7748, "step": 10840, "vit_lr": 1.6520682763572774e-06 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 8.254096416829169e-06, "loss": 0.7745, "step": 10860, "vit_lr": 1.6508192833658336e-06 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 8.247842632980328e-06, "loss": 0.7738, "step": 10880, "vit_lr": 1.6495685265960654e-06 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 8.241580047188176e-06, "loss": 0.7782, "step": 10900, "vit_lr": 1.6483160094376351e-06 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 8.23530867642488e-06, "loss": 0.7697, "step": 10920, "vit_lr": 1.6470617352849758e-06 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 8.229028537686415e-06, "loss": 0.7968, "step": 10940, "vit_lr": 1.6458057075372828e-06 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 8.222739647992517e-06, "loss": 0.7769, "step": 10960, "vit_lr": 1.644547929598503e-06 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 8.216442024386637e-06, "loss": 0.7711, "step": 10980, "vit_lr": 1.6432884048773272e-06 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 8.210135683935896e-06, "loss": 0.7678, "step": 11000, "vit_lr": 1.642027136787179e-06 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 8.203820643731043e-06, "loss": 0.7736, "step": 11020, "vit_lr": 1.6407641287462082e-06 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 8.197496920886397e-06, "loss": 0.7603, "step": 11040, "vit_lr": 1.6394993841772792e-06 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 8.191164532539813e-06, "loss": 0.7576, "step": 11060, "vit_lr": 1.6382329065079622e-06 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 8.184823495852626e-06, "loss": 0.7771, "step": 11080, "vit_lr": 1.636964699170525e-06 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 8.178473828009615e-06, "loss": 0.757, "step": 11100, "vit_lr": 1.6356947656019226e-06 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 8.172115546218942e-06, "loss": 0.7642, "step": 11120, "vit_lr": 1.6344231092437884e-06 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 8.165748667712122e-06, "loss": 0.7657, "step": 11140, "vit_lr": 1.633149733542424e-06 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 8.159373209743962e-06, "loss": 0.785, "step": 11160, "vit_lr": 1.6318746419487921e-06 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 8.152989189592521e-06, "loss": 0.792, "step": 11180, "vit_lr": 1.630597837918504e-06 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 8.146596624559064e-06, "loss": 0.7575, "step": 11200, "vit_lr": 1.6293193249118128e-06 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 8.140195531968014e-06, "loss": 0.7706, "step": 11220, "vit_lr": 1.6280391063936026e-06 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 8.1337859291669e-06, "loss": 0.779, "step": 11240, "vit_lr": 1.6267571858333797e-06 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 8.12736783352632e-06, "loss": 0.7906, "step": 11260, "vit_lr": 1.625473566705264e-06 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 8.120941262439885e-06, "loss": 0.7749, "step": 11280, "vit_lr": 1.624188252487977e-06 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 8.114506233324176e-06, "loss": 0.7662, "step": 11300, "vit_lr": 1.6229012466648351e-06 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 8.108062763618697e-06, "loss": 0.7752, "step": 11320, "vit_lr": 1.621612552723739e-06 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 8.101610870785822e-06, "loss": 0.7769, "step": 11340, "vit_lr": 1.6203221741571644e-06 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 8.095150572310762e-06, "loss": 0.7817, "step": 11360, "vit_lr": 1.6190301144621522e-06 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 8.0886818857015e-06, "loss": 0.7867, "step": 11380, "vit_lr": 1.6177363771402997e-06 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 8.08220482848875e-06, "loss": 0.7898, "step": 11400, "vit_lr": 1.61644096569775e-06 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 8.075719418225921e-06, "loss": 0.7711, "step": 11420, "vit_lr": 1.6151438836451838e-06 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 8.069225672489048e-06, "loss": 0.7813, "step": 11440, "vit_lr": 1.6138451344978095e-06 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 8.062723608876765e-06, "loss": 0.7757, "step": 11460, "vit_lr": 1.6125447217753527e-06 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 8.056213245010243e-06, "loss": 0.7708, "step": 11480, "vit_lr": 1.6112426490020485e-06 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 8.04969459853315e-06, "loss": 0.776, "step": 11500, "vit_lr": 1.60993891970663e-06 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 8.0431676871116e-06, "loss": 0.7759, "step": 11520, "vit_lr": 1.6086335374223198e-06 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 8.036632528434102e-06, "loss": 0.7705, "step": 11540, "vit_lr": 1.6073265056868204e-06 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 8.030089140211523e-06, "loss": 0.7895, "step": 11560, "vit_lr": 1.6060178280423045e-06 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 8.023537540177027e-06, "loss": 0.7677, "step": 11580, "vit_lr": 1.6047075080354051e-06 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 8.016977746086034e-06, "loss": 0.7861, "step": 11600, "vit_lr": 1.6033955492172064e-06 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 8.010409775716171e-06, "loss": 0.7577, "step": 11620, "vit_lr": 1.602081955143234e-06 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 8.003833646867227e-06, "loss": 0.7774, "step": 11640, "vit_lr": 1.600766729373445e-06 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 7.997249377361094e-06, "loss": 0.7784, "step": 11660, "vit_lr": 1.5994498754722186e-06 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 7.990656985041732e-06, "loss": 0.7687, "step": 11680, "vit_lr": 1.598131397008346e-06 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 7.984056487775112e-06, "loss": 0.7634, "step": 11700, "vit_lr": 1.596811297555022e-06 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 7.977447903449171e-06, "loss": 0.7764, "step": 11720, "vit_lr": 1.5954895806898341e-06 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 7.970831249973761e-06, "loss": 0.7818, "step": 11740, "vit_lr": 1.5941662499947519e-06 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 7.964206545280601e-06, "loss": 0.7594, "step": 11760, "vit_lr": 1.5928413090561202e-06 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 7.957573807323239e-06, "loss": 0.7904, "step": 11780, "vit_lr": 1.5915147614646475e-06 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 7.95093305407698e-06, "loss": 0.7709, "step": 11800, "vit_lr": 1.5901866108153956e-06 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 7.94428430353886e-06, "loss": 0.7733, "step": 11820, "vit_lr": 1.5888568607077718e-06 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 7.937627573727586e-06, "loss": 0.7893, "step": 11840, "vit_lr": 1.587525514745517e-06 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 7.93096288268349e-06, "loss": 0.7817, "step": 11860, "vit_lr": 1.586192576536698e-06 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 7.92429024846848e-06, "loss": 0.7546, "step": 11880, "vit_lr": 1.5848580496936957e-06 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 7.917609689165989e-06, "loss": 0.7804, "step": 11900, "vit_lr": 1.5835219378331975e-06 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 7.910921222880928e-06, "loss": 0.7625, "step": 11920, "vit_lr": 1.5821842445761854e-06 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 7.904224867739639e-06, "loss": 0.7643, "step": 11940, "vit_lr": 1.5808449735479273e-06 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 7.89752064188984e-06, "loss": 0.7651, "step": 11960, "vit_lr": 1.5795041283779677e-06 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 7.890808563500583e-06, "loss": 0.7721, "step": 11980, "vit_lr": 1.5781617127001166e-06 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 7.8840886507622e-06, "loss": 0.7806, "step": 12000, "vit_lr": 1.5768177301524397e-06 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 7.877360921886252e-06, "loss": 0.7664, "step": 12020, "vit_lr": 1.5754721843772504e-06 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 7.870625395105485e-06, "loss": 0.7674, "step": 12040, "vit_lr": 1.574125079021097e-06 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 7.863882088673779e-06, "loss": 0.7767, "step": 12060, "vit_lr": 1.5727764177347555e-06 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 7.857131020866094e-06, "loss": 0.7956, "step": 12080, "vit_lr": 1.5714262041732185e-06 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 7.850372209978428e-06, "loss": 0.7729, "step": 12100, "vit_lr": 1.5700744419956853e-06 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 7.843605674327758e-06, "loss": 0.7797, "step": 12120, "vit_lr": 1.5687211348655515e-06 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 7.836831432252005e-06, "loss": 0.7657, "step": 12140, "vit_lr": 1.567366286450401e-06 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 7.830049502109966e-06, "loss": 0.7758, "step": 12160, "vit_lr": 1.5660099004219931e-06 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 7.823259902281276e-06, "loss": 0.7623, "step": 12180, "vit_lr": 1.5646519804562548e-06 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 7.816462651166356e-06, "loss": 0.7805, "step": 12200, "vit_lr": 1.5632925302332711e-06 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 7.809657767186366e-06, "loss": 0.7694, "step": 12220, "vit_lr": 1.5619315534372732e-06 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 7.802845268783148e-06, "loss": 0.7697, "step": 12240, "vit_lr": 1.5605690537566293e-06 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 7.79602517441918e-06, "loss": 0.7635, "step": 12260, "vit_lr": 1.5592050348838358e-06 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 7.789197502577523e-06, "loss": 0.7783, "step": 12280, "vit_lr": 1.5578395005155045e-06 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 7.782362271761783e-06, "loss": 0.7712, "step": 12300, "vit_lr": 1.5564724543523563e-06 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 7.77551950049604e-06, "loss": 0.7877, "step": 12320, "vit_lr": 1.5551039000992076e-06 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 7.768669207324816e-06, "loss": 0.7705, "step": 12340, "vit_lr": 1.553733841464963e-06 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 7.76181141081302e-06, "loss": 0.7675, "step": 12360, "vit_lr": 1.5523622821626038e-06 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 7.754946129545886e-06, "loss": 0.7621, "step": 12380, "vit_lr": 1.550989225909177e-06 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 7.748073382128944e-06, "loss": 0.7759, "step": 12400, "vit_lr": 1.5496146764257887e-06 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 7.741193187187955e-06, "loss": 0.7646, "step": 12420, "vit_lr": 1.5482386374375907e-06 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 7.734305563368852e-06, "loss": 0.7566, "step": 12440, "vit_lr": 1.5468611126737704e-06 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 7.727410529337718e-06, "loss": 0.7522, "step": 12460, "vit_lr": 1.5454821058675433e-06 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 7.720508103780708e-06, "loss": 0.7728, "step": 12480, "vit_lr": 1.5441016207561416e-06 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 7.71359830540401e-06, "loss": 0.7808, "step": 12500, "vit_lr": 1.542719661080802e-06 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 7.706681152933794e-06, "loss": 0.7706, "step": 12520, "vit_lr": 1.5413362305867587e-06 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 7.699756665116164e-06, "loss": 0.7642, "step": 12540, "vit_lr": 1.5399513330232327e-06 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 7.692824860717096e-06, "loss": 0.7542, "step": 12560, "vit_lr": 1.5385649721434192e-06 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 7.6858857585224e-06, "loss": 0.7623, "step": 12580, "vit_lr": 1.5371771517044797e-06 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 7.678939377337662e-06, "loss": 0.7701, "step": 12600, "vit_lr": 1.5357878754675323e-06 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 7.671985735988193e-06, "loss": 0.7611, "step": 12620, "vit_lr": 1.5343971471976384e-06 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 7.665024853318985e-06, "loss": 0.7744, "step": 12640, "vit_lr": 1.5330049706637966e-06 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 7.658056748194646e-06, "loss": 0.7574, "step": 12660, "vit_lr": 1.531611349638929e-06 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 7.651081439499369e-06, "loss": 0.7608, "step": 12680, "vit_lr": 1.5302162878998735e-06 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 7.644098946136856e-06, "loss": 0.7921, "step": 12700, "vit_lr": 1.528819789227371e-06 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 7.63710928703029e-06, "loss": 0.7641, "step": 12720, "vit_lr": 1.527421857406058e-06 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 7.630112481122276e-06, "loss": 0.7867, "step": 12740, "vit_lr": 1.526022496224455e-06 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 7.623108547374774e-06, "loss": 0.7825, "step": 12760, "vit_lr": 1.5246217094749545e-06 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 7.616097504769073e-06, "loss": 0.7585, "step": 12780, "vit_lr": 1.5232195009538145e-06 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 7.6090793723057245e-06, "loss": 0.772, "step": 12800, "vit_lr": 1.5218158744611446e-06 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 7.602054169004493e-06, "loss": 0.7624, "step": 12820, "vit_lr": 1.5204108338008986e-06 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 7.595021913904305e-06, "loss": 0.7556, "step": 12840, "vit_lr": 1.519004382780861e-06 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 7.5879826260632025e-06, "loss": 0.7526, "step": 12860, "vit_lr": 1.5175965252126403e-06 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 7.580936324558281e-06, "loss": 0.7163, "step": 12880, "vit_lr": 1.516187264911656e-06 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 7.573883028485646e-06, "loss": 0.7064, "step": 12900, "vit_lr": 1.514776605697129e-06 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 7.566822756960362e-06, "loss": 0.6915, "step": 12920, "vit_lr": 1.5133645513920721e-06 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 7.559755529116391e-06, "loss": 0.6964, "step": 12940, "vit_lr": 1.511951105823278e-06 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 7.552681364106551e-06, "loss": 0.7055, "step": 12960, "vit_lr": 1.51053627282131e-06 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 7.5456002811024654e-06, "loss": 0.6965, "step": 12980, "vit_lr": 1.5091200562204928e-06 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 7.538512299294496e-06, "loss": 0.6988, "step": 13000, "vit_lr": 1.507702459858899e-06 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 7.531417437891708e-06, "loss": 0.724, "step": 13020, "vit_lr": 1.5062834875783414e-06 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 7.524315716121808e-06, "loss": 0.7041, "step": 13040, "vit_lr": 1.5048631432243615e-06 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 7.517207153231097e-06, "loss": 0.7064, "step": 13060, "vit_lr": 1.503441430646219e-06 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 7.510091768484411e-06, "loss": 0.6994, "step": 13080, "vit_lr": 1.502018353696882e-06 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 7.502969581165083e-06, "loss": 0.6991, "step": 13100, "vit_lr": 1.5005939162330163e-06 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 7.495840610574872e-06, "loss": 0.6882, "step": 13120, "vit_lr": 1.499168122114974e-06 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 7.488704876033924e-06, "loss": 0.7095, "step": 13140, "vit_lr": 1.4977409752067846e-06 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 7.4815623968807195e-06, "loss": 0.7174, "step": 13160, "vit_lr": 1.4963124793761436e-06 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 7.474413192472013e-06, "loss": 0.7211, "step": 13180, "vit_lr": 1.4948826384944025e-06 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 7.467257282182785e-06, "loss": 0.7218, "step": 13200, "vit_lr": 1.4934514564365568e-06 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 7.460094685406192e-06, "loss": 0.7039, "step": 13220, "vit_lr": 1.4920189370812381e-06 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 7.45292542155351e-06, "loss": 0.6999, "step": 13240, "vit_lr": 1.4905850843107017e-06 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 7.445749510054083e-06, "loss": 0.6943, "step": 13260, "vit_lr": 1.4891499020108164e-06 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 7.438566970355273e-06, "loss": 0.7037, "step": 13280, "vit_lr": 1.4877133940710545e-06 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 7.431377821922402e-06, "loss": 0.6955, "step": 13300, "vit_lr": 1.4862755643844803e-06 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 7.424182084238705e-06, "loss": 0.7014, "step": 13320, "vit_lr": 1.4848364168477408e-06 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 7.416979776805271e-06, "loss": 0.7135, "step": 13340, "vit_lr": 1.483395955361054e-06 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 7.409770919140999e-06, "loss": 0.6949, "step": 13360, "vit_lr": 1.4819541838281995e-06 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 7.402555530782533e-06, "loss": 0.6978, "step": 13380, "vit_lr": 1.4805111061565066e-06 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 7.395333631284221e-06, "loss": 0.7036, "step": 13400, "vit_lr": 1.4790667262568438e-06 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 7.388105240218055e-06, "loss": 0.7174, "step": 13420, "vit_lr": 1.477621048043611e-06 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 7.380870377173619e-06, "loss": 0.7118, "step": 13440, "vit_lr": 1.4761740754347236e-06 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 7.373629061758037e-06, "loss": 0.7201, "step": 13460, "vit_lr": 1.4747258123516073e-06 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 7.366381313595923e-06, "loss": 0.7223, "step": 13480, "vit_lr": 1.4732762627191843e-06 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 7.359127152329317e-06, "loss": 0.7002, "step": 13500, "vit_lr": 1.4718254304658632e-06 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 7.351866597617646e-06, "loss": 0.7007, "step": 13520, "vit_lr": 1.470373319523529e-06 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 7.34459966913766e-06, "loss": 0.713, "step": 13540, "vit_lr": 1.4689199338275317e-06 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 7.337326386583384e-06, "loss": 0.7001, "step": 13560, "vit_lr": 1.4674652773166765e-06 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 7.330046769666062e-06, "loss": 0.7003, "step": 13580, "vit_lr": 1.4660093539332122e-06 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 7.322760838114104e-06, "loss": 0.7002, "step": 13600, "vit_lr": 1.4645521676228205e-06 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 7.315468611673037e-06, "loss": 0.7144, "step": 13620, "vit_lr": 1.4630937223346074e-06 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 7.3081701101054446e-06, "loss": 0.7165, "step": 13640, "vit_lr": 1.4616340220210889e-06 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 7.300865353190918e-06, "loss": 0.6985, "step": 13660, "vit_lr": 1.4601730706381834e-06 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 7.293554360725998e-06, "loss": 0.718, "step": 13680, "vit_lr": 1.4587108721451994e-06 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 7.2862371525241295e-06, "loss": 0.7178, "step": 13700, "vit_lr": 1.4572474305048256e-06 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 7.278913748415597e-06, "loss": 0.7059, "step": 13720, "vit_lr": 1.4557827496831193e-06 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 7.2715841682474794e-06, "loss": 0.7039, "step": 13740, "vit_lr": 1.4543168336494957e-06 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 7.264248431883595e-06, "loss": 0.6927, "step": 13760, "vit_lr": 1.4528496863767189e-06 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 7.2569065592044396e-06, "loss": 0.7195, "step": 13780, "vit_lr": 1.4513813118408878e-06 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 7.249558570107148e-06, "loss": 0.6938, "step": 13800, "vit_lr": 1.4499117140214295e-06 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 7.242204484505424e-06, "loss": 0.7139, "step": 13820, "vit_lr": 1.4484408969010845e-06 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 7.234844322329494e-06, "loss": 0.7184, "step": 13840, "vit_lr": 1.4469688644658986e-06 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 7.227478103526058e-06, "loss": 0.703, "step": 13860, "vit_lr": 1.4454956207052114e-06 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 7.220105848058222e-06, "loss": 0.711, "step": 13880, "vit_lr": 1.4440211696116443e-06 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 7.21272757590546e-06, "loss": 0.7035, "step": 13900, "vit_lr": 1.4425455151810916e-06 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 7.205343307063545e-06, "loss": 0.7008, "step": 13920, "vit_lr": 1.4410686614127087e-06 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 7.197953061544506e-06, "loss": 0.703, "step": 13940, "vit_lr": 1.439590612308901e-06 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 7.190556859376568e-06, "loss": 0.7136, "step": 13960, "vit_lr": 1.4381113718753134e-06 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 7.1831547206040965e-06, "loss": 0.7086, "step": 13980, "vit_lr": 1.436630944120819e-06 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 7.17574666528755e-06, "loss": 0.7204, "step": 14000, "vit_lr": 1.4351493330575097e-06 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 7.168332713503419e-06, "loss": 0.7039, "step": 14020, "vit_lr": 1.4336665427006835e-06 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 7.160912885344172e-06, "loss": 0.7152, "step": 14040, "vit_lr": 1.4321825770688344e-06 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 7.153487200918211e-06, "loss": 0.701, "step": 14060, "vit_lr": 1.430697440183642e-06 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 7.146055680349797e-06, "loss": 0.7352, "step": 14080, "vit_lr": 1.4292111360699592e-06 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 7.138618343779018e-06, "loss": 0.7114, "step": 14100, "vit_lr": 1.4277236687558035e-06 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 7.131175211361717e-06, "loss": 0.7084, "step": 14120, "vit_lr": 1.4262350422723432e-06 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 7.123726303269449e-06, "loss": 0.7027, "step": 14140, "vit_lr": 1.4247452606538897e-06 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 7.116271639689418e-06, "loss": 0.7007, "step": 14160, "vit_lr": 1.4232543279378834e-06 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 7.10881124082443e-06, "loss": 0.7102, "step": 14180, "vit_lr": 1.4217622481648857e-06 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 7.101345126892828e-06, "loss": 0.7136, "step": 14200, "vit_lr": 1.4202690253785654e-06 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 7.093873318128448e-06, "loss": 0.7229, "step": 14220, "vit_lr": 1.4187746636256894e-06 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 7.08639583478056e-06, "loss": 0.7107, "step": 14240, "vit_lr": 1.4172791669561117e-06 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 7.078912697113809e-06, "loss": 0.7138, "step": 14260, "vit_lr": 1.4157825394227617e-06 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 7.0714239254081674e-06, "loss": 0.7135, "step": 14280, "vit_lr": 1.4142847850816334e-06 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 7.063929539958873e-06, "loss": 0.7025, "step": 14300, "vit_lr": 1.4127859079917746e-06 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 7.056429561076381e-06, "loss": 0.703, "step": 14320, "vit_lr": 1.411285912215276e-06 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 7.048924009086302e-06, "loss": 0.711, "step": 14340, "vit_lr": 1.4097848018172602e-06 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 7.041412904329352e-06, "loss": 0.6939, "step": 14360, "vit_lr": 1.4082825808658702e-06 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 7.033896267161294e-06, "loss": 0.7111, "step": 14380, "vit_lr": 1.4067792534322587e-06 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 7.026374117952887e-06, "loss": 0.706, "step": 14400, "vit_lr": 1.4052748235905772e-06 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 7.018846477089825e-06, "loss": 0.7176, "step": 14420, "vit_lr": 1.4037692954179647e-06 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 7.011313364972688e-06, "loss": 0.6884, "step": 14440, "vit_lr": 1.4022626729945372e-06 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 7.00377480201688e-06, "loss": 0.6953, "step": 14460, "vit_lr": 1.400754960403376e-06 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 6.9962308086525796e-06, "loss": 0.7144, "step": 14480, "vit_lr": 1.3992461617305159e-06 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 6.988681405324685e-06, "loss": 0.7063, "step": 14500, "vit_lr": 1.397736281064937e-06 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 6.981126612492748e-06, "loss": 0.7007, "step": 14520, "vit_lr": 1.3962253224985494e-06 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 6.973566450630934e-06, "loss": 0.7065, "step": 14540, "vit_lr": 1.3947132901261866e-06 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 6.966000940227957e-06, "loss": 0.7113, "step": 14560, "vit_lr": 1.3932001880455912e-06 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 6.958430101787022e-06, "loss": 0.708, "step": 14580, "vit_lr": 1.3916860203574043e-06 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 6.950853955825781e-06, "loss": 0.71, "step": 14600, "vit_lr": 1.3901707911651558e-06 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 6.943272522876261e-06, "loss": 0.6974, "step": 14620, "vit_lr": 1.3886545045752521e-06 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 6.935685823484824e-06, "loss": 0.7116, "step": 14640, "vit_lr": 1.3871371646969647e-06 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 6.928093878212104e-06, "loss": 0.7076, "step": 14660, "vit_lr": 1.3856187756424204e-06 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 6.9204967076329455e-06, "loss": 0.7005, "step": 14680, "vit_lr": 1.384099341526589e-06 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 6.9128943323363615e-06, "loss": 0.7063, "step": 14700, "vit_lr": 1.3825788664672722e-06 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 6.905286772925467e-06, "loss": 0.71, "step": 14720, "vit_lr": 1.3810573545850932e-06 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 6.897674050017427e-06, "loss": 0.687, "step": 14740, "vit_lr": 1.3795348100034853e-06 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 6.890056184243399e-06, "loss": 0.7231, "step": 14760, "vit_lr": 1.3780112368486797e-06 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 6.882433196248481e-06, "loss": 0.711, "step": 14780, "vit_lr": 1.3764866392496958e-06 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 6.8748051066916485e-06, "loss": 0.6934, "step": 14800, "vit_lr": 1.3749610213383297e-06 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 6.867171936245705e-06, "loss": 0.7065, "step": 14820, "vit_lr": 1.3734343872491407e-06 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 6.859533705597223e-06, "loss": 0.7149, "step": 14840, "vit_lr": 1.3719067411194445e-06 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 6.851890435446491e-06, "loss": 0.7111, "step": 14860, "vit_lr": 1.370378087089298e-06 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 6.844242146507451e-06, "loss": 0.7092, "step": 14880, "vit_lr": 1.36884842930149e-06 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 6.83658885950765e-06, "loss": 0.7071, "step": 14900, "vit_lr": 1.3673177719015298e-06 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 6.828930595188177e-06, "loss": 0.7064, "step": 14920, "vit_lr": 1.3657861190376352e-06 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 6.821267374303614e-06, "loss": 0.7172, "step": 14940, "vit_lr": 1.3642534748607227e-06 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 6.813599217621969e-06, "loss": 0.7023, "step": 14960, "vit_lr": 1.3627198435243936e-06 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 6.805926145924636e-06, "loss": 0.7184, "step": 14980, "vit_lr": 1.361185229184927e-06 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 6.798248180006322e-06, "loss": 0.7071, "step": 15000, "vit_lr": 1.3596496360012641e-06 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 6.790565340674997e-06, "loss": 0.7057, "step": 15020, "vit_lr": 1.3581130681349992e-06 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 6.782877648751844e-06, "loss": 0.6992, "step": 15040, "vit_lr": 1.3565755297503686e-06 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 6.775185125071193e-06, "loss": 0.6923, "step": 15060, "vit_lr": 1.3550370250142385e-06 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 6.76748779048047e-06, "loss": 0.702, "step": 15080, "vit_lr": 1.3534975580960938e-06 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 6.759785665840138e-06, "loss": 0.6971, "step": 15100, "vit_lr": 1.3519571331680276e-06 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 6.752078772023642e-06, "loss": 0.7254, "step": 15120, "vit_lr": 1.3504157544047282e-06 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 6.744367129917353e-06, "loss": 0.6953, "step": 15140, "vit_lr": 1.3488734259834704e-06 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 6.7366507604205065e-06, "loss": 0.7088, "step": 15160, "vit_lr": 1.3473301520841012e-06 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 6.728929684445155e-06, "loss": 0.7034, "step": 15180, "vit_lr": 1.345785936889031e-06 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 6.721203922916102e-06, "loss": 0.7057, "step": 15200, "vit_lr": 1.3442407845832202e-06 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 6.7134734967708506e-06, "loss": 0.7046, "step": 15220, "vit_lr": 1.34269469935417e-06 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 6.705738426959547e-06, "loss": 0.7071, "step": 15240, "vit_lr": 1.3411476853919093e-06 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 6.697998734444917e-06, "loss": 0.7048, "step": 15260, "vit_lr": 1.3395997468889833e-06 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 6.690254440202221e-06, "loss": 0.7112, "step": 15280, "vit_lr": 1.338050888040444e-06 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 6.682505565219185e-06, "loss": 0.7021, "step": 15300, "vit_lr": 1.3365011130438369e-06 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 6.674752130495954e-06, "loss": 0.7014, "step": 15320, "vit_lr": 1.3349504260991906e-06 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 6.666994157045027e-06, "loss": 0.7038, "step": 15340, "vit_lr": 1.3333988314090052e-06 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 6.659231665891204e-06, "loss": 0.7062, "step": 15360, "vit_lr": 1.3318463331782407e-06 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 6.6514646780715274e-06, "loss": 0.7051, "step": 15380, "vit_lr": 1.3302929356143052e-06 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 6.643693214635227e-06, "loss": 0.7054, "step": 15400, "vit_lr": 1.3287386429270452e-06 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 6.635917296643666e-06, "loss": 0.7016, "step": 15420, "vit_lr": 1.327183459328733e-06 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 6.628136945170274e-06, "loss": 0.7062, "step": 15440, "vit_lr": 1.3256273890340545e-06 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 6.620352181300494e-06, "loss": 0.6992, "step": 15460, "vit_lr": 1.3240704362600985e-06 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 6.612563026131735e-06, "loss": 0.7037, "step": 15480, "vit_lr": 1.3225126052263468e-06 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 6.604769500773302e-06, "loss": 0.7057, "step": 15500, "vit_lr": 1.3209539001546602e-06 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 6.596971626346342e-06, "loss": 0.711, "step": 15520, "vit_lr": 1.3193943252692683e-06 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 6.5891694239837924e-06, "loss": 0.7138, "step": 15540, "vit_lr": 1.3178338847967584e-06 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 6.581362914830316e-06, "loss": 0.7032, "step": 15560, "vit_lr": 1.316272582966063e-06 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 6.57355212004225e-06, "loss": 0.7059, "step": 15580, "vit_lr": 1.3147104240084498e-06 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 6.565737060787545e-06, "loss": 0.7056, "step": 15600, "vit_lr": 1.3131474121575088e-06 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 6.557917758245711e-06, "loss": 0.7125, "step": 15620, "vit_lr": 1.311583551649142e-06 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 6.550094233607751e-06, "loss": 0.7026, "step": 15640, "vit_lr": 1.31001884672155e-06 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 6.542266508076119e-06, "loss": 0.7034, "step": 15660, "vit_lr": 1.3084533016152235e-06 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 6.5344346028646474e-06, "loss": 0.7011, "step": 15680, "vit_lr": 1.3068869205729294e-06 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 6.526598539198497e-06, "loss": 0.6924, "step": 15700, "vit_lr": 1.3053197078396994e-06 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 6.518758338314103e-06, "loss": 0.7012, "step": 15720, "vit_lr": 1.3037516676628205e-06 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 6.510914021459107e-06, "loss": 0.7058, "step": 15740, "vit_lr": 1.3021828042918212e-06 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 6.503065609892308e-06, "loss": 0.6878, "step": 15760, "vit_lr": 1.3006131219784615e-06 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 6.495213124883602e-06, "loss": 0.7026, "step": 15780, "vit_lr": 1.29904262497672e-06 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 6.487356587713922e-06, "loss": 0.7127, "step": 15800, "vit_lr": 1.2974713175427843e-06 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 6.479496019675187e-06, "loss": 0.7124, "step": 15820, "vit_lr": 1.2958992039350375e-06 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 6.471631442070236e-06, "loss": 0.7083, "step": 15840, "vit_lr": 1.2943262884140472e-06 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 6.463762876212779e-06, "loss": 0.713, "step": 15860, "vit_lr": 1.2927525752425557e-06 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 6.455890343427329e-06, "loss": 0.7151, "step": 15880, "vit_lr": 1.2911780686854657e-06 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 6.448013865049152e-06, "loss": 0.7038, "step": 15900, "vit_lr": 1.2896027730098301e-06 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 6.440133462424208e-06, "loss": 0.7164, "step": 15920, "vit_lr": 1.2880266924848413e-06 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 6.432249156909091e-06, "loss": 0.7172, "step": 15940, "vit_lr": 1.286449831381818e-06 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 6.424360969870972e-06, "loss": 0.7033, "step": 15960, "vit_lr": 1.2848721939741942e-06 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 6.416468922687542e-06, "loss": 0.7157, "step": 15980, "vit_lr": 1.2832937845375082e-06 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 6.408573036746954e-06, "loss": 0.7, "step": 16000, "vit_lr": 1.2817146073493908e-06 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 6.400673333447762e-06, "loss": 0.7031, "step": 16020, "vit_lr": 1.2801346666895524e-06 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 6.392769834198866e-06, "loss": 0.7101, "step": 16040, "vit_lr": 1.278553966839773e-06 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 6.384862560419458e-06, "loss": 0.7049, "step": 16060, "vit_lr": 1.2769725120838915e-06 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 6.376951533538951e-06, "loss": 0.7061, "step": 16080, "vit_lr": 1.27539030670779e-06 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 6.369036774996935e-06, "loss": 0.7047, "step": 16100, "vit_lr": 1.2738073549993868e-06 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 6.361118306243114e-06, "loss": 0.7124, "step": 16120, "vit_lr": 1.2722236612486226e-06 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 6.353196148737241e-06, "loss": 0.7039, "step": 16140, "vit_lr": 1.270639229747448e-06 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 6.345270323949073e-06, "loss": 0.7106, "step": 16160, "vit_lr": 1.2690540647898145e-06 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 6.3373408533583e-06, "loss": 0.6979, "step": 16180, "vit_lr": 1.26746817067166e-06 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 6.329407758454498e-06, "loss": 0.7117, "step": 16200, "vit_lr": 1.2658815516908995e-06 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 6.321471060737058e-06, "loss": 0.7131, "step": 16220, "vit_lr": 1.2642942121474113e-06 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 6.313530781715142e-06, "loss": 0.7029, "step": 16240, "vit_lr": 1.2627061563430281e-06 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 6.305586942907611e-06, "loss": 0.7086, "step": 16260, "vit_lr": 1.261117388581522e-06 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 6.2976395658429815e-06, "loss": 0.694, "step": 16280, "vit_lr": 1.259527913168596e-06 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 6.289688672059351e-06, "loss": 0.7207, "step": 16300, "vit_lr": 1.25793773441187e-06 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 6.281734283104352e-06, "loss": 0.7274, "step": 16320, "vit_lr": 1.2563468566208704e-06 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 6.273776420535089e-06, "loss": 0.7144, "step": 16340, "vit_lr": 1.2547552841070178e-06 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 6.26581510591808e-06, "loss": 0.6983, "step": 16360, "vit_lr": 1.2531630211836157e-06 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 6.257850360829196e-06, "loss": 0.7176, "step": 16380, "vit_lr": 1.2515700721658392e-06 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 6.249882206853608e-06, "loss": 0.7054, "step": 16400, "vit_lr": 1.2499764413707214e-06 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 6.241910665585723e-06, "loss": 0.7021, "step": 16420, "vit_lr": 1.2483821331171445e-06 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 6.233935758629131e-06, "loss": 0.6787, "step": 16440, "vit_lr": 1.246787151725826e-06 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 6.22595750759654e-06, "loss": 0.7114, "step": 16460, "vit_lr": 1.245191501519308e-06 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 6.2179759341097235e-06, "loss": 0.7064, "step": 16480, "vit_lr": 1.2435951868219446e-06 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 6.2099910597994565e-06, "loss": 0.6932, "step": 16500, "vit_lr": 1.241998211959891e-06 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 6.202002906305461e-06, "loss": 0.6969, "step": 16520, "vit_lr": 1.240400581261092e-06 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 6.194011495276344e-06, "loss": 0.6947, "step": 16540, "vit_lr": 1.2388022990552688e-06 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 6.1860168483695485e-06, "loss": 0.712, "step": 16560, "vit_lr": 1.2372033696739094e-06 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 6.178018987251272e-06, "loss": 0.6835, "step": 16580, "vit_lr": 1.2356037974502543e-06 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 6.170017933596438e-06, "loss": 0.6922, "step": 16600, "vit_lr": 1.2340035867192875e-06 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 6.162013709088614e-06, "loss": 0.7053, "step": 16620, "vit_lr": 1.2324027418177228e-06 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 6.1540063354199645e-06, "loss": 0.7205, "step": 16640, "vit_lr": 1.2308012670839926e-06 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 6.145995834291182e-06, "loss": 0.7259, "step": 16660, "vit_lr": 1.2291991668582362e-06 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 6.137982227411443e-06, "loss": 0.7147, "step": 16680, "vit_lr": 1.2275964454822885e-06 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 6.129965536498337e-06, "loss": 0.7008, "step": 16700, "vit_lr": 1.2259931072996672e-06 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 6.121945783277809e-06, "loss": 0.7009, "step": 16720, "vit_lr": 1.2243891566555617e-06 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 6.113922989484108e-06, "loss": 0.7013, "step": 16740, "vit_lr": 1.2227845978968214e-06 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 6.105897176859721e-06, "loss": 0.6888, "step": 16760, "vit_lr": 1.221179435371944e-06 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 6.097868367155315e-06, "loss": 0.707, "step": 16780, "vit_lr": 1.219573673431063e-06 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 6.089836582129681e-06, "loss": 0.7118, "step": 16800, "vit_lr": 1.217967316425936e-06 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 6.081801843549675e-06, "loss": 0.6979, "step": 16820, "vit_lr": 1.2163603687099349e-06 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 6.073764173190153e-06, "loss": 0.708, "step": 16840, "vit_lr": 1.2147528346380305e-06 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 6.0657235928339194e-06, "loss": 0.7097, "step": 16860, "vit_lr": 1.2131447185667837e-06 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 6.057680124271665e-06, "loss": 0.7111, "step": 16880, "vit_lr": 1.2115360248543327e-06 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 6.049633789301906e-06, "loss": 0.7033, "step": 16900, "vit_lr": 1.2099267578603812e-06 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 6.0415846097309296e-06, "loss": 0.6813, "step": 16920, "vit_lr": 1.2083169219461858e-06 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 6.033532607372729e-06, "loss": 0.7021, "step": 16940, "vit_lr": 1.2067065214745458e-06 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 6.025477804048951e-06, "loss": 0.7047, "step": 16960, "vit_lr": 1.20509556080979e-06 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 6.017420221588829e-06, "loss": 0.7121, "step": 16980, "vit_lr": 1.2034840443177656e-06 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 6.0093598818291335e-06, "loss": 0.7057, "step": 17000, "vit_lr": 1.2018719763658264e-06 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 6.001296806614099e-06, "loss": 0.7189, "step": 17020, "vit_lr": 1.2002593613228195e-06 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 5.993231017795385e-06, "loss": 0.6914, "step": 17040, "vit_lr": 1.1986462035590767e-06 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 5.985162537231995e-06, "loss": 0.7036, "step": 17060, "vit_lr": 1.1970325074463989e-06 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 5.9770913867902316e-06, "loss": 0.7118, "step": 17080, "vit_lr": 1.195418277358046e-06 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 5.969017588343633e-06, "loss": 0.7046, "step": 17100, "vit_lr": 1.1938035176687264e-06 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 5.960941163772915e-06, "loss": 0.6758, "step": 17120, "vit_lr": 1.192188232754583e-06 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 5.952862134965907e-06, "loss": 0.6899, "step": 17140, "vit_lr": 1.190572426993181e-06 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 5.944780523817497e-06, "loss": 0.7004, "step": 17160, "vit_lr": 1.1889561047634994e-06 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 5.936696352229573e-06, "loss": 0.71, "step": 17180, "vit_lr": 1.1873392704459143e-06 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 5.928609642110964e-06, "loss": 0.7082, "step": 17200, "vit_lr": 1.1857219284221926e-06 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 5.920520415377371e-06, "loss": 0.7181, "step": 17220, "vit_lr": 1.1841040830754741e-06 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 5.912428693951324e-06, "loss": 0.7054, "step": 17240, "vit_lr": 1.1824857387902646e-06 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 5.90433449976211e-06, "loss": 0.7197, "step": 17260, "vit_lr": 1.180866899952422e-06 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 5.896237854745714e-06, "loss": 0.7067, "step": 17280, "vit_lr": 1.1792475709491428e-06 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 5.88813878084477e-06, "loss": 0.7281, "step": 17300, "vit_lr": 1.177627756168954e-06 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 5.88003730000849e-06, "loss": 0.6952, "step": 17320, "vit_lr": 1.1760074600016979e-06 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 5.871933434192608e-06, "loss": 0.7071, "step": 17340, "vit_lr": 1.1743866868385214e-06 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 5.863827205359326e-06, "loss": 0.714, "step": 17360, "vit_lr": 1.172765441071865e-06 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 5.855718635477245e-06, "loss": 0.7047, "step": 17380, "vit_lr": 1.171143727095449e-06 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 5.8476077465213145e-06, "loss": 0.7054, "step": 17400, "vit_lr": 1.1695215493042628e-06 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 5.839494560472766e-06, "loss": 0.6912, "step": 17420, "vit_lr": 1.167898912094553e-06 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 5.83137909931906e-06, "loss": 0.717, "step": 17440, "vit_lr": 1.1662758198638118e-06 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 5.823261385053817e-06, "loss": 0.7038, "step": 17460, "vit_lr": 1.164652277010763e-06 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 5.815141439676769e-06, "loss": 0.7048, "step": 17480, "vit_lr": 1.1630282879353536e-06 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 5.807019285193694e-06, "loss": 0.7088, "step": 17500, "vit_lr": 1.1614038570387385e-06 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 5.798894943616349e-06, "loss": 0.6984, "step": 17520, "vit_lr": 1.1597789887232697e-06 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 5.790768436962431e-06, "loss": 0.6998, "step": 17540, "vit_lr": 1.158153687392486e-06 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 5.782639787255496e-06, "loss": 0.6925, "step": 17560, "vit_lr": 1.1565279574510992e-06 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 5.77450901652491e-06, "loss": 0.7088, "step": 17580, "vit_lr": 1.1549018033049819e-06 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 5.766376146805787e-06, "loss": 0.7037, "step": 17600, "vit_lr": 1.1532752293611571e-06 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 5.7582412001389275e-06, "loss": 0.7014, "step": 17620, "vit_lr": 1.1516482400277854e-06 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 5.750104198570768e-06, "loss": 0.7081, "step": 17640, "vit_lr": 1.1500208397141533e-06 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 5.7419651641533025e-06, "loss": 0.704, "step": 17660, "vit_lr": 1.1483930328306605e-06 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 5.733824118944045e-06, "loss": 0.6999, "step": 17680, "vit_lr": 1.1467648237888087e-06 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 5.725681085005954e-06, "loss": 0.6977, "step": 17700, "vit_lr": 1.1451362170011908e-06 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 5.7175360844073745e-06, "loss": 0.6977, "step": 17720, "vit_lr": 1.1435072168814749e-06 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 5.709389139221988e-06, "loss": 0.7032, "step": 17740, "vit_lr": 1.1418778278443974e-06 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 5.7012402715287415e-06, "loss": 0.6936, "step": 17760, "vit_lr": 1.1402480543057481e-06 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 5.693089503411795e-06, "loss": 0.7144, "step": 17780, "vit_lr": 1.1386179006823589e-06 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 5.684936856960457e-06, "loss": 0.7023, "step": 17800, "vit_lr": 1.1369873713920912e-06 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 5.676782354269125e-06, "loss": 0.6999, "step": 17820, "vit_lr": 1.1353564708538247e-06 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 5.668626017437228e-06, "loss": 0.7081, "step": 17840, "vit_lr": 1.1337252034874455e-06 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 5.660467868569171e-06, "loss": 0.702, "step": 17860, "vit_lr": 1.132093573713834e-06 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 5.652307929774262e-06, "loss": 0.6937, "step": 17880, "vit_lr": 1.1304615859548523e-06 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 5.644146223166664e-06, "loss": 0.7135, "step": 17900, "vit_lr": 1.1288292446333328e-06 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 5.63598277086533e-06, "loss": 0.71, "step": 17920, "vit_lr": 1.1271965541730657e-06 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 5.627817594993944e-06, "loss": 0.6867, "step": 17940, "vit_lr": 1.1255635189987887e-06 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 5.61965071768086e-06, "loss": 0.6986, "step": 17960, "vit_lr": 1.123930143536172e-06 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 5.611482161059046e-06, "loss": 0.7042, "step": 17980, "vit_lr": 1.122296432211809e-06 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 5.6033119472660205e-06, "loss": 0.7117, "step": 18000, "vit_lr": 1.1206623894532039e-06 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 5.595140098443787e-06, "loss": 0.6981, "step": 18020, "vit_lr": 1.1190280196887573e-06 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 5.586966636738791e-06, "loss": 0.6957, "step": 18040, "vit_lr": 1.117393327347758e-06 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 5.578791584301837e-06, "loss": 0.7047, "step": 18060, "vit_lr": 1.1157583168603673e-06 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 5.570614963288051e-06, "loss": 0.7166, "step": 18080, "vit_lr": 1.1141229926576101e-06 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 5.562436795856803e-06, "loss": 0.688, "step": 18100, "vit_lr": 1.1124873591713604e-06 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 5.554257104171658e-06, "loss": 0.6956, "step": 18120, "vit_lr": 1.1108514208343315e-06 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 5.546075910400309e-06, "loss": 0.6987, "step": 18140, "vit_lr": 1.1092151820800617e-06 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 5.537893236714521e-06, "loss": 0.7087, "step": 18160, "vit_lr": 1.107578647342904e-06 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 5.529709105290071e-06, "loss": 0.6938, "step": 18180, "vit_lr": 1.105941821058014e-06 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 5.521523538306684e-06, "loss": 0.6985, "step": 18200, "vit_lr": 1.1043047076613368e-06 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 5.513336557947978e-06, "loss": 0.6817, "step": 18220, "vit_lr": 1.1026673115895954e-06 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 5.5051481864014e-06, "loss": 0.7059, "step": 18240, "vit_lr": 1.1010296372802798e-06 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 5.496958445858166e-06, "loss": 0.7148, "step": 18260, "vit_lr": 1.099391689171633e-06 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 5.4887673585132065e-06, "loss": 0.7054, "step": 18280, "vit_lr": 1.0977534717026411e-06 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 5.480574946565094e-06, "loss": 0.6813, "step": 18300, "vit_lr": 1.0961149893130185e-06 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 5.472381232216003e-06, "loss": 0.692, "step": 18320, "vit_lr": 1.0944762464432005e-06 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 5.464186237671625e-06, "loss": 0.7027, "step": 18340, "vit_lr": 1.0928372475343247e-06 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 5.455989985141125e-06, "loss": 0.702, "step": 18360, "vit_lr": 1.0911979970282251e-06 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 5.447792496837085e-06, "loss": 0.7107, "step": 18380, "vit_lr": 1.0895584993674169e-06 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 5.439593794975425e-06, "loss": 0.7043, "step": 18400, "vit_lr": 1.0879187589950848e-06 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 5.43139390177536e-06, "loss": 0.6997, "step": 18420, "vit_lr": 1.086278780355072e-06 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 5.4231928394593315e-06, "loss": 0.6831, "step": 18440, "vit_lr": 1.0846385678918662e-06 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 5.414990630252951e-06, "loss": 0.6976, "step": 18460, "vit_lr": 1.0829981260505901e-06 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 5.406787296384937e-06, "loss": 0.7113, "step": 18480, "vit_lr": 1.0813574592769873e-06 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 5.398582860087054e-06, "loss": 0.7054, "step": 18500, "vit_lr": 1.0797165720174105e-06 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 5.390377343594061e-06, "loss": 0.707, "step": 18520, "vit_lr": 1.0780754687188121e-06 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 5.382170769143636e-06, "loss": 0.7173, "step": 18540, "vit_lr": 1.076434153828727e-06 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 5.3739631589763276e-06, "loss": 0.7146, "step": 18560, "vit_lr": 1.0747926317952654e-06 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 5.365754535335496e-06, "loss": 0.6981, "step": 18580, "vit_lr": 1.073150907067099e-06 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 5.357544920467237e-06, "loss": 0.7129, "step": 18600, "vit_lr": 1.0715089840934472e-06 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 5.3493343366203456e-06, "loss": 0.7067, "step": 18620, "vit_lr": 1.069866867324069e-06 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 5.341122806046234e-06, "loss": 0.6949, "step": 18640, "vit_lr": 1.0682245612092466e-06 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 5.332910350998882e-06, "loss": 0.7046, "step": 18660, "vit_lr": 1.0665820701997764e-06 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 5.324696993734777e-06, "loss": 0.6894, "step": 18680, "vit_lr": 1.0649393987469553e-06 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 5.31648275651285e-06, "loss": 0.6898, "step": 18700, "vit_lr": 1.0632965513025698e-06 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 5.308267661594414e-06, "loss": 0.7093, "step": 18720, "vit_lr": 1.0616535323188826e-06 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 5.300051731243111e-06, "loss": 0.7039, "step": 18740, "vit_lr": 1.060010346248622e-06 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 5.291834987724848e-06, "loss": 0.7141, "step": 18760, "vit_lr": 1.0583669975449697e-06 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 5.28361745330773e-06, "loss": 0.6995, "step": 18780, "vit_lr": 1.056723490661546e-06 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 5.275399150262008e-06, "loss": 0.6921, "step": 18800, "vit_lr": 1.0550798300524012e-06 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 5.2671801008600175e-06, "loss": 0.6981, "step": 18820, "vit_lr": 1.0534360201720033e-06 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 5.2589603273761145e-06, "loss": 0.6954, "step": 18840, "vit_lr": 1.051792065475223e-06 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 5.2507398520866215e-06, "loss": 0.7016, "step": 18860, "vit_lr": 1.050147970417324e-06 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 5.242518697269757e-06, "loss": 0.7007, "step": 18880, "vit_lr": 1.0485037394539513e-06 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 5.234296885205586e-06, "loss": 0.714, "step": 18900, "vit_lr": 1.046859377041117e-06 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 5.226074438175952e-06, "loss": 0.7097, "step": 18920, "vit_lr": 1.0452148876351903e-06 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 5.217851378464419e-06, "loss": 0.7104, "step": 18940, "vit_lr": 1.0435702756928837e-06 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 5.209627728356216e-06, "loss": 0.7036, "step": 18960, "vit_lr": 1.041925545671243e-06 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 5.201403510138163e-06, "loss": 0.7137, "step": 18980, "vit_lr": 1.0402807020276324e-06 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 5.19317874609863e-06, "loss": 0.7001, "step": 19000, "vit_lr": 1.0386357492197257e-06 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 5.18495345852746e-06, "loss": 0.6997, "step": 19020, "vit_lr": 1.0369906917054918e-06 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 5.176727669715913e-06, "loss": 0.6981, "step": 19040, "vit_lr": 1.0353455339431824e-06 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 5.1685014019566145e-06, "loss": 0.697, "step": 19060, "vit_lr": 1.0337002803913228e-06 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 5.160274677543484e-06, "loss": 0.6963, "step": 19080, "vit_lr": 1.0320549355086968e-06 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 5.152047518771676e-06, "loss": 0.6963, "step": 19100, "vit_lr": 1.0304095037543351e-06 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 5.1438199479375285e-06, "loss": 0.6907, "step": 19120, "vit_lr": 1.0287639895875056e-06 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 5.135591987338488e-06, "loss": 0.697, "step": 19140, "vit_lr": 1.0271183974676975e-06 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 5.1273636592730654e-06, "loss": 0.688, "step": 19160, "vit_lr": 1.025472731854613e-06 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 5.119134986040764e-06, "loss": 0.6957, "step": 19180, "vit_lr": 1.0238269972081527e-06 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 5.1109059899420185e-06, "loss": 0.6969, "step": 19200, "vit_lr": 1.0221811979884036e-06 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 5.1026766932781465e-06, "loss": 0.7041, "step": 19220, "vit_lr": 1.0205353386556292e-06 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 5.094447118351274e-06, "loss": 0.6919, "step": 19240, "vit_lr": 1.0188894236702546e-06 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 5.086217287464285e-06, "loss": 0.7068, "step": 19260, "vit_lr": 1.017243457492857e-06 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 5.0779872229207536e-06, "loss": 0.6995, "step": 19280, "vit_lr": 1.0155974445841507e-06 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 5.069756947024891e-06, "loss": 0.6905, "step": 19300, "vit_lr": 1.013951389404978e-06 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 5.061526482081479e-06, "loss": 0.7068, "step": 19320, "vit_lr": 1.0123052964162957e-06 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 5.0532958503958095e-06, "loss": 0.7086, "step": 19340, "vit_lr": 1.0106591700791618e-06 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 5.045065074273631e-06, "loss": 0.701, "step": 19360, "vit_lr": 1.009013014854726e-06 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 5.036834176021079e-06, "loss": 0.6914, "step": 19380, "vit_lr": 1.0073668352042158e-06 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 5.028603177944627e-06, "loss": 0.6843, "step": 19400, "vit_lr": 1.0057206355889252e-06 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 5.020372102351008e-06, "loss": 0.6947, "step": 19420, "vit_lr": 1.0040744204702013e-06 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 5.0121409715471735e-06, "loss": 0.6988, "step": 19440, "vit_lr": 1.0024281943094346e-06 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 5.003909807840225e-06, "loss": 0.7035, "step": 19460, "vit_lr": 1.0007819615680448e-06 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 4.995678633537348e-06, "loss": 0.6969, "step": 19480, "vit_lr": 9.991357267074695e-07 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 4.987447470945759e-06, "loss": 0.7061, "step": 19500, "vit_lr": 9.974894941891518e-07 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 4.979216342372643e-06, "loss": 0.6957, "step": 19520, "vit_lr": 9.958432684745285e-07 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 4.9709852701250946e-06, "loss": 0.697, "step": 19540, "vit_lr": 9.941970540250187e-07 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 4.962754276510053e-06, "loss": 0.7115, "step": 19560, "vit_lr": 9.925508553020104e-07 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 4.954523383834243e-06, "loss": 0.7002, "step": 19580, "vit_lr": 9.909046767668485e-07 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 4.946292614404122e-06, "loss": 0.7009, "step": 19600, "vit_lr": 9.892585228808244e-07 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 4.93806199052581e-06, "loss": 0.6964, "step": 19620, "vit_lr": 9.876123981051618e-07 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 4.929831534505027e-06, "loss": 0.7123, "step": 19640, "vit_lr": 9.859663069010054e-07 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 4.921601268647047e-06, "loss": 0.7047, "step": 19660, "vit_lr": 9.843202537294092e-07 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 4.913371215256623e-06, "loss": 0.7067, "step": 19680, "vit_lr": 9.826742430513245e-07 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 4.905141396637935e-06, "loss": 0.7017, "step": 19700, "vit_lr": 9.810282793275868e-07 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 4.896911835094526e-06, "loss": 0.7064, "step": 19720, "vit_lr": 9.79382367018905e-07 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 4.888682552929242e-06, "loss": 0.6919, "step": 19740, "vit_lr": 9.777365105858483e-07 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 4.88045357244417e-06, "loss": 0.692, "step": 19760, "vit_lr": 9.76090714488834e-07 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 4.872224915940583e-06, "loss": 0.6897, "step": 19780, "vit_lr": 9.744449831881164e-07 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 4.863996605718873e-06, "loss": 0.6787, "step": 19800, "vit_lr": 9.727993211437745e-07 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 4.855768664078498e-06, "loss": 0.6825, "step": 19820, "vit_lr": 9.711537328156995e-07 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 4.847541113317909e-06, "loss": 0.7039, "step": 19840, "vit_lr": 9.695082226635819e-07 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 4.839313975734509e-06, "loss": 0.6965, "step": 19860, "vit_lr": 9.678627951469017e-07 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 4.831087273624569e-06, "loss": 0.6981, "step": 19880, "vit_lr": 9.662174547249138e-07 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 4.822861029283189e-06, "loss": 0.6969, "step": 19900, "vit_lr": 9.645722058566378e-07 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 4.814635265004222e-06, "loss": 0.6936, "step": 19920, "vit_lr": 9.629270530008444e-07 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 4.806410003080228e-06, "loss": 0.7005, "step": 19940, "vit_lr": 9.612820006160455e-07 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 4.798185265802397e-06, "loss": 0.6871, "step": 19960, "vit_lr": 9.596370531604792e-07 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 4.789961075460502e-06, "loss": 0.6935, "step": 19980, "vit_lr": 9.579922150921004e-07 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 4.781737454342832e-06, "loss": 0.7121, "step": 20000, "vit_lr": 9.563474908685664e-07 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 4.773514424736133e-06, "loss": 0.7023, "step": 20020, "vit_lr": 9.547028849472266e-07 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 4.765292008925552e-06, "loss": 0.6997, "step": 20040, "vit_lr": 9.530584017851102e-07 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 4.757070229194567e-06, "loss": 0.6842, "step": 20060, "vit_lr": 9.514140458389133e-07 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 4.7488491078249335e-06, "loss": 0.6919, "step": 20080, "vit_lr": 9.497698215649866e-07 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 4.740628667096628e-06, "loss": 0.7129, "step": 20100, "vit_lr": 9.481257334193254e-07 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 4.732408929287776e-06, "loss": 0.7124, "step": 20120, "vit_lr": 9.464817858575553e-07 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 4.724189916674601e-06, "loss": 0.7068, "step": 20140, "vit_lr": 9.4483798333492e-07 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 4.715971651531357e-06, "loss": 0.7011, "step": 20160, "vit_lr": 9.431943303062712e-07 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 4.70775415613028e-06, "loss": 0.6923, "step": 20180, "vit_lr": 9.415508312260558e-07 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 4.699537452741514e-06, "loss": 0.7018, "step": 20200, "vit_lr": 9.399074905483028e-07 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 4.691321563633058e-06, "loss": 0.6881, "step": 20220, "vit_lr": 9.382643127266114e-07 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 4.683106511070708e-06, "loss": 0.7097, "step": 20240, "vit_lr": 9.366213022141416e-07 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 4.674892317317984e-06, "loss": 0.7087, "step": 20260, "vit_lr": 9.349784634635967e-07 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 4.666679004636086e-06, "loss": 0.7005, "step": 20280, "vit_lr": 9.333358009272172e-07 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 4.658466595283826e-06, "loss": 0.697, "step": 20300, "vit_lr": 9.316933190567651e-07 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 4.650255111517563e-06, "loss": 0.6817, "step": 20320, "vit_lr": 9.300510223035125e-07 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 4.642044575591154e-06, "loss": 0.7034, "step": 20340, "vit_lr": 9.284089151182306e-07 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 4.633835009755883e-06, "loss": 0.7099, "step": 20360, "vit_lr": 9.267670019511764e-07 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 4.625626436260403e-06, "loss": 0.7096, "step": 20380, "vit_lr": 9.251252872520805e-07 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 4.617418877350683e-06, "loss": 0.7136, "step": 20400, "vit_lr": 9.234837754701366e-07 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 4.60921235526994e-06, "loss": 0.7086, "step": 20420, "vit_lr": 9.218424710539878e-07 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 4.60100689225858e-06, "loss": 0.7183, "step": 20440, "vit_lr": 9.202013784517159e-07 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 4.592802510554141e-06, "loss": 0.7139, "step": 20460, "vit_lr": 9.185605021108282e-07 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 4.584599232391228e-06, "loss": 0.6958, "step": 20480, "vit_lr": 9.169198464782454e-07 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 4.57639708000146e-06, "loss": 0.7057, "step": 20500, "vit_lr": 9.152794160002918e-07 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 4.568196075613397e-06, "loss": 0.7008, "step": 20520, "vit_lr": 9.136392151226793e-07 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 4.559996241452496e-06, "loss": 0.7036, "step": 20540, "vit_lr": 9.119992482904991e-07 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 4.5517975997410366e-06, "loss": 0.701, "step": 20560, "vit_lr": 9.103595199482072e-07 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 4.543600172698073e-06, "loss": 0.6852, "step": 20580, "vit_lr": 9.087200345396144e-07 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 4.53540398253936e-06, "loss": 0.7124, "step": 20600, "vit_lr": 9.070807965078719e-07 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 4.5272090514773085e-06, "loss": 0.7005, "step": 20620, "vit_lr": 9.054418102954615e-07 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 4.519015401720909e-06, "loss": 0.6999, "step": 20640, "vit_lr": 9.038030803441817e-07 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 4.510823055475683e-06, "loss": 0.6931, "step": 20660, "vit_lr": 9.021646110951364e-07 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 4.5026320349436225e-06, "loss": 0.6897, "step": 20680, "vit_lr": 9.005264069887244e-07 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 4.494442362323123e-06, "loss": 0.7111, "step": 20700, "vit_lr": 8.988884724646245e-07 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 4.486254059808927e-06, "loss": 0.6927, "step": 20720, "vit_lr": 8.972508119617852e-07 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 4.478067149592069e-06, "loss": 0.7014, "step": 20740, "vit_lr": 8.956134299184136e-07 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 4.469881653859799e-06, "loss": 0.6854, "step": 20760, "vit_lr": 8.939763307719596e-07 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 4.4616975947955456e-06, "loss": 0.6901, "step": 20780, "vit_lr": 8.92339518959109e-07 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 4.453514994578838e-06, "loss": 0.6838, "step": 20800, "vit_lr": 8.907029989157676e-07 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 4.445333875385253e-06, "loss": 0.6814, "step": 20820, "vit_lr": 8.890667750770505e-07 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 4.437154259386354e-06, "loss": 0.7169, "step": 20840, "vit_lr": 8.874308518772706e-07 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 4.42897616874963e-06, "loss": 0.6943, "step": 20860, "vit_lr": 8.857952337499259e-07 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 4.420799625638436e-06, "loss": 0.6952, "step": 20880, "vit_lr": 8.84159925127687e-07 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 4.412624652211931e-06, "loss": 0.687, "step": 20900, "vit_lr": 8.825249304423861e-07 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 4.404451270625026e-06, "loss": 0.6977, "step": 20920, "vit_lr": 8.808902541250051e-07 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 4.396279503028313e-06, "loss": 0.6861, "step": 20940, "vit_lr": 8.792559006056625e-07 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 4.38810937156801e-06, "loss": 0.713, "step": 20960, "vit_lr": 8.776218743136019e-07 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 4.379940898385905e-06, "loss": 0.6871, "step": 20980, "vit_lr": 8.759881796771809e-07 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 4.3717741056192884e-06, "loss": 0.6891, "step": 21000, "vit_lr": 8.743548211238575e-07 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 4.363609015400895e-06, "loss": 0.6869, "step": 21020, "vit_lr": 8.727218030801789e-07 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 4.35544564985885e-06, "loss": 0.7141, "step": 21040, "vit_lr": 8.710891299717697e-07 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 4.347284031116598e-06, "loss": 0.6783, "step": 21060, "vit_lr": 8.694568062233196e-07 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 4.339124181292861e-06, "loss": 0.6981, "step": 21080, "vit_lr": 8.67824836258572e-07 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 4.3309661225015556e-06, "loss": 0.7035, "step": 21100, "vit_lr": 8.661932245003111e-07 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 4.322809876851752e-06, "loss": 0.704, "step": 21120, "vit_lr": 8.645619753703502e-07 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 4.314655466447598e-06, "loss": 0.6924, "step": 21140, "vit_lr": 8.629310932895195e-07 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 4.306502913388278e-06, "loss": 0.6875, "step": 21160, "vit_lr": 8.613005826776556e-07 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 4.298352239767938e-06, "loss": 0.6946, "step": 21180, "vit_lr": 8.596704479535875e-07 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 4.290203467675629e-06, "loss": 0.7084, "step": 21200, "vit_lr": 8.580406935351257e-07 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 4.282056619195252e-06, "loss": 0.7142, "step": 21220, "vit_lr": 8.564113238390503e-07 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 4.273911716405493e-06, "loss": 0.6936, "step": 21240, "vit_lr": 8.547823432810984e-07 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 4.265768781379763e-06, "loss": 0.7021, "step": 21260, "vit_lr": 8.531537562759525e-07 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 4.257627836186145e-06, "loss": 0.6934, "step": 21280, "vit_lr": 8.515255672372289e-07 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 4.249488902887325e-06, "loss": 0.7036, "step": 21300, "vit_lr": 8.498977805774648e-07 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 4.241352003540539e-06, "loss": 0.7074, "step": 21320, "vit_lr": 8.482704007081079e-07 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 4.233217160197513e-06, "loss": 0.7011, "step": 21340, "vit_lr": 8.466434320395024e-07 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 4.2250843949043935e-06, "loss": 0.7011, "step": 21360, "vit_lr": 8.450168789808786e-07 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 4.216953729701706e-06, "loss": 0.6783, "step": 21380, "vit_lr": 8.43390745940341e-07 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 4.208825186624271e-06, "loss": 0.7058, "step": 21400, "vit_lr": 8.41765037324854e-07 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 4.200698787701171e-06, "loss": 0.7146, "step": 21420, "vit_lr": 8.40139757540234e-07 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 4.19257455495567e-06, "loss": 0.6889, "step": 21440, "vit_lr": 8.385149109911339e-07 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 4.184452510405164e-06, "loss": 0.6793, "step": 21460, "vit_lr": 8.368905020810326e-07 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 4.176332676061119e-06, "loss": 0.7073, "step": 21480, "vit_lr": 8.352665352122237e-07 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 4.1682150739290114e-06, "loss": 0.6902, "step": 21500, "vit_lr": 8.336430147858021e-07 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 4.160099726008265e-06, "loss": 0.6932, "step": 21520, "vit_lr": 8.320199452016529e-07 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 4.151986654292196e-06, "loss": 0.6975, "step": 21540, "vit_lr": 8.303973308584391e-07 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 4.143875880767956e-06, "loss": 0.6983, "step": 21560, "vit_lr": 8.287751761535911e-07 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 4.135767427416462e-06, "loss": 0.6987, "step": 21580, "vit_lr": 8.271534854832925e-07 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 4.127661316212348e-06, "loss": 0.7217, "step": 21600, "vit_lr": 8.255322632424695e-07 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 4.119557569123901e-06, "loss": 0.6861, "step": 21620, "vit_lr": 8.2391151382478e-07 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 4.111456208112991e-06, "loss": 0.6933, "step": 21640, "vit_lr": 8.22291241622598e-07 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 4.103357255135034e-06, "loss": 0.6808, "step": 21660, "vit_lr": 8.206714510270067e-07 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 4.095260732138916e-06, "loss": 0.6918, "step": 21680, "vit_lr": 8.19052146427783e-07 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 4.087166661066934e-06, "loss": 0.6925, "step": 21700, "vit_lr": 8.174333322133868e-07 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 4.0790750638547455e-06, "loss": 0.6877, "step": 21720, "vit_lr": 8.15815012770949e-07 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 4.070985962431301e-06, "loss": 0.7083, "step": 21740, "vit_lr": 8.141971924862601e-07 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 4.0628993787187834e-06, "loss": 0.706, "step": 21760, "vit_lr": 8.125798757437567e-07 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 4.054815334632558e-06, "loss": 0.6978, "step": 21780, "vit_lr": 8.109630669265115e-07 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 4.046733852081107e-06, "loss": 0.7018, "step": 21800, "vit_lr": 8.093467704162213e-07 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 4.038654952965968e-06, "loss": 0.694, "step": 21820, "vit_lr": 8.077309905931935e-07 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 4.0305786591816765e-06, "loss": 0.7117, "step": 21840, "vit_lr": 8.061157318363352e-07 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 4.022504992615714e-06, "loss": 0.7023, "step": 21860, "vit_lr": 8.045009985231426e-07 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 4.014433975148433e-06, "loss": 0.6977, "step": 21880, "vit_lr": 8.028867950296866e-07 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 4.006365628653013e-06, "loss": 0.6922, "step": 21900, "vit_lr": 8.012731257306024e-07 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 3.998299974995391e-06, "loss": 0.7, "step": 21920, "vit_lr": 7.996599949990782e-07 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 3.990237036034209e-06, "loss": 0.6883, "step": 21940, "vit_lr": 7.980474072068417e-07 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 3.982176833620752e-06, "loss": 0.7097, "step": 21960, "vit_lr": 7.964353667241504e-07 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 3.9741193895988876e-06, "loss": 0.6915, "step": 21980, "vit_lr": 7.948238779197773e-07 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 3.966064725805007e-06, "loss": 0.703, "step": 22000, "vit_lr": 7.932129451610013e-07 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 3.958012864067968e-06, "loss": 0.7069, "step": 22020, "vit_lr": 7.916025728135934e-07 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 3.949963826209033e-06, "loss": 0.6895, "step": 22040, "vit_lr": 7.899927652418064e-07 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 3.941917634041814e-06, "loss": 0.6809, "step": 22060, "vit_lr": 7.883835268083627e-07 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 3.9338743093722104e-06, "loss": 0.6916, "step": 22080, "vit_lr": 7.86774861874442e-07 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 3.925833873998348e-06, "loss": 0.6914, "step": 22100, "vit_lr": 7.851667747996695e-07 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 3.917796349710527e-06, "loss": 0.709, "step": 22120, "vit_lr": 7.835592699421053e-07 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 3.909761758291152e-06, "loss": 0.699, "step": 22140, "vit_lr": 7.819523516582304e-07 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 3.901730121514685e-06, "loss": 0.6834, "step": 22160, "vit_lr": 7.803460243029368e-07 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 3.893701461147575e-06, "loss": 0.6942, "step": 22180, "vit_lr": 7.787402922295149e-07 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 3.8856757989482115e-06, "loss": 0.6952, "step": 22200, "vit_lr": 7.771351597896421e-07 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 3.877653156666854e-06, "loss": 0.6926, "step": 22220, "vit_lr": 7.755306313333706e-07 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 3.869633556045577e-06, "loss": 0.6906, "step": 22240, "vit_lr": 7.739267112091152e-07 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 3.861617018818219e-06, "loss": 0.7046, "step": 22260, "vit_lr": 7.723234037636436e-07 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 3.8536035667103035e-06, "loss": 0.689, "step": 22280, "vit_lr": 7.707207133420605e-07 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 3.845593221439006e-06, "loss": 0.6995, "step": 22300, "vit_lr": 7.691186442878011e-07 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 3.8375860047130755e-06, "loss": 0.691, "step": 22320, "vit_lr": 7.67517200942615e-07 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 3.829581938232782e-06, "loss": 0.6789, "step": 22340, "vit_lr": 7.659163876465563e-07 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 3.821581043689863e-06, "loss": 0.6998, "step": 22360, "vit_lr": 7.643162087379725e-07 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 3.813583342767457e-06, "loss": 0.7086, "step": 22380, "vit_lr": 7.627166685534913e-07 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 3.805588857140043e-06, "loss": 0.7019, "step": 22400, "vit_lr": 7.611177714280085e-07 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 3.797597608473391e-06, "loss": 0.6797, "step": 22420, "vit_lr": 7.595195216946782e-07 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 3.789609618424501e-06, "loss": 0.6991, "step": 22440, "vit_lr": 7.579219236849001e-07 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 3.7816249086415373e-06, "loss": 0.7098, "step": 22460, "vit_lr": 7.563249817283074e-07 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 3.7736435007637747e-06, "loss": 0.6921, "step": 22480, "vit_lr": 7.547287001527549e-07 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 3.765665416421544e-06, "loss": 0.6949, "step": 22500, "vit_lr": 7.531330832843087e-07 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 3.75769067723616e-06, "loss": 0.6889, "step": 22520, "vit_lr": 7.515381354472319e-07 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 3.7497193048198817e-06, "loss": 0.6904, "step": 22540, "vit_lr": 7.499438609639762e-07 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 3.741751320775838e-06, "loss": 0.6977, "step": 22560, "vit_lr": 7.483502641551675e-07 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 3.733786746697976e-06, "loss": 0.6965, "step": 22580, "vit_lr": 7.467573493395951e-07 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 3.7258256041710046e-06, "loss": 0.7035, "step": 22600, "vit_lr": 7.451651208342008e-07 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 3.717867914770329e-06, "loss": 0.6872, "step": 22620, "vit_lr": 7.435735829540657e-07 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 3.709913700061997e-06, "loss": 0.6939, "step": 22640, "vit_lr": 7.419827400123992e-07 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 3.701962981602639e-06, "loss": 0.6879, "step": 22660, "vit_lr": 7.403925963205277e-07 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 3.6940157809394127e-06, "loss": 0.7011, "step": 22680, "vit_lr": 7.388031561878824e-07 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 3.6860721196099416e-06, "loss": 0.6888, "step": 22700, "vit_lr": 7.372144239219882e-07 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 3.678132019142256e-06, "loss": 0.7236, "step": 22720, "vit_lr": 7.356264038284511e-07 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 3.670195501054736e-06, "loss": 0.6965, "step": 22740, "vit_lr": 7.34039100210947e-07 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 3.6622625868560556e-06, "loss": 0.7007, "step": 22760, "vit_lr": 7.32452517371211e-07 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 3.6543332980451187e-06, "loss": 0.7029, "step": 22780, "vit_lr": 7.308666596090236e-07 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 3.6464076561110047e-06, "loss": 0.6971, "step": 22800, "vit_lr": 7.292815312222009e-07 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 3.6384856825329107e-06, "loss": 0.6953, "step": 22820, "vit_lr": 7.276971365065821e-07 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 3.6305673987800925e-06, "loss": 0.7024, "step": 22840, "vit_lr": 7.261134797560184e-07 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 3.6226528263118066e-06, "loss": 0.7078, "step": 22860, "vit_lr": 7.245305652623612e-07 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 3.61474198657725e-06, "loss": 0.7009, "step": 22880, "vit_lr": 7.229483973154499e-07 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 3.606834901015502e-06, "loss": 0.7003, "step": 22900, "vit_lr": 7.213669802031004e-07 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 3.5989315910554712e-06, "loss": 0.697, "step": 22920, "vit_lr": 7.197863182110942e-07 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 3.5910320781158336e-06, "loss": 0.7008, "step": 22940, "vit_lr": 7.182064156231666e-07 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 3.583136383604974e-06, "loss": 0.6837, "step": 22960, "vit_lr": 7.166272767209947e-07 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 3.575244528920927e-06, "loss": 0.6936, "step": 22980, "vit_lr": 7.150489057841853e-07 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 3.5673565354513274e-06, "loss": 0.6892, "step": 23000, "vit_lr": 7.134713070902654e-07 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 3.5594724245733353e-06, "loss": 0.6949, "step": 23020, "vit_lr": 7.11894484914667e-07 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 3.551592217653599e-06, "loss": 0.6919, "step": 23040, "vit_lr": 7.103184435307197e-07 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 3.5437159360481787e-06, "loss": 0.6955, "step": 23060, "vit_lr": 7.087431872096356e-07 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 3.5358436011025047e-06, "loss": 0.6923, "step": 23080, "vit_lr": 7.071687202205009e-07 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 3.5279752341513047e-06, "loss": 0.6991, "step": 23100, "vit_lr": 7.055950468302608e-07 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 3.520110856518554e-06, "loss": 0.69, "step": 23120, "vit_lr": 7.040221713037107e-07 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 3.5122504895174225e-06, "loss": 0.6813, "step": 23140, "vit_lr": 7.024500979034844e-07 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 3.5043941544501993e-06, "loss": 0.6973, "step": 23160, "vit_lr": 7.008788308900398e-07 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 3.4965418726082575e-06, "loss": 0.693, "step": 23180, "vit_lr": 6.993083745216514e-07 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 3.4886936652719805e-06, "loss": 0.6894, "step": 23200, "vit_lr": 6.97738733054396e-07 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 3.4808495537107096e-06, "loss": 0.6906, "step": 23220, "vit_lr": 6.961699107421418e-07 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 3.4730095591826886e-06, "loss": 0.6914, "step": 23240, "vit_lr": 6.946019118365376e-07 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 3.465173702935003e-06, "loss": 0.6917, "step": 23260, "vit_lr": 6.930347405870005e-07 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 3.45734200620352e-06, "loss": 0.7, "step": 23280, "vit_lr": 6.914684012407039e-07 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 3.449514490212837e-06, "loss": 0.6881, "step": 23300, "vit_lr": 6.899028980425673e-07 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 3.4416911761762195e-06, "loss": 0.6919, "step": 23320, "vit_lr": 6.883382352352439e-07 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 3.43387208529555e-06, "loss": 0.7141, "step": 23340, "vit_lr": 6.867744170591099e-07 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 3.42605723876126e-06, "loss": 0.6988, "step": 23360, "vit_lr": 6.852114477522519e-07 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 3.418246657752282e-06, "loss": 0.6964, "step": 23380, "vit_lr": 6.836493315504563e-07 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 3.410440363435985e-06, "loss": 0.6941, "step": 23400, "vit_lr": 6.820880726871969e-07 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 3.4026383769681257e-06, "loss": 0.6962, "step": 23420, "vit_lr": 6.805276753936251e-07 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 3.3948407194927835e-06, "loss": 0.6846, "step": 23440, "vit_lr": 6.789681438985566e-07 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 3.3870474121423036e-06, "loss": 0.7006, "step": 23460, "vit_lr": 6.774094824284607e-07 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 3.379258476037246e-06, "loss": 0.6893, "step": 23480, "vit_lr": 6.758516952074491e-07 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 3.371473932286324e-06, "loss": 0.6841, "step": 23500, "vit_lr": 6.742947864572647e-07 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 3.363693801986342e-06, "loss": 0.6973, "step": 23520, "vit_lr": 6.727387603972683e-07 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 3.35591810622215e-06, "loss": 0.7013, "step": 23540, "vit_lr": 6.711836212444298e-07 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 3.348146866066575e-06, "loss": 0.7054, "step": 23560, "vit_lr": 6.696293732133149e-07 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 3.340380102580373e-06, "loss": 0.6926, "step": 23580, "vit_lr": 6.680760205160745e-07 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 3.3326178368121665e-06, "loss": 0.6965, "step": 23600, "vit_lr": 6.665235673624332e-07 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 3.324860089798385e-06, "loss": 0.6914, "step": 23620, "vit_lr": 6.649720179596769e-07 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 3.317106882563222e-06, "loss": 0.6902, "step": 23640, "vit_lr": 6.634213765126443e-07 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 3.3093582361185526e-06, "loss": 0.7109, "step": 23660, "vit_lr": 6.618716472237104e-07 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 3.3016141714639044e-06, "loss": 0.6987, "step": 23680, "vit_lr": 6.603228342927808e-07 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 3.2938747095863823e-06, "loss": 0.6873, "step": 23700, "vit_lr": 6.587749419172763e-07 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 3.2861398714606196e-06, "loss": 0.6951, "step": 23720, "vit_lr": 6.572279742921238e-07 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 3.2784096780487175e-06, "loss": 0.6993, "step": 23740, "vit_lr": 6.556819356097434e-07 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 3.2706841503001904e-06, "loss": 0.6983, "step": 23760, "vit_lr": 6.541368300600379e-07 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 3.2629633091519053e-06, "loss": 0.699, "step": 23780, "vit_lr": 6.52592661830381e-07 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 3.2552471755280318e-06, "loss": 0.696, "step": 23800, "vit_lr": 6.510494351056063e-07 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 3.2475357703399814e-06, "loss": 0.7062, "step": 23820, "vit_lr": 6.495071540679962e-07 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 3.2398291144863505e-06, "loss": 0.6989, "step": 23840, "vit_lr": 6.479658228972699e-07 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 3.2321272288528606e-06, "loss": 0.7116, "step": 23860, "vit_lr": 6.46425445770572e-07 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 3.2244301343123153e-06, "loss": 0.7037, "step": 23880, "vit_lr": 6.44886026862463e-07 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 3.216737851724521e-06, "loss": 0.695, "step": 23900, "vit_lr": 6.43347570344904e-07 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 3.2090504019362533e-06, "loss": 0.6997, "step": 23920, "vit_lr": 6.418100803872506e-07 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 3.201367805781186e-06, "loss": 0.698, "step": 23940, "vit_lr": 6.402735611562371e-07 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 3.1936900840798397e-06, "loss": 0.6914, "step": 23960, "vit_lr": 6.387380168159679e-07 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 3.1860172576395267e-06, "loss": 0.6999, "step": 23980, "vit_lr": 6.372034515279052e-07 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 3.1783493472542904e-06, "loss": 0.696, "step": 24000, "vit_lr": 6.35669869450858e-07 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 3.1706863737048534e-06, "loss": 0.6879, "step": 24020, "vit_lr": 6.341372747409705e-07 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 3.1630283577585543e-06, "loss": 0.7048, "step": 24040, "vit_lr": 6.326056715517107e-07 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 3.1553753201693026e-06, "loss": 0.7008, "step": 24060, "vit_lr": 6.310750640338605e-07 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 3.1477272816775117e-06, "loss": 0.6896, "step": 24080, "vit_lr": 6.295454563355022e-07 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 3.140084263010048e-06, "loss": 0.6968, "step": 24100, "vit_lr": 6.280168526020095e-07 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 3.1324462848801757e-06, "loss": 0.6966, "step": 24120, "vit_lr": 6.264892569760351e-07 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 3.124813367987497e-06, "loss": 0.6983, "step": 24140, "vit_lr": 6.249626735974993e-07 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 3.1171855330178956e-06, "loss": 0.6957, "step": 24160, "vit_lr": 6.23437106603579e-07 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 3.109562800643486e-06, "loss": 0.6864, "step": 24180, "vit_lr": 6.219125601286971e-07 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 3.1019451915225516e-06, "loss": 0.6959, "step": 24200, "vit_lr": 6.203890383045103e-07 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 3.094332726299495e-06, "loss": 0.6938, "step": 24220, "vit_lr": 6.188665452598989e-07 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 3.086725425604775e-06, "loss": 0.7028, "step": 24240, "vit_lr": 6.17345085120955e-07 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 3.079123310054857e-06, "loss": 0.7013, "step": 24260, "vit_lr": 6.158246620109714e-07 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 3.07152640025215e-06, "loss": 0.6948, "step": 24280, "vit_lr": 6.143052800504299e-07 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 3.0639347167849566e-06, "loss": 0.7006, "step": 24300, "vit_lr": 6.127869433569912e-07 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 3.056348280227419e-06, "loss": 0.6959, "step": 24320, "vit_lr": 6.112696560454838e-07 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 3.048767111139457e-06, "loss": 0.7009, "step": 24340, "vit_lr": 6.097534222278912e-07 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 3.0411912300667123e-06, "loss": 0.6993, "step": 24360, "vit_lr": 6.082382460133423e-07 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 3.0336206575405027e-06, "loss": 0.6802, "step": 24380, "vit_lr": 6.067241315081005e-07 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 3.026055414077752e-06, "loss": 0.6966, "step": 24400, "vit_lr": 6.052110828155503e-07 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 3.0184955201809464e-06, "loss": 0.6967, "step": 24420, "vit_lr": 6.036991040361891e-07 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 3.010940996338071e-06, "loss": 0.6927, "step": 24440, "vit_lr": 6.021881992676141e-07 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 3.003391863022562e-06, "loss": 0.696, "step": 24460, "vit_lr": 6.006783726045124e-07 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 2.995848140693244e-06, "loss": 0.6898, "step": 24480, "vit_lr": 5.991696281386487e-07 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 2.988309849794274e-06, "loss": 0.7004, "step": 24500, "vit_lr": 5.976619699588548e-07 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 2.9807770107551e-06, "loss": 0.6939, "step": 24520, "vit_lr": 5.961554021510198e-07 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 2.97324964399038e-06, "loss": 0.6973, "step": 24540, "vit_lr": 5.94649928798076e-07 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 2.9657277698999535e-06, "loss": 0.6966, "step": 24560, "vit_lr": 5.931455539799907e-07 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 2.958211408868771e-06, "loss": 0.6926, "step": 24580, "vit_lr": 5.91642281773754e-07 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 2.950700581266839e-06, "loss": 0.687, "step": 24600, "vit_lr": 5.901401162533677e-07 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 2.943195307449172e-06, "loss": 0.6972, "step": 24620, "vit_lr": 5.886390614898344e-07 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 2.935695607755733e-06, "loss": 0.6954, "step": 24640, "vit_lr": 5.871391215511465e-07 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 2.9282015025113753e-06, "loss": 0.6923, "step": 24660, "vit_lr": 5.85640300502275e-07 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 2.9207130120257913e-06, "loss": 0.7017, "step": 24680, "vit_lr": 5.841426024051582e-07 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 2.913230156593462e-06, "loss": 0.6904, "step": 24700, "vit_lr": 5.826460313186923e-07 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 2.9057529564935905e-06, "loss": 0.697, "step": 24720, "vit_lr": 5.81150591298718e-07 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 2.8982814319900586e-06, "loss": 0.6943, "step": 24740, "vit_lr": 5.796562863980116e-07 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 2.890815603331364e-06, "loss": 0.7081, "step": 24760, "vit_lr": 5.781631206662727e-07 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 2.8833554907505647e-06, "loss": 0.6971, "step": 24780, "vit_lr": 5.766710981501129e-07 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 2.875901114465234e-06, "loss": 0.6906, "step": 24800, "vit_lr": 5.751802228930467e-07 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 2.8684524946773993e-06, "loss": 0.7012, "step": 24820, "vit_lr": 5.736904989354799e-07 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 2.8610096515734796e-06, "loss": 0.6633, "step": 24840, "vit_lr": 5.722019303146958e-07 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 2.8535726053242473e-06, "loss": 0.6958, "step": 24860, "vit_lr": 5.707145210648494e-07 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 2.8461413760847646e-06, "loss": 0.6864, "step": 24880, "vit_lr": 5.692282752169528e-07 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 2.838715983994317e-06, "loss": 0.7145, "step": 24900, "vit_lr": 5.677431967988633e-07 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 2.831296449176384e-06, "loss": 0.7045, "step": 24920, "vit_lr": 5.662592898352767e-07 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 2.8238827917385692e-06, "loss": 0.693, "step": 24940, "vit_lr": 5.647765583477138e-07 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 2.816475031772542e-06, "loss": 0.6909, "step": 24960, "vit_lr": 5.632950063545083e-07 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 2.8090731893539945e-06, "loss": 0.687, "step": 24980, "vit_lr": 5.618146378707989e-07 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 2.801677284542582e-06, "loss": 0.6964, "step": 25000, "vit_lr": 5.603354569085164e-07 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 2.7942873373818643e-06, "loss": 0.6962, "step": 25020, "vit_lr": 5.588574674763727e-07 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 2.786903367899255e-06, "loss": 0.696, "step": 25040, "vit_lr": 5.57380673579851e-07 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 2.7795253961059744e-06, "loss": 0.6801, "step": 25060, "vit_lr": 5.559050792211948e-07 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 2.7721534419969835e-06, "loss": 0.6881, "step": 25080, "vit_lr": 5.544306883993967e-07 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 2.7647875255509337e-06, "loss": 0.7018, "step": 25100, "vit_lr": 5.529575051101866e-07 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 2.7574276667301164e-06, "loss": 0.6742, "step": 25120, "vit_lr": 5.514855333460232e-07 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 2.750073885480409e-06, "loss": 0.7001, "step": 25140, "vit_lr": 5.500147770960817e-07 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 2.742726201731211e-06, "loss": 0.7063, "step": 25160, "vit_lr": 5.485452403462422e-07 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 2.7353846353954004e-06, "loss": 0.7031, "step": 25180, "vit_lr": 5.4707692707908e-07 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 2.7280492063692792e-06, "loss": 0.7071, "step": 25200, "vit_lr": 5.456098412738557e-07 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 2.720719934532516e-06, "loss": 0.6933, "step": 25220, "vit_lr": 5.441439869065031e-07 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 2.713396839748087e-06, "loss": 0.6846, "step": 25240, "vit_lr": 5.426793679496173e-07 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 2.706079941862236e-06, "loss": 0.6958, "step": 25260, "vit_lr": 5.412159883724472e-07 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 2.6987692607044093e-06, "loss": 0.6931, "step": 25280, "vit_lr": 5.397538521408818e-07 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 2.6914648160872e-06, "loss": 0.6839, "step": 25300, "vit_lr": 5.3829296321744e-07 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 2.6841666278063095e-06, "loss": 0.6959, "step": 25320, "vit_lr": 5.368333255612618e-07 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 2.67687471564048e-06, "loss": 0.6873, "step": 25340, "vit_lr": 5.35374943128096e-07 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 2.66958909935144e-06, "loss": 0.6852, "step": 25360, "vit_lr": 5.33917819870288e-07 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 2.662309798683862e-06, "loss": 0.7056, "step": 25380, "vit_lr": 5.324619597367723e-07 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 2.6550368333653017e-06, "loss": 0.7032, "step": 25400, "vit_lr": 5.310073666730602e-07 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 2.647770223106142e-06, "loss": 0.6875, "step": 25420, "vit_lr": 5.295540446212283e-07 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 2.640509987599544e-06, "loss": 0.6922, "step": 25440, "vit_lr": 5.281019975199088e-07 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 2.6332561465213956e-06, "loss": 0.6818, "step": 25460, "vit_lr": 5.266512293042791e-07 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 2.626008719530254e-06, "loss": 0.6955, "step": 25480, "vit_lr": 5.252017439060507e-07 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 2.61876772626729e-06, "loss": 0.6799, "step": 25500, "vit_lr": 5.237535452534579e-07 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 2.6115331863562453e-06, "loss": 0.6976, "step": 25520, "vit_lr": 5.223066372712491e-07 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 2.604305119403364e-06, "loss": 0.6775, "step": 25540, "vit_lr": 5.208610238806728e-07 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 2.5970835449973577e-06, "loss": 0.6825, "step": 25560, "vit_lr": 5.194167089994715e-07 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 2.589868482709332e-06, "loss": 0.6889, "step": 25580, "vit_lr": 5.179736965418663e-07 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 2.582659952092752e-06, "loss": 0.6877, "step": 25600, "vit_lr": 5.165319904185504e-07 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 2.5754579726833804e-06, "loss": 0.6867, "step": 25620, "vit_lr": 5.150915945366761e-07 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 2.5682625639992226e-06, "loss": 0.7088, "step": 25640, "vit_lr": 5.136525127998445e-07 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 2.5610737455404755e-06, "loss": 0.686, "step": 25660, "vit_lr": 5.12214749108095e-07 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 2.5538915367894834e-06, "loss": 0.6941, "step": 25680, "vit_lr": 5.107783073578966e-07 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 2.546715957210668e-06, "loss": 0.6886, "step": 25700, "vit_lr": 5.093431914421335e-07 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 2.5395470262504927e-06, "loss": 0.6419, "step": 25720, "vit_lr": 5.079094052500985e-07 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 2.5323847633374016e-06, "loss": 0.6431, "step": 25740, "vit_lr": 5.064769526674803e-07 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 2.5252291878817643e-06, "loss": 0.6343, "step": 25760, "vit_lr": 5.050458375763527e-07 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 2.5180803192758265e-06, "loss": 0.6471, "step": 25780, "vit_lr": 5.036160638551652e-07 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 2.5109381768936625e-06, "loss": 0.6493, "step": 25800, "vit_lr": 5.021876353787324e-07 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 2.5038027800911174e-06, "loss": 0.6187, "step": 25820, "vit_lr": 5.007605560182234e-07 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 2.496674148205749e-06, "loss": 0.6388, "step": 25840, "vit_lr": 4.993348296411497e-07 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 2.4895523005567866e-06, "loss": 0.6358, "step": 25860, "vit_lr": 4.979104601113573e-07 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 2.4824372564450766e-06, "loss": 0.6407, "step": 25880, "vit_lr": 4.964874512890153e-07 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 2.47532903515302e-06, "loss": 0.6445, "step": 25900, "vit_lr": 4.950658070306039e-07 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 2.4682276559445302e-06, "loss": 0.6425, "step": 25920, "vit_lr": 4.936455311889059e-07 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 2.461133138064978e-06, "loss": 0.6424, "step": 25940, "vit_lr": 4.922266276129956e-07 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 2.4540455007411445e-06, "loss": 0.6231, "step": 25960, "vit_lr": 4.908091001482289e-07 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 2.4469647631811535e-06, "loss": 0.6456, "step": 25980, "vit_lr": 4.893929526362306e-07 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 2.4398909445744377e-06, "loss": 0.6558, "step": 26000, "vit_lr": 4.879781889148875e-07 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 2.432824064091682e-06, "loss": 0.6221, "step": 26020, "vit_lr": 4.865648128183363e-07 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 2.425764140884753e-06, "loss": 0.6301, "step": 26040, "vit_lr": 4.851528281769506e-07 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 2.418711194086678e-06, "loss": 0.6508, "step": 26060, "vit_lr": 4.837422388173356e-07 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 2.4116652428115732e-06, "loss": 0.6486, "step": 26080, "vit_lr": 4.823330485623146e-07 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 2.404626306154592e-06, "loss": 0.6599, "step": 26100, "vit_lr": 4.809252612309184e-07 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 2.3975944031918825e-06, "loss": 0.6467, "step": 26120, "vit_lr": 4.795188806383764e-07 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 2.3905695529805305e-06, "loss": 0.6392, "step": 26140, "vit_lr": 4.781139105961061e-07 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 2.3835517745585053e-06, "loss": 0.6384, "step": 26160, "vit_lr": 4.76710354911701e-07 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 2.376541086944611e-06, "loss": 0.6502, "step": 26180, "vit_lr": 4.7530821738892214e-07 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 2.3695375091384377e-06, "loss": 0.6362, "step": 26200, "vit_lr": 4.739075018276875e-07 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 2.3625410601203084e-06, "loss": 0.6371, "step": 26220, "vit_lr": 4.725082120240617e-07 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 2.3555517588512207e-06, "loss": 0.6287, "step": 26240, "vit_lr": 4.7111035177024405e-07 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 2.348569624272806e-06, "loss": 0.6301, "step": 26260, "vit_lr": 4.697139248545612e-07 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 2.341594675307274e-06, "loss": 0.631, "step": 26280, "vit_lr": 4.683189350614547e-07 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 2.334626930857357e-06, "loss": 0.6318, "step": 26300, "vit_lr": 4.669253861714714e-07 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 2.3276664098062625e-06, "loss": 0.6336, "step": 26320, "vit_lr": 4.6553328196125243e-07 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 2.3207131310176246e-06, "loss": 0.6245, "step": 26340, "vit_lr": 4.6414262620352483e-07 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 2.313767113335451e-06, "loss": 0.6507, "step": 26360, "vit_lr": 4.627534226670902e-07 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 2.306828375584067e-06, "loss": 0.6309, "step": 26380, "vit_lr": 4.6136567511681335e-07 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 2.2998969365680733e-06, "loss": 0.6249, "step": 26400, "vit_lr": 4.5997938731361466e-07 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 2.292972815072286e-06, "loss": 0.6519, "step": 26420, "vit_lr": 4.5859456301445707e-07 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 2.2860560298616935e-06, "loss": 0.64, "step": 26440, "vit_lr": 4.5721120597233864e-07 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 2.2791465996813987e-06, "loss": 0.639, "step": 26460, "vit_lr": 4.558293199362797e-07 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 2.2722445432565755e-06, "loss": 0.6412, "step": 26480, "vit_lr": 4.5444890865131503e-07 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 2.2653498792924143e-06, "loss": 0.6408, "step": 26500, "vit_lr": 4.5306997585848286e-07 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 2.2584626264740676e-06, "loss": 0.6321, "step": 26520, "vit_lr": 4.516925252948135e-07 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 2.2515828034666044e-06, "loss": 0.6401, "step": 26540, "vit_lr": 4.503165606933208e-07 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 2.2447104289149603e-06, "loss": 0.6486, "step": 26560, "vit_lr": 4.4894208578299195e-07 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 2.2378455214438815e-06, "loss": 0.6347, "step": 26580, "vit_lr": 4.4756910428877625e-07 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 2.2309880996578816e-06, "loss": 0.6326, "step": 26600, "vit_lr": 4.461976199315762e-07 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 2.2241381821411862e-06, "loss": 0.6434, "step": 26620, "vit_lr": 4.448276364282372e-07 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 2.2172957874576815e-06, "loss": 0.6442, "step": 26640, "vit_lr": 4.434591574915363e-07 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 2.2104609341508665e-06, "loss": 0.6422, "step": 26660, "vit_lr": 4.4209218683017323e-07 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 2.2036336407438046e-06, "loss": 0.6456, "step": 26680, "vit_lr": 4.407267281487609e-07 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 2.196813925739072e-06, "loss": 0.6513, "step": 26700, "vit_lr": 4.393627851478143e-07 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 2.1900018076187013e-06, "loss": 0.6545, "step": 26720, "vit_lr": 4.380003615237402e-07 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 2.1831973048441417e-06, "loss": 0.6314, "step": 26740, "vit_lr": 4.366394609688283e-07 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 2.176400435856206e-06, "loss": 0.6439, "step": 26760, "vit_lr": 4.352800871712411e-07 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 2.1696112190750116e-06, "loss": 0.6548, "step": 26780, "vit_lr": 4.339222438150023e-07 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 2.162829672899942e-06, "loss": 0.6456, "step": 26800, "vit_lr": 4.325659345799884e-07 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 2.1560558157095933e-06, "loss": 0.6483, "step": 26820, "vit_lr": 4.312111631419186e-07 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 2.1492896658617247e-06, "loss": 0.6382, "step": 26840, "vit_lr": 4.2985793317234487e-07 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 2.142531241693202e-06, "loss": 0.6382, "step": 26860, "vit_lr": 4.2850624833864034e-07 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 2.1357805615199605e-06, "loss": 0.6552, "step": 26880, "vit_lr": 4.271561123039921e-07 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 2.129037643636948e-06, "loss": 0.6299, "step": 26900, "vit_lr": 4.2580752872738955e-07 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 2.122302506318068e-06, "loss": 0.6383, "step": 26920, "vit_lr": 4.2446050126361354e-07 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 2.115575167816146e-06, "loss": 0.6441, "step": 26940, "vit_lr": 4.231150335632291e-07 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 2.1088556463628707e-06, "loss": 0.6236, "step": 26960, "vit_lr": 4.2177112927257405e-07 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 2.1021439601687425e-06, "loss": 0.6337, "step": 26980, "vit_lr": 4.2042879203374846e-07 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 2.095440127423032e-06, "loss": 0.6422, "step": 27000, "vit_lr": 4.190880254846063e-07 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 2.0887441662937243e-06, "loss": 0.6346, "step": 27020, "vit_lr": 4.177488332587448e-07 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 2.0820560949274703e-06, "loss": 0.6424, "step": 27040, "vit_lr": 4.16411218985494e-07 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 2.0753759314495387e-06, "loss": 0.6506, "step": 27060, "vit_lr": 4.1507518628990766e-07 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 2.06870369396377e-06, "loss": 0.6313, "step": 27080, "vit_lr": 4.1374073879275394e-07 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 2.0620394005525248e-06, "loss": 0.6458, "step": 27100, "vit_lr": 4.124078801105049e-07 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 2.0553830692766297e-06, "loss": 0.6425, "step": 27120, "vit_lr": 4.1107661385532587e-07 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 2.0487347181753374e-06, "loss": 0.6498, "step": 27140, "vit_lr": 4.0974694363506745e-07 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 2.042094365266275e-06, "loss": 0.6375, "step": 27160, "vit_lr": 4.0841887305325496e-07 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 2.03546202854539e-06, "loss": 0.6403, "step": 27180, "vit_lr": 4.0709240570907787e-07 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 2.028837725986903e-06, "loss": 0.6468, "step": 27200, "vit_lr": 4.0576754519738054e-07 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 2.022221475543269e-06, "loss": 0.6317, "step": 27220, "vit_lr": 4.0444429510865374e-07 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 2.015613295145117e-06, "loss": 0.6444, "step": 27240, "vit_lr": 4.031226590290233e-07 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 2.0090132027012027e-06, "loss": 0.6315, "step": 27260, "vit_lr": 4.018026405402405e-07 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 2.0024212160983697e-06, "loss": 0.641, "step": 27280, "vit_lr": 4.004842432196739e-07 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 1.995837353201487e-06, "loss": 0.6449, "step": 27300, "vit_lr": 3.991674706402973e-07 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 1.9892616318534086e-06, "loss": 0.6351, "step": 27320, "vit_lr": 3.978523263706817e-07 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 1.9826940698749288e-06, "loss": 0.6161, "step": 27340, "vit_lr": 3.9653881397498565e-07 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 1.9761346850647272e-06, "loss": 0.6388, "step": 27360, "vit_lr": 3.952269370129454e-07 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 1.9695834951993193e-06, "loss": 0.6442, "step": 27380, "vit_lr": 3.939166990398638e-07 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 1.963040518033017e-06, "loss": 0.6554, "step": 27400, "vit_lr": 3.9260810360660335e-07 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 1.956505771297869e-06, "loss": 0.6345, "step": 27420, "vit_lr": 3.913011542595738e-07 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 1.9499792727036253e-06, "loss": 0.6416, "step": 27440, "vit_lr": 3.8999585454072505e-07 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 1.9434610399376764e-06, "loss": 0.6393, "step": 27460, "vit_lr": 3.886922079875352e-07 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 1.936951090665016e-06, "loss": 0.6471, "step": 27480, "vit_lr": 3.873902181330031e-07 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 1.9304494425281887e-06, "loss": 0.6498, "step": 27500, "vit_lr": 3.8608988850563775e-07 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 1.9239561131472402e-06, "loss": 0.6371, "step": 27520, "vit_lr": 3.8479122262944795e-07 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 1.9174711201196698e-06, "loss": 0.6475, "step": 27540, "vit_lr": 3.8349422402393395e-07 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 1.91099448102039e-06, "loss": 0.6498, "step": 27560, "vit_lr": 3.8219889620407796e-07 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 1.9045262134016718e-06, "loss": 0.6281, "step": 27580, "vit_lr": 3.809052426803343e-07 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 1.8980663347930938e-06, "loss": 0.6293, "step": 27600, "vit_lr": 3.796132669586187e-07 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 1.8916148627015056e-06, "loss": 0.6507, "step": 27620, "vit_lr": 3.783229725403011e-07 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 1.8851718146109738e-06, "loss": 0.6409, "step": 27640, "vit_lr": 3.770343629221947e-07 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 1.8787372079827315e-06, "loss": 0.6292, "step": 27660, "vit_lr": 3.7574744159654624e-07 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 1.8723110602551354e-06, "loss": 0.6379, "step": 27680, "vit_lr": 3.74462212051027e-07 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 1.8658933888436208e-06, "loss": 0.6469, "step": 27700, "vit_lr": 3.7317867776872414e-07 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 1.8594842111406515e-06, "loss": 0.6231, "step": 27720, "vit_lr": 3.7189684222813024e-07 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 1.8530835445156674e-06, "loss": 0.6506, "step": 27740, "vit_lr": 3.7061670890313346e-07 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 1.8466914063150482e-06, "loss": 0.634, "step": 27760, "vit_lr": 3.6933828126300956e-07 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 1.8403078138620623e-06, "loss": 0.648, "step": 27780, "vit_lr": 3.680615627724124e-07 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 1.8339327844568066e-06, "loss": 0.6458, "step": 27800, "vit_lr": 3.6678655689136127e-07 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 1.8275663353761847e-06, "loss": 0.6284, "step": 27820, "vit_lr": 3.655132670752369e-07 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 1.8212084838738426e-06, "loss": 0.6328, "step": 27840, "vit_lr": 3.6424169677476843e-07 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 1.8148592471801212e-06, "loss": 0.6302, "step": 27860, "vit_lr": 3.629718494360242e-07 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 1.8085186425020208e-06, "loss": 0.635, "step": 27880, "vit_lr": 3.617037285004041e-07 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 1.8021866870231475e-06, "loss": 0.6339, "step": 27900, "vit_lr": 3.6043733740462943e-07 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 1.7958633979036638e-06, "loss": 0.6445, "step": 27920, "vit_lr": 3.591726795807327e-07 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 1.7895487922802452e-06, "loss": 0.6446, "step": 27940, "vit_lr": 3.5790975845604897e-07 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 1.7832428872660384e-06, "loss": 0.6309, "step": 27960, "vit_lr": 3.5664857745320766e-07 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 1.7769456999506106e-06, "loss": 0.6475, "step": 27980, "vit_lr": 3.553891399901221e-07 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 1.7706572473998979e-06, "loss": 0.6454, "step": 28000, "vit_lr": 3.541314494799795e-07 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 1.7643775466561692e-06, "loss": 0.6558, "step": 28020, "vit_lr": 3.528755093312338e-07 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 1.758106614737975e-06, "loss": 0.6333, "step": 28040, "vit_lr": 3.516213229475949e-07 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 1.7518444686400993e-06, "loss": 0.6235, "step": 28060, "vit_lr": 3.5036889372801983e-07 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 1.7455911253335145e-06, "loss": 0.6529, "step": 28080, "vit_lr": 3.4911822506670285e-07 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 1.739346601765341e-06, "loss": 0.646, "step": 28100, "vit_lr": 3.4786932035306813e-07 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 1.7331109148587965e-06, "loss": 0.6333, "step": 28120, "vit_lr": 3.466221829717593e-07 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 1.7268840815131454e-06, "loss": 0.6542, "step": 28140, "vit_lr": 3.4537681630262907e-07 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 1.720666118603665e-06, "loss": 0.6338, "step": 28160, "vit_lr": 3.4413322372073297e-07 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 1.7144570429815876e-06, "loss": 0.6345, "step": 28180, "vit_lr": 3.428914085963175e-07 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 1.708256871474061e-06, "loss": 0.6286, "step": 28200, "vit_lr": 3.4165137429481215e-07 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 1.7020656208841046e-06, "loss": 0.6341, "step": 28220, "vit_lr": 3.404131241768209e-07 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 1.6958833079905612e-06, "loss": 0.641, "step": 28240, "vit_lr": 3.391766615981122e-07 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 1.689709949548048e-06, "loss": 0.6476, "step": 28260, "vit_lr": 3.3794198990960955e-07 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 1.683545562286919e-06, "loss": 0.6334, "step": 28280, "vit_lr": 3.3670911245738377e-07 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 1.6773901629132118e-06, "loss": 0.6315, "step": 28300, "vit_lr": 3.354780325826423e-07 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 1.6712437681086097e-06, "loss": 0.6373, "step": 28320, "vit_lr": 3.342487536217219e-07 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 1.6651063945303892e-06, "loss": 0.637, "step": 28340, "vit_lr": 3.330212789060778e-07 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 1.6589780588113813e-06, "loss": 0.6469, "step": 28360, "vit_lr": 3.3179561176227624e-07 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 1.6528587775599237e-06, "loss": 0.6285, "step": 28380, "vit_lr": 3.3057175551198466e-07 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 1.6467485673598138e-06, "loss": 0.6448, "step": 28400, "vit_lr": 3.293497134719627e-07 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 1.6406474447702642e-06, "loss": 0.6438, "step": 28420, "vit_lr": 3.281294889540528e-07 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 1.6345554263258634e-06, "loss": 0.6354, "step": 28440, "vit_lr": 3.2691108526517264e-07 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 1.6284725285365266e-06, "loss": 0.6426, "step": 28460, "vit_lr": 3.2569450570730526e-07 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 1.622398767887446e-06, "loss": 0.6359, "step": 28480, "vit_lr": 3.244797535774891e-07 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 1.616334160839056e-06, "loss": 0.6332, "step": 28500, "vit_lr": 3.2326683216781115e-07 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 1.6102787238269857e-06, "loss": 0.6388, "step": 28520, "vit_lr": 3.220557447653971e-07 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 1.6042324732620068e-06, "loss": 0.6314, "step": 28540, "vit_lr": 3.208464946524013e-07 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 1.5981954255299974e-06, "loss": 0.6526, "step": 28560, "vit_lr": 3.196390851059995e-07 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 1.5921675969918976e-06, "loss": 0.6527, "step": 28580, "vit_lr": 3.184335193983795e-07 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 1.586149003983658e-06, "loss": 0.6242, "step": 28600, "vit_lr": 3.172298007967316e-07 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 1.5801396628162035e-06, "loss": 0.6492, "step": 28620, "vit_lr": 3.1602793256324067e-07 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 1.5741395897753875e-06, "loss": 0.6455, "step": 28640, "vit_lr": 3.1482791795507743e-07 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 1.568148801121941e-06, "loss": 0.626, "step": 28660, "vit_lr": 3.1362976022438815e-07 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 1.5621673130914338e-06, "loss": 0.6304, "step": 28680, "vit_lr": 3.1243346261828673e-07 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 1.5561951418942346e-06, "loss": 0.6437, "step": 28700, "vit_lr": 3.1123902837884686e-07 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 1.550232303715461e-06, "loss": 0.6399, "step": 28720, "vit_lr": 3.1004646074309213e-07 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 1.544278814714934e-06, "loss": 0.6208, "step": 28740, "vit_lr": 3.0885576294298675e-07 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 1.5383346910271407e-06, "loss": 0.6506, "step": 28760, "vit_lr": 3.076669382054281e-07 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 1.5323999487611895e-06, "loss": 0.6382, "step": 28780, "vit_lr": 3.064799897522379e-07 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 1.5264746040007588e-06, "loss": 0.6589, "step": 28800, "vit_lr": 3.0529492080015175e-07 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 1.52055867280406e-06, "loss": 0.6229, "step": 28820, "vit_lr": 3.0411173456081197e-07 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 1.5146521712037965e-06, "loss": 0.6319, "step": 28840, "vit_lr": 3.029304342407593e-07 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 1.508755115207116e-06, "loss": 0.6249, "step": 28860, "vit_lr": 3.017510230414232e-07 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 1.5028675207955634e-06, "loss": 0.6296, "step": 28880, "vit_lr": 3.0057350415911265e-07 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 1.496989403925046e-06, "loss": 0.6383, "step": 28900, "vit_lr": 2.9939788078500915e-07 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 1.4911207805257883e-06, "loss": 0.6364, "step": 28920, "vit_lr": 2.982241561051576e-07 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 1.4852616665022773e-06, "loss": 0.6367, "step": 28940, "vit_lr": 2.970523333004554e-07 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 1.4794120777332381e-06, "loss": 0.6359, "step": 28960, "vit_lr": 2.958824155466476e-07 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 1.4735720300715789e-06, "loss": 0.6492, "step": 28980, "vit_lr": 2.947144060143157e-07 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 1.4677415393443472e-06, "loss": 0.6527, "step": 29000, "vit_lr": 2.9354830786886943e-07 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 1.4619206213526949e-06, "loss": 0.6256, "step": 29020, "vit_lr": 2.9238412427053894e-07 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 1.4561092918718305e-06, "loss": 0.6385, "step": 29040, "vit_lr": 2.9122185837436606e-07 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 1.4503075666509737e-06, "loss": 0.6328, "step": 29060, "vit_lr": 2.900615133301947e-07 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 1.4445154614133156e-06, "loss": 0.6558, "step": 29080, "vit_lr": 2.889030922826631e-07 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 1.4387329918559806e-06, "loss": 0.6279, "step": 29100, "vit_lr": 2.8774659837119606e-07 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 1.4329601736499783e-06, "loss": 0.6526, "step": 29120, "vit_lr": 2.865920347299956e-07 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 1.4271970224401578e-06, "loss": 0.6404, "step": 29140, "vit_lr": 2.854394044880315e-07 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 1.4214435538451754e-06, "loss": 0.643, "step": 29160, "vit_lr": 2.84288710769035e-07 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 1.4156997834574431e-06, "loss": 0.649, "step": 29180, "vit_lr": 2.831399566914886e-07 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 1.4099657268430928e-06, "loss": 0.6448, "step": 29200, "vit_lr": 2.819931453686185e-07 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 1.4042413995419268e-06, "loss": 0.6444, "step": 29220, "vit_lr": 2.808482799083853e-07 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 1.3985268170673844e-06, "loss": 0.6373, "step": 29240, "vit_lr": 2.797053634134768e-07 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 1.3928219949064953e-06, "loss": 0.628, "step": 29260, "vit_lr": 2.7856439898129904e-07 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 1.3871269485198357e-06, "loss": 0.6333, "step": 29280, "vit_lr": 2.7742538970396714e-07 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 1.381441693341487e-06, "loss": 0.6244, "step": 29300, "vit_lr": 2.7628833866829735e-07 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 1.3757662447789999e-06, "loss": 0.6412, "step": 29320, "vit_lr": 2.751532489557999e-07 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 1.3701006182133474e-06, "loss": 0.6508, "step": 29340, "vit_lr": 2.7402012364266945e-07 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 1.3644448289988816e-06, "loss": 0.6367, "step": 29360, "vit_lr": 2.7288896579977625e-07 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 1.358798892463295e-06, "loss": 0.6429, "step": 29380, "vit_lr": 2.7175977849265896e-07 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 1.3531628239075828e-06, "loss": 0.6431, "step": 29400, "vit_lr": 2.706325647815165e-07 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 1.3475366386059912e-06, "loss": 0.6524, "step": 29420, "vit_lr": 2.6950732772119824e-07 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 1.341920351805983e-06, "loss": 0.6382, "step": 29440, "vit_lr": 2.6838407036119657e-07 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 1.3363139787281998e-06, "loss": 0.6355, "step": 29460, "vit_lr": 2.6726279574563993e-07 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 1.3307175345664098e-06, "loss": 0.6383, "step": 29480, "vit_lr": 2.661435069132819e-07 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 1.3251310344874767e-06, "loss": 0.6376, "step": 29500, "vit_lr": 2.6502620689749535e-07 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 1.3195544936313166e-06, "loss": 0.6289, "step": 29520, "vit_lr": 2.6391089872626327e-07 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 1.3139879271108508e-06, "loss": 0.6451, "step": 29540, "vit_lr": 2.6279758542217014e-07 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 1.30843135001197e-06, "loss": 0.6412, "step": 29560, "vit_lr": 2.6168627000239396e-07 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 1.3028847773934949e-06, "loss": 0.6405, "step": 29580, "vit_lr": 2.6057695547869896e-07 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 1.2973482242871343e-06, "loss": 0.6258, "step": 29600, "vit_lr": 2.5946964485742685e-07 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 1.291821705697438e-06, "loss": 0.6551, "step": 29620, "vit_lr": 2.5836434113948755e-07 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 1.2863052366017653e-06, "loss": 0.6319, "step": 29640, "vit_lr": 2.5726104732035305e-07 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 1.2807988319502418e-06, "loss": 0.6401, "step": 29660, "vit_lr": 2.5615976639004833e-07 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 1.2753025066657138e-06, "loss": 0.6353, "step": 29680, "vit_lr": 2.5506050133314274e-07 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 1.2698162756437115e-06, "loss": 0.6389, "step": 29700, "vit_lr": 2.5396325512874227e-07 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 1.264340153752412e-06, "loss": 0.6423, "step": 29720, "vit_lr": 2.528680307504824e-07 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 1.2588741558325956e-06, "loss": 0.6508, "step": 29740, "vit_lr": 2.517748311665191e-07 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 1.2534182966976015e-06, "loss": 0.6398, "step": 29760, "vit_lr": 2.506836593395203e-07 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 1.2479725911332969e-06, "loss": 0.6331, "step": 29780, "vit_lr": 2.495945182266593e-07 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 1.2425370538980313e-06, "loss": 0.6471, "step": 29800, "vit_lr": 2.4850741077960623e-07 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 1.2371116997225906e-06, "loss": 0.6528, "step": 29820, "vit_lr": 2.474223399445181e-07 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 1.2316965433101719e-06, "loss": 0.6337, "step": 29840, "vit_lr": 2.4633930866203433e-07 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 1.2262915993363328e-06, "loss": 0.65, "step": 29860, "vit_lr": 2.452583198672665e-07 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 1.220896882448951e-06, "loss": 0.6252, "step": 29880, "vit_lr": 2.4417937648979015e-07 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 1.2155124072681923e-06, "loss": 0.6447, "step": 29900, "vit_lr": 2.4310248145363845e-07 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 1.2101381883864665e-06, "loss": 0.6327, "step": 29920, "vit_lr": 2.4202763767729327e-07 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 1.2047742403683844e-06, "loss": 0.643, "step": 29940, "vit_lr": 2.4095484807367684e-07 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 1.1994205777507222e-06, "loss": 0.6431, "step": 29960, "vit_lr": 2.398841155501444e-07 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 1.194077215042385e-06, "loss": 0.6394, "step": 29980, "vit_lr": 2.38815443008477e-07 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 1.1887441667243642e-06, "loss": 0.6455, "step": 30000, "vit_lr": 2.3774883334487282e-07 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 1.183421447249693e-06, "loss": 0.6321, "step": 30020, "vit_lr": 2.3668428944993857e-07 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 1.1781090710434195e-06, "loss": 0.6271, "step": 30040, "vit_lr": 2.3562181420868388e-07 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 1.1728070525025537e-06, "loss": 0.6444, "step": 30060, "vit_lr": 2.345614105005107e-07 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 1.167515405996042e-06, "loss": 0.6329, "step": 30080, "vit_lr": 2.3350308119920836e-07 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 1.1622341458647162e-06, "loss": 0.6463, "step": 30100, "vit_lr": 2.3244682917294322e-07 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 1.1569632864212633e-06, "loss": 0.6329, "step": 30120, "vit_lr": 2.3139265728425262e-07 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 1.1517028419501846e-06, "loss": 0.6422, "step": 30140, "vit_lr": 2.303405683900369e-07 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 1.1464528267077519e-06, "loss": 0.6354, "step": 30160, "vit_lr": 2.2929056534155033e-07 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 1.141213254921974e-06, "loss": 0.6339, "step": 30180, "vit_lr": 2.2824265098439476e-07 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 1.1359841407925603e-06, "loss": 0.6344, "step": 30200, "vit_lr": 2.2719682815851206e-07 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 1.1307654984908745e-06, "loss": 0.6342, "step": 30220, "vit_lr": 2.2615309969817486e-07 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 1.1255573421599036e-06, "loss": 0.6287, "step": 30240, "vit_lr": 2.251114684319807e-07 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 1.120359685914218e-06, "loss": 0.647, "step": 30260, "vit_lr": 2.2407193718284356e-07 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 1.1151725438399263e-06, "loss": 0.6501, "step": 30280, "vit_lr": 2.2303450876798524e-07 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 1.1099959299946495e-06, "loss": 0.6429, "step": 30300, "vit_lr": 2.2199918599892987e-07 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 1.1048298584074695e-06, "loss": 0.633, "step": 30320, "vit_lr": 2.2096597168149388e-07 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 1.0996743430789058e-06, "loss": 0.6387, "step": 30340, "vit_lr": 2.1993486861578114e-07 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 1.0945293979808602e-06, "loss": 0.6298, "step": 30360, "vit_lr": 2.1890587959617202e-07 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 1.0893950370565954e-06, "loss": 0.6312, "step": 30380, "vit_lr": 2.1787900741131903e-07 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 1.084271274220689e-06, "loss": 0.6409, "step": 30400, "vit_lr": 2.1685425484413777e-07 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 1.0791581233589931e-06, "loss": 0.6404, "step": 30420, "vit_lr": 2.158316246717986e-07 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 1.0740555983286027e-06, "loss": 0.6264, "step": 30440, "vit_lr": 2.1481111966572053e-07 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 1.068963712957816e-06, "loss": 0.6267, "step": 30460, "vit_lr": 2.137927425915632e-07 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 1.0638824810460995e-06, "loss": 0.6344, "step": 30480, "vit_lr": 2.127764962092199e-07 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 1.058811916364041e-06, "loss": 0.6384, "step": 30500, "vit_lr": 2.1176238327280816e-07 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 1.0537520326533246e-06, "loss": 0.6235, "step": 30520, "vit_lr": 2.107504065306649e-07 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 1.0487028436266888e-06, "loss": 0.6266, "step": 30540, "vit_lr": 2.0974056872533773e-07 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 1.043664362967884e-06, "loss": 0.6436, "step": 30560, "vit_lr": 2.0873287259357674e-07 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 1.038636604331642e-06, "loss": 0.6454, "step": 30580, "vit_lr": 2.0772732086632837e-07 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 1.0336195813436378e-06, "loss": 0.6423, "step": 30600, "vit_lr": 2.0672391626872753e-07 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 1.0286133076004535e-06, "loss": 0.632, "step": 30620, "vit_lr": 2.0572266152009065e-07 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 1.0236177966695338e-06, "loss": 0.628, "step": 30640, "vit_lr": 2.0472355933390673e-07 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 1.018633062089161e-06, "loss": 0.6367, "step": 30660, "vit_lr": 2.0372661241783217e-07 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 1.0136591173684136e-06, "loss": 0.6472, "step": 30680, "vit_lr": 2.0273182347368267e-07 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 1.0086959759871196e-06, "loss": 0.6504, "step": 30700, "vit_lr": 2.017391951974239e-07 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 1.0037436513958383e-06, "loss": 0.6365, "step": 30720, "vit_lr": 2.007487302791676e-07 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 9.988021570158113e-07, "loss": 0.6424, "step": 30740, "vit_lr": 1.9976043140316223e-07 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 9.938715062389282e-07, "loss": 0.6537, "step": 30760, "vit_lr": 1.987743012477856e-07 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 9.889517124276927e-07, "loss": 0.6558, "step": 30780, "vit_lr": 1.977903424855385e-07 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 9.840427889151865e-07, "loss": 0.6327, "step": 30800, "vit_lr": 1.9680855778303729e-07 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 9.791447490050294e-07, "loss": 0.6219, "step": 30820, "vit_lr": 1.9582894980100584e-07 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 9.742576059713443e-07, "loss": 0.646, "step": 30840, "vit_lr": 1.9485152119426885e-07 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 9.693813730587276e-07, "loss": 0.6316, "step": 30860, "vit_lr": 1.9387627461174548e-07 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 9.645160634822055e-07, "loss": 0.642, "step": 30880, "vit_lr": 1.9290321269644104e-07 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 9.596616904271994e-07, "loss": 0.6508, "step": 30900, "vit_lr": 1.9193233808543985e-07 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 9.548182670494953e-07, "loss": 0.6385, "step": 30920, "vit_lr": 1.9096365340989907e-07 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 9.49985806475201e-07, "loss": 0.6208, "step": 30940, "vit_lr": 1.8999716129504016e-07 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 9.451643218007172e-07, "loss": 0.6266, "step": 30960, "vit_lr": 1.8903286436014343e-07 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 9.403538260926958e-07, "loss": 0.6439, "step": 30980, "vit_lr": 1.8807076521853914e-07 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 9.355543323880107e-07, "loss": 0.6527, "step": 31000, "vit_lr": 1.871108664776021e-07 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 9.307658536937187e-07, "loss": 0.6355, "step": 31020, "vit_lr": 1.861531707387437e-07 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 9.259884029870236e-07, "loss": 0.6384, "step": 31040, "vit_lr": 1.851976805974047e-07 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 9.212219932152417e-07, "loss": 0.6373, "step": 31060, "vit_lr": 1.842443986430483e-07 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 9.164666372957709e-07, "loss": 0.6414, "step": 31080, "vit_lr": 1.8329332745915416e-07 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 9.117223481160475e-07, "loss": 0.6311, "step": 31100, "vit_lr": 1.8234446962320948e-07 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 9.0698913853352e-07, "loss": 0.6443, "step": 31120, "vit_lr": 1.8139782770670397e-07 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 9.022670213756101e-07, "loss": 0.6451, "step": 31140, "vit_lr": 1.80453404275122e-07 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 8.97556009439674e-07, "loss": 0.6381, "step": 31160, "vit_lr": 1.7951120188793478e-07 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 8.928561154929766e-07, "loss": 0.6345, "step": 31180, "vit_lr": 1.785712230985953e-07 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 8.881673522726475e-07, "loss": 0.6469, "step": 31200, "vit_lr": 1.7763347045452947e-07 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 8.834897324856556e-07, "loss": 0.6263, "step": 31220, "vit_lr": 1.766979464971311e-07 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 8.788232688087661e-07, "loss": 0.6418, "step": 31240, "vit_lr": 1.757646537617532e-07 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 8.741679738885123e-07, "loss": 0.6224, "step": 31260, "vit_lr": 1.7483359477770243e-07 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 8.695238603411605e-07, "loss": 0.6249, "step": 31280, "vit_lr": 1.7390477206823207e-07 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 8.648909407526712e-07, "loss": 0.6237, "step": 31300, "vit_lr": 1.7297818815053421e-07 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 8.602692276786695e-07, "loss": 0.636, "step": 31320, "vit_lr": 1.7205384553573387e-07 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 8.5565873364441e-07, "loss": 0.6444, "step": 31340, "vit_lr": 1.7113174672888197e-07 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 8.51059471144745e-07, "loss": 0.6449, "step": 31360, "vit_lr": 1.7021189422894898e-07 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 8.464714526440837e-07, "loss": 0.6376, "step": 31380, "vit_lr": 1.6929429052881672e-07 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 8.41894690576367e-07, "loss": 0.6207, "step": 31400, "vit_lr": 1.6837893811527336e-07 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 8.373291973450303e-07, "loss": 0.6364, "step": 31420, "vit_lr": 1.6746583946900606e-07 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 8.327749853229633e-07, "loss": 0.6401, "step": 31440, "vit_lr": 1.6655499706459263e-07 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 8.28232066852489e-07, "loss": 0.6305, "step": 31460, "vit_lr": 1.6564641337049779e-07 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 8.237004542453226e-07, "loss": 0.6547, "step": 31480, "vit_lr": 1.647400908490645e-07 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 8.191801597825372e-07, "loss": 0.6487, "step": 31500, "vit_lr": 1.6383603195650742e-07 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 8.146711957145343e-07, "loss": 0.6338, "step": 31520, "vit_lr": 1.6293423914290683e-07 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 8.101735742610106e-07, "loss": 0.6348, "step": 31540, "vit_lr": 1.620347148522021e-07 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 8.056873076109206e-07, "loss": 0.642, "step": 31560, "vit_lr": 1.611374615221841e-07 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 8.012124079224448e-07, "loss": 0.6391, "step": 31580, "vit_lr": 1.6024248158448896e-07 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 7.967488873229629e-07, "loss": 0.6321, "step": 31600, "vit_lr": 1.5934977746459256e-07 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 7.922967579090151e-07, "loss": 0.6478, "step": 31620, "vit_lr": 1.58459351581803e-07 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 7.878560317462664e-07, "loss": 0.6337, "step": 31640, "vit_lr": 1.5757120634925324e-07 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 7.834267208694823e-07, "loss": 0.6286, "step": 31660, "vit_lr": 1.5668534417389645e-07 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 7.790088372824922e-07, "loss": 0.6433, "step": 31680, "vit_lr": 1.558017674564984e-07 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 7.746023929581525e-07, "loss": 0.6339, "step": 31700, "vit_lr": 1.549204785916305e-07 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 7.702073998383203e-07, "loss": 0.649, "step": 31720, "vit_lr": 1.5404147996766403e-07 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 7.6582386983382e-07, "loss": 0.6287, "step": 31740, "vit_lr": 1.5316477396676397e-07 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 7.614518148244094e-07, "loss": 0.6453, "step": 31760, "vit_lr": 1.5229036296488184e-07 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 7.570912466587466e-07, "loss": 0.626, "step": 31780, "vit_lr": 1.5141824933174928e-07 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 7.527421771543619e-07, "loss": 0.629, "step": 31800, "vit_lr": 1.5054843543087237e-07 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 7.484046180976207e-07, "loss": 0.6552, "step": 31820, "vit_lr": 1.4968092361952412e-07 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 7.440785812436935e-07, "loss": 0.6333, "step": 31840, "vit_lr": 1.4881571624873867e-07 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 7.397640783165283e-07, "loss": 0.638, "step": 31860, "vit_lr": 1.4795281566330564e-07 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 7.35461121008812e-07, "loss": 0.625, "step": 31880, "vit_lr": 1.4709222420176237e-07 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 7.311697209819441e-07, "loss": 0.6421, "step": 31900, "vit_lr": 1.462339441963888e-07 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 7.268898898660009e-07, "loss": 0.6297, "step": 31920, "vit_lr": 1.4537797797320016e-07 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 7.226216392597057e-07, "loss": 0.6401, "step": 31940, "vit_lr": 1.4452432785194113e-07 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 7.183649807303994e-07, "loss": 0.6313, "step": 31960, "vit_lr": 1.4367299614607986e-07 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 7.141199258140053e-07, "loss": 0.636, "step": 31980, "vit_lr": 1.4282398516280104e-07 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 7.098864860150018e-07, "loss": 0.6436, "step": 32000, "vit_lr": 1.4197729720300033e-07 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 7.056646728063893e-07, "loss": 0.6454, "step": 32020, "vit_lr": 1.4113293456127783e-07 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 7.014544976296566e-07, "loss": 0.6298, "step": 32040, "vit_lr": 1.402908995259313e-07 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 6.972559718947558e-07, "loss": 0.6307, "step": 32060, "vit_lr": 1.3945119437895114e-07 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 6.930691069800644e-07, "loss": 0.6422, "step": 32080, "vit_lr": 1.3861382139601285e-07 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 6.888939142323614e-07, "loss": 0.6318, "step": 32100, "vit_lr": 1.3777878284647227e-07 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 6.847304049667902e-07, "loss": 0.6412, "step": 32120, "vit_lr": 1.3694608099335803e-07 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 6.805785904668327e-07, "loss": 0.634, "step": 32140, "vit_lr": 1.3611571809336652e-07 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 6.76438481984279e-07, "loss": 0.6312, "step": 32160, "vit_lr": 1.352876963968558e-07 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 6.7231009073919e-07, "loss": 0.638, "step": 32180, "vit_lr": 1.3446201814783797e-07 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 6.681934279198738e-07, "loss": 0.6328, "step": 32200, "vit_lr": 1.3363868558397473e-07 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 6.640885046828554e-07, "loss": 0.6306, "step": 32220, "vit_lr": 1.3281770093657107e-07 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 6.599953321528435e-07, "loss": 0.6328, "step": 32240, "vit_lr": 1.3199906643056868e-07 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 6.559139214226989e-07, "loss": 0.631, "step": 32260, "vit_lr": 1.3118278428453978e-07 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 6.5184428355341e-07, "loss": 0.6372, "step": 32280, "vit_lr": 1.3036885671068198e-07 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 6.47786429574061e-07, "loss": 0.6333, "step": 32300, "vit_lr": 1.2955728591481218e-07 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 6.43740370481793e-07, "loss": 0.6516, "step": 32320, "vit_lr": 1.2874807409635856e-07 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 6.397061172417895e-07, "loss": 0.6357, "step": 32340, "vit_lr": 1.279412234483579e-07 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 6.356836807872391e-07, "loss": 0.6247, "step": 32360, "vit_lr": 1.271367361574478e-07 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 6.316730720192993e-07, "loss": 0.6304, "step": 32380, "vit_lr": 1.2633461440385983e-07 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 6.276743018070802e-07, "loss": 0.6328, "step": 32400, "vit_lr": 1.2553486036141603e-07 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 6.236873809876071e-07, "loss": 0.6367, "step": 32420, "vit_lr": 1.247374761975214e-07 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 6.19712320365789e-07, "loss": 0.6334, "step": 32440, "vit_lr": 1.239424640731578e-07 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 6.157491307143936e-07, "loss": 0.6275, "step": 32460, "vit_lr": 1.2314982614287873e-07 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 6.117978227740212e-07, "loss": 0.632, "step": 32480, "vit_lr": 1.2235956455480422e-07 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 6.078584072530686e-07, "loss": 0.6359, "step": 32500, "vit_lr": 1.215716814506137e-07 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 6.039308948277012e-07, "loss": 0.6413, "step": 32520, "vit_lr": 1.2078617896554023e-07 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 6.000152961418298e-07, "loss": 0.6387, "step": 32540, "vit_lr": 1.2000305922836595e-07 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 5.961116218070767e-07, "loss": 0.6494, "step": 32560, "vit_lr": 1.1922232436141532e-07 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 5.92219882402747e-07, "loss": 0.6297, "step": 32580, "vit_lr": 1.1844397648054938e-07 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 5.883400884758e-07, "loss": 0.6418, "step": 32600, "vit_lr": 1.1766801769516e-07 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 5.844722505408251e-07, "loss": 0.634, "step": 32620, "vit_lr": 1.1689445010816501e-07 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 5.806163790800096e-07, "loss": 0.6358, "step": 32640, "vit_lr": 1.161232758160019e-07 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 5.76772484543106e-07, "loss": 0.6409, "step": 32660, "vit_lr": 1.153544969086212e-07 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 5.729405773474139e-07, "loss": 0.6356, "step": 32680, "vit_lr": 1.1458811546948277e-07 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 5.691206678777434e-07, "loss": 0.6502, "step": 32700, "vit_lr": 1.1382413357554865e-07 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 5.653127664863878e-07, "loss": 0.6394, "step": 32720, "vit_lr": 1.1306255329727755e-07 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 5.615168834931017e-07, "loss": 0.6428, "step": 32740, "vit_lr": 1.1230337669862033e-07 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 5.577330291850669e-07, "loss": 0.6398, "step": 32760, "vit_lr": 1.1154660583701336e-07 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 5.539612138168643e-07, "loss": 0.6438, "step": 32780, "vit_lr": 1.1079224276337285e-07 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 5.502014476104528e-07, "loss": 0.641, "step": 32800, "vit_lr": 1.1004028952209054e-07 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 5.464537407551312e-07, "loss": 0.6335, "step": 32820, "vit_lr": 1.0929074815102623e-07 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 5.427181034075219e-07, "loss": 0.6431, "step": 32840, "vit_lr": 1.0854362068150436e-07 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 5.389945456915335e-07, "loss": 0.6369, "step": 32860, "vit_lr": 1.0779890913830669e-07 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 5.352830776983409e-07, "loss": 0.6485, "step": 32880, "vit_lr": 1.0705661553966817e-07 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 5.31583709486354e-07, "loss": 0.6225, "step": 32900, "vit_lr": 1.063167418972708e-07 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 5.278964510811891e-07, "loss": 0.6383, "step": 32920, "vit_lr": 1.055792902162378e-07 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 5.242213124756473e-07, "loss": 0.6404, "step": 32940, "vit_lr": 1.0484426249512945e-07 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 5.205583036296796e-07, "loss": 0.6564, "step": 32960, "vit_lr": 1.0411166072593591e-07 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 5.169074344703695e-07, "loss": 0.6409, "step": 32980, "vit_lr": 1.0338148689407388e-07 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 5.132687148918963e-07, "loss": 0.6308, "step": 33000, "vit_lr": 1.0265374297837925e-07 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 5.096421547555153e-07, "loss": 0.6284, "step": 33020, "vit_lr": 1.0192843095110304e-07 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 5.060277638895283e-07, "loss": 0.6489, "step": 33040, "vit_lr": 1.0120555277790565e-07 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 5.024255520892574e-07, "loss": 0.6383, "step": 33060, "vit_lr": 1.0048511041785146e-07 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 4.988355291170166e-07, "loss": 0.632, "step": 33080, "vit_lr": 9.97671058234033e-08 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 4.952577047020896e-07, "loss": 0.6318, "step": 33100, "vit_lr": 9.90515409404179e-08 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 4.916920885406989e-07, "loss": 0.632, "step": 33120, "vit_lr": 9.833841770813977e-08 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 4.881386902959828e-07, "loss": 0.6352, "step": 33140, "vit_lr": 9.762773805919655e-08 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 4.845975195979685e-07, "loss": 0.6198, "step": 33160, "vit_lr": 9.691950391959369e-08 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 4.810685860435444e-07, "loss": 0.6246, "step": 33180, "vit_lr": 9.621371720870886e-08 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 4.775518991964329e-07, "loss": 0.6356, "step": 33200, "vit_lr": 9.551037983928656e-08 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 4.74047468587171e-07, "loss": 0.6572, "step": 33220, "vit_lr": 9.480949371743418e-08 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 4.7055530371307946e-07, "loss": 0.6416, "step": 33240, "vit_lr": 9.411106074261588e-08 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 4.6707541403823444e-07, "loss": 0.6491, "step": 33260, "vit_lr": 9.341508280764687e-08 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 4.6360780899344815e-07, "loss": 0.6451, "step": 33280, "vit_lr": 9.272156179868962e-08 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 4.6015249797624063e-07, "loss": 0.6305, "step": 33300, "vit_lr": 9.203049959524811e-08 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 4.567094903508124e-07, "loss": 0.6289, "step": 33320, "vit_lr": 9.134189807016246e-08 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 4.5327879544802e-07, "loss": 0.646, "step": 33340, "vit_lr": 9.065575908960399e-08 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 4.4986042256535466e-07, "loss": 0.6291, "step": 33360, "vit_lr": 8.997208451307092e-08 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 4.464543809669114e-07, "loss": 0.6418, "step": 33380, "vit_lr": 8.929087619338227e-08 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 4.4306067988336587e-07, "loss": 0.6391, "step": 33400, "vit_lr": 8.861213597667317e-08 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 4.3967932851195103e-07, "loss": 0.6284, "step": 33420, "vit_lr": 8.79358657023902e-08 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 4.363103360164328e-07, "loss": 0.6198, "step": 33440, "vit_lr": 8.726206720328655e-08 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 4.329537115270793e-07, "loss": 0.6542, "step": 33460, "vit_lr": 8.659074230541585e-08 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 4.296094641406423e-07, "loss": 0.6189, "step": 33480, "vit_lr": 8.592189282812845e-08 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 4.2627760292033085e-07, "loss": 0.6249, "step": 33500, "vit_lr": 8.525552058406615e-08 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 4.2295813689578804e-07, "loss": 0.6466, "step": 33520, "vit_lr": 8.459162737915759e-08 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 4.1965107506306e-07, "loss": 0.6265, "step": 33540, "vit_lr": 8.3930215012612e-08 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 4.163564263845815e-07, "loss": 0.6337, "step": 33560, "vit_lr": 8.327128527691629e-08 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 4.130741997891441e-07, "loss": 0.6386, "step": 33580, "vit_lr": 8.261483995782881e-08 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 4.098044041718735e-07, "loss": 0.6281, "step": 33600, "vit_lr": 8.196088083437469e-08 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 4.06547048394208e-07, "loss": 0.6407, "step": 33620, "vit_lr": 8.130940967884159e-08 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 4.033021412838739e-07, "loss": 0.6345, "step": 33640, "vit_lr": 8.066042825677477e-08 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 4.0006969163485675e-07, "loss": 0.6334, "step": 33660, "vit_lr": 8.001393832697135e-08 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 3.9684970820738576e-07, "loss": 0.6505, "step": 33680, "vit_lr": 7.936994164147715e-08 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 3.936421997279005e-07, "loss": 0.6271, "step": 33700, "vit_lr": 7.872843994558009e-08 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 3.904471748890382e-07, "loss": 0.6283, "step": 33720, "vit_lr": 7.808943497780763e-08 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 3.87264642349599e-07, "loss": 0.6316, "step": 33740, "vit_lr": 7.74529284699198e-08 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 3.840946107345306e-07, "loss": 0.6294, "step": 33760, "vit_lr": 7.68189221469061e-08 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 3.8093708863490286e-07, "loss": 0.6232, "step": 33780, "vit_lr": 7.618741772698057e-08 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 3.7779208460787975e-07, "loss": 0.6225, "step": 33800, "vit_lr": 7.555841692157594e-08 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 3.746596071767056e-07, "loss": 0.6428, "step": 33820, "vit_lr": 7.493192143534111e-08 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 3.715396648306713e-07, "loss": 0.6284, "step": 33840, "vit_lr": 7.430793296613425e-08 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 3.6843226602510107e-07, "loss": 0.6377, "step": 33860, "vit_lr": 7.36864532050202e-08 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 3.6533741918132135e-07, "loss": 0.6471, "step": 33880, "vit_lr": 7.306748383626427e-08 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 3.622551326866436e-07, "loss": 0.6412, "step": 33900, "vit_lr": 7.245102653732871e-08 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 3.5918541489434023e-07, "loss": 0.6514, "step": 33920, "vit_lr": 7.183708297886803e-08 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 3.5612827412361936e-07, "loss": 0.6373, "step": 33940, "vit_lr": 7.122565482472387e-08 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 3.530837186596048e-07, "loss": 0.6384, "step": 33960, "vit_lr": 7.061674373192094e-08 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 3.500517567533146e-07, "loss": 0.6386, "step": 33980, "vit_lr": 7.001035135066291e-08 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 3.470323966216349e-07, "loss": 0.6379, "step": 34000, "vit_lr": 6.940647932432697e-08 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 3.4402564644730187e-07, "loss": 0.6365, "step": 34020, "vit_lr": 6.880512928946036e-08 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 3.4103151437887727e-07, "loss": 0.6309, "step": 34040, "vit_lr": 6.820630287577544e-08 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 3.3805000853072534e-07, "loss": 0.6387, "step": 34060, "vit_lr": 6.761000170614506e-08 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 3.3508113698299206e-07, "loss": 0.6406, "step": 34080, "vit_lr": 6.70162273965984e-08 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 3.3212490778158525e-07, "loss": 0.6399, "step": 34100, "vit_lr": 6.642498155631704e-08 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 3.291813289381507e-07, "loss": 0.6445, "step": 34120, "vit_lr": 6.583626578763013e-08 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 3.262504084300483e-07, "loss": 0.6428, "step": 34140, "vit_lr": 6.525008168600965e-08 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 3.233321542003337e-07, "loss": 0.6463, "step": 34160, "vit_lr": 6.466643084006673e-08 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 3.2042657415773837e-07, "loss": 0.6473, "step": 34180, "vit_lr": 6.408531483154767e-08 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 3.175336761766423e-07, "loss": 0.6352, "step": 34200, "vit_lr": 6.350673523532846e-08 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 3.1465346809705643e-07, "loss": 0.6228, "step": 34220, "vit_lr": 6.293069361941127e-08 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 3.1178595772460187e-07, "loss": 0.6352, "step": 34240, "vit_lr": 6.235719154492037e-08 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 3.089311528304884e-07, "loss": 0.6026, "step": 34260, "vit_lr": 6.178623056609766e-08 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 3.0608906115149104e-07, "loss": 0.6389, "step": 34280, "vit_lr": 6.12178122302982e-08 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 3.0325969038993264e-07, "loss": 0.6438, "step": 34300, "vit_lr": 6.065193807798651e-08 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 3.004430482136611e-07, "loss": 0.6288, "step": 34320, "vit_lr": 6.008860964273221e-08 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 2.976391422560254e-07, "loss": 0.6181, "step": 34340, "vit_lr": 5.9527828451205074e-08 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 2.9484798011586267e-07, "loss": 0.6278, "step": 34360, "vit_lr": 5.896959602317253e-08 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 2.9206956935747153e-07, "loss": 0.6399, "step": 34380, "vit_lr": 5.8413913871494304e-08 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 2.8930391751059215e-07, "loss": 0.6324, "step": 34400, "vit_lr": 5.786078350211843e-08 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 2.8655103207038846e-07, "loss": 0.6433, "step": 34420, "vit_lr": 5.731020641407769e-08 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 2.8381092049742545e-07, "loss": 0.6481, "step": 34440, "vit_lr": 5.676218409948508e-08 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 2.8108359021765065e-07, "loss": 0.6267, "step": 34460, "vit_lr": 5.621671804353012e-08 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 2.7836904862237e-07, "loss": 0.6477, "step": 34480, "vit_lr": 5.5673809724474e-08 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 2.7566730306823606e-07, "loss": 0.6295, "step": 34500, "vit_lr": 5.513346061364721e-08 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 2.7297836087721965e-07, "loss": 0.635, "step": 34520, "vit_lr": 5.459567217544392e-08 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 2.703022293365931e-07, "loss": 0.6297, "step": 34540, "vit_lr": 5.4060445867318614e-08 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 2.676389156989129e-07, "loss": 0.6278, "step": 34560, "vit_lr": 5.3527783139782567e-08 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 2.6498842718199525e-07, "loss": 0.6399, "step": 34580, "vit_lr": 5.299768543639904e-08 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 2.6235077096890215e-07, "loss": 0.6402, "step": 34600, "vit_lr": 5.247015419378043e-08 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 2.5972595420791556e-07, "loss": 0.6335, "step": 34620, "vit_lr": 5.194519084158311e-08 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 2.571139840125242e-07, "loss": 0.6372, "step": 34640, "vit_lr": 5.142279680250483e-08 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 2.5451486746140065e-07, "loss": 0.6413, "step": 34660, "vit_lr": 5.090297349228012e-08 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 2.5192861159838155e-07, "loss": 0.6368, "step": 34680, "vit_lr": 5.0385722319676305e-08 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 2.49355223432452e-07, "loss": 0.6401, "step": 34700, "vit_lr": 4.9871044686490397e-08 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 2.4679470993772327e-07, "loss": 0.6379, "step": 34720, "vit_lr": 4.9358941987544645e-08 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 2.442470780534145e-07, "loss": 0.6253, "step": 34740, "vit_lr": 4.8849415610682895e-08 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 2.417123346838363e-07, "loss": 0.6497, "step": 34760, "vit_lr": 4.834246693676725e-08 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 2.3919048669836853e-07, "loss": 0.6391, "step": 34780, "vit_lr": 4.78380973396737e-08 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 2.3668154093144603e-07, "loss": 0.6436, "step": 34800, "vit_lr": 4.73363081862892e-08 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 2.3418550418253473e-07, "loss": 0.6189, "step": 34820, "vit_lr": 4.683710083650694e-08 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 2.3170238321611583e-07, "loss": 0.6268, "step": 34840, "vit_lr": 4.634047664322316e-08 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 2.292321847616702e-07, "loss": 0.629, "step": 34860, "vit_lr": 4.584643695233403e-08 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 2.267749155136545e-07, "loss": 0.6183, "step": 34880, "vit_lr": 4.535498310273089e-08 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 2.2433058213148894e-07, "loss": 0.6284, "step": 34900, "vit_lr": 4.486611642629778e-08 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 2.2189919123953517e-07, "loss": 0.6321, "step": 34920, "vit_lr": 4.437983824790703e-08 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 2.1948074942707898e-07, "loss": 0.6245, "step": 34940, "vit_lr": 4.389614988541579e-08 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 2.1707526324831306e-07, "loss": 0.6514, "step": 34960, "vit_lr": 4.341505264966261e-08 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 2.146827392223194e-07, "loss": 0.6325, "step": 34980, "vit_lr": 4.293654784446388e-08 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 2.1230318383305303e-07, "loss": 0.6224, "step": 35000, "vit_lr": 4.2460636766610605e-08 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 2.099366035293199e-07, "loss": 0.6399, "step": 35020, "vit_lr": 4.198732070586397e-08 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 2.07583004724764e-07, "loss": 0.6306, "step": 35040, "vit_lr": 4.1516600944952794e-08 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 2.0524239379784927e-07, "loss": 0.6376, "step": 35060, "vit_lr": 4.104847875956985e-08 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 2.029147770918394e-07, "loss": 0.6196, "step": 35080, "vit_lr": 4.058295541836787e-08 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 2.006001609147823e-07, "loss": 0.6356, "step": 35100, "vit_lr": 4.012003218295645e-08 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 1.9829855153949528e-07, "loss": 0.6301, "step": 35120, "vit_lr": 3.965971030789905e-08 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 1.9600995520354606e-07, "loss": 0.65, "step": 35140, "vit_lr": 3.92019910407092e-08 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 1.9373437810923268e-07, "loss": 0.6344, "step": 35160, "vit_lr": 3.874687562184653e-08 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 1.9147182642357377e-07, "loss": 0.6381, "step": 35180, "vit_lr": 3.8294365284714745e-08 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 1.892223062782872e-07, "loss": 0.6455, "step": 35200, "vit_lr": 3.7844461255657436e-08 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 1.8698582376977025e-07, "loss": 0.6269, "step": 35220, "vit_lr": 3.7397164753954045e-08 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 1.8476238495909182e-07, "loss": 0.6586, "step": 35240, "vit_lr": 3.695247699181836e-08 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 1.8255199587197014e-07, "loss": 0.6267, "step": 35260, "vit_lr": 3.6510399174394024e-08 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 1.803546624987551e-07, "loss": 0.6364, "step": 35280, "vit_lr": 3.607093249975102e-08 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 1.781703907944171e-07, "loss": 0.6329, "step": 35300, "vit_lr": 3.563407815888342e-08 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 1.759991866785282e-07, "loss": 0.6505, "step": 35320, "vit_lr": 3.519983733570564e-08 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 1.7384105603524437e-07, "loss": 0.6271, "step": 35340, "vit_lr": 3.476821120704887e-08 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 1.7169600471329152e-07, "loss": 0.6425, "step": 35360, "vit_lr": 3.43392009426583e-08 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 1.6956403852595116e-07, "loss": 0.6219, "step": 35380, "vit_lr": 3.391280770519023e-08 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 1.674451632510421e-07, "loss": 0.6313, "step": 35400, "vit_lr": 3.348903265020842e-08 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 1.6533938463090427e-07, "loss": 0.6413, "step": 35420, "vit_lr": 3.3067876926180846e-08 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 1.6324670837238655e-07, "loss": 0.6386, "step": 35440, "vit_lr": 3.2649341674477304e-08 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 1.6116714014682788e-07, "loss": 0.635, "step": 35460, "vit_lr": 3.223342802936557e-08 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 1.5910068559004398e-07, "loss": 0.6357, "step": 35480, "vit_lr": 3.182013711800879e-08 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 1.570473503023101e-07, "loss": 0.6366, "step": 35500, "vit_lr": 3.140947006046202e-08 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 1.5500713984834938e-07, "loss": 0.6442, "step": 35520, "vit_lr": 3.1001427969669866e-08 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 1.5298005975731388e-07, "loss": 0.6458, "step": 35540, "vit_lr": 3.059601195146278e-08 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 1.5096611552277097e-07, "loss": 0.6456, "step": 35560, "vit_lr": 3.0193223104554186e-08 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 1.4896531260268965e-07, "loss": 0.6387, "step": 35580, "vit_lr": 2.9793062520537924e-08 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 1.469776564194242e-07, "loss": 0.6178, "step": 35600, "vit_lr": 2.9395531283884833e-08 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 1.4500315235970007e-07, "loss": 0.6179, "step": 35620, "vit_lr": 2.9000630471940014e-08 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 1.4304180577459914e-07, "loss": 0.6372, "step": 35640, "vit_lr": 2.8608361154919824e-08 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 1.4109362197954724e-07, "loss": 0.6357, "step": 35660, "vit_lr": 2.8218724395909444e-08 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 1.3915860625429378e-07, "loss": 0.6397, "step": 35680, "vit_lr": 2.7831721250858754e-08 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 1.372367638429062e-07, "loss": 0.637, "step": 35700, "vit_lr": 2.7447352768581233e-08 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 1.3532809995374763e-07, "loss": 0.6292, "step": 35720, "vit_lr": 2.706561999074952e-08 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 1.3343261975946874e-07, "loss": 0.6408, "step": 35740, "vit_lr": 2.6686523951893746e-08 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 1.3155032839698823e-07, "loss": 0.6336, "step": 35760, "vit_lr": 2.6310065679397642e-08 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 1.2968123096748497e-07, "loss": 0.6476, "step": 35780, "vit_lr": 2.593624619349699e-08 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 1.278253325363793e-07, "loss": 0.6462, "step": 35800, "vit_lr": 2.5565066507275855e-08 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 1.2598263813332178e-07, "loss": 0.6311, "step": 35820, "vit_lr": 2.5196527626664355e-08 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 1.2415315275217832e-07, "loss": 0.6427, "step": 35840, "vit_lr": 2.483063055043566e-08 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 1.223368813510173e-07, "loss": 0.6397, "step": 35860, "vit_lr": 2.4467376270203455e-08 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 1.205338288520974e-07, "loss": 0.6363, "step": 35880, "vit_lr": 2.410676577041948e-08 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 1.187440001418505e-07, "loss": 0.6443, "step": 35900, "vit_lr": 2.3748800028370097e-08 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 1.169674000708726e-07, "loss": 0.635, "step": 35920, "vit_lr": 2.3393480014174515e-08 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 1.1520403345390896e-07, "loss": 0.6469, "step": 35940, "vit_lr": 2.3040806690781787e-08 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 1.1345390506983967e-07, "loss": 0.643, "step": 35960, "vit_lr": 2.269078101396793e-08 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 1.1171701966166848e-07, "loss": 0.6394, "step": 35980, "vit_lr": 2.2343403932333694e-08 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 1.0999338193651065e-07, "loss": 0.6237, "step": 36000, "vit_lr": 2.199867638730213e-08 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 1.0828299656557794e-07, "loss": 0.6384, "step": 36020, "vit_lr": 2.1656599313115587e-08 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 1.0658586818416694e-07, "loss": 0.633, "step": 36040, "vit_lr": 2.1317173636833384e-08 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 1.0490200139164797e-07, "loss": 0.6369, "step": 36060, "vit_lr": 2.0980400278329592e-08 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 1.0323140075144956e-07, "loss": 0.6333, "step": 36080, "vit_lr": 2.0646280150289908e-08 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 1.0157407079104786e-07, "loss": 0.6337, "step": 36100, "vit_lr": 2.031481415820957e-08 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 9.993001600195617e-08, "loss": 0.6322, "step": 36120, "vit_lr": 1.9986003200391232e-08 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 9.829924083970932e-08, "loss": 0.6249, "step": 36140, "vit_lr": 1.965984816794186e-08 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 9.668174972385258e-08, "loss": 0.6428, "step": 36160, "vit_lr": 1.9336349944770515e-08 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 9.507754703793226e-08, "loss": 0.6216, "step": 36180, "vit_lr": 1.901550940758645e-08 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 9.348663712948015e-08, "loss": 0.6402, "step": 36200, "vit_lr": 1.8697327425896024e-08 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 9.190902431000515e-08, "loss": 0.6442, "step": 36220, "vit_lr": 1.8381804862001028e-08 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 9.034471285497671e-08, "loss": 0.6446, "step": 36240, "vit_lr": 1.8068942570995338e-08 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 8.879370700382028e-08, "loss": 0.6253, "step": 36260, "vit_lr": 1.7758741400764054e-08 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 8.725601095989966e-08, "loss": 0.6407, "step": 36280, "vit_lr": 1.745120219197993e-08 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 8.573162889050746e-08, "loss": 0.6411, "step": 36300, "vit_lr": 1.714632577810149e-08 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 8.422056492685626e-08, "loss": 0.6273, "step": 36320, "vit_lr": 1.684411298537125e-08 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 8.272282316406366e-08, "loss": 0.6305, "step": 36340, "vit_lr": 1.654456463281273e-08 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 8.123840766114333e-08, "loss": 0.6314, "step": 36360, "vit_lr": 1.6247681532228664e-08 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 7.976732244099395e-08, "loss": 0.6273, "step": 36380, "vit_lr": 1.5953464488198788e-08 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 7.830957149038754e-08, "loss": 0.6372, "step": 36400, "vit_lr": 1.5661914298077506e-08 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 7.686515875995948e-08, "loss": 0.6334, "step": 36420, "vit_lr": 1.5373031751991895e-08 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 7.543408816419739e-08, "loss": 0.6426, "step": 36440, "vit_lr": 1.5086817632839477e-08 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 7.40163635814295e-08, "loss": 0.6326, "step": 36460, "vit_lr": 1.4803272716285897e-08 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 7.261198885381682e-08, "loss": 0.6457, "step": 36480, "vit_lr": 1.4522397770763361e-08 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 7.122096778733933e-08, "loss": 0.6328, "step": 36500, "vit_lr": 1.4244193557467866e-08 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 6.984330415178819e-08, "loss": 0.6338, "step": 36520, "vit_lr": 1.3968660830357636e-08 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 6.847900168075573e-08, "loss": 0.6324, "step": 36540, "vit_lr": 1.3695800336151143e-08 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 6.712806407162209e-08, "loss": 0.6301, "step": 36560, "vit_lr": 1.3425612814324417e-08 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 6.579049498555035e-08, "loss": 0.6199, "step": 36580, "vit_lr": 1.3158098997110067e-08 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 6.446629804747029e-08, "loss": 0.6373, "step": 36600, "vit_lr": 1.2893259609494057e-08 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 6.315547684607514e-08, "loss": 0.6374, "step": 36620, "vit_lr": 1.2631095369215027e-08 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 6.18580349338066e-08, "loss": 0.6327, "step": 36640, "vit_lr": 1.2371606986761319e-08 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 6.05739758268481e-08, "loss": 0.6303, "step": 36660, "vit_lr": 1.2114795165369618e-08 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 5.930330300511489e-08, "loss": 0.6353, "step": 36680, "vit_lr": 1.1860660601022976e-08 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 5.80460199122429e-08, "loss": 0.6499, "step": 36700, "vit_lr": 1.160920398244858e-08 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 5.68021299555821e-08, "loss": 0.633, "step": 36720, "vit_lr": 1.1360425991116418e-08 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 5.55716365061848e-08, "loss": 0.6296, "step": 36740, "vit_lr": 1.1114327301236959e-08 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 5.435454289879849e-08, "loss": 0.6418, "step": 36760, "vit_lr": 1.0870908579759697e-08 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 5.3150852431854695e-08, "loss": 0.6368, "step": 36780, "vit_lr": 1.0630170486370937e-08 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 5.196056836746288e-08, "loss": 0.6524, "step": 36800, "vit_lr": 1.0392113673492575e-08 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 5.0783693931398817e-08, "loss": 0.637, "step": 36820, "vit_lr": 1.0156738786279761e-08 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 4.962023231309676e-08, "loss": 0.6474, "step": 36840, "vit_lr": 9.92404646261935e-09 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 4.847018666564174e-08, "loss": 0.6323, "step": 36860, "vit_lr": 9.694037333128346e-09 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 4.733356010576007e-08, "loss": 0.6384, "step": 36880, "vit_lr": 9.466712021152013e-09 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 4.6210355713811604e-08, "loss": 0.6457, "step": 36900, "vit_lr": 9.24207114276232e-09 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 4.51005765337792e-08, "loss": 0.6339, "step": 36920, "vit_lr": 9.020115306755838e-09 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 4.400422557326534e-08, "loss": 0.6431, "step": 36940, "vit_lr": 8.800845114653065e-09 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 4.292130580347775e-08, "loss": 0.6453, "step": 36960, "vit_lr": 8.58426116069555e-09 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 4.185182015922551e-08, "loss": 0.6243, "step": 36980, "vit_lr": 8.3703640318451e-09 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 4.079577153891012e-08, "loss": 0.6266, "step": 37000, "vit_lr": 8.159154307782024e-09 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 3.9753162804518356e-08, "loss": 0.632, "step": 37020, "vit_lr": 7.950632560903669e-09 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 3.872399678161221e-08, "loss": 0.6531, "step": 37040, "vit_lr": 7.74479935632244e-09 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 3.770827625932338e-08, "loss": 0.6499, "step": 37060, "vit_lr": 7.541655251864676e-09 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 3.670600399034552e-08, "loss": 0.6365, "step": 37080, "vit_lr": 7.341200798069103e-09 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 3.5717182690925834e-08, "loss": 0.6358, "step": 37100, "vit_lr": 7.143436538185166e-09 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 3.474181504085794e-08, "loss": 0.6474, "step": 37120, "vit_lr": 6.948363008171587e-09 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 3.3779903683475723e-08, "loss": 0.6266, "step": 37140, "vit_lr": 6.755980736695144e-09 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 3.283145122564446e-08, "loss": 0.6336, "step": 37160, "vit_lr": 6.566290245128891e-09 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 3.189646023775472e-08, "loss": 0.641, "step": 37180, "vit_lr": 6.379292047550944e-09 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 3.09749332537157e-08, "loss": 0.6531, "step": 37200, "vit_lr": 6.194986650743139e-09 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 3.006687277094855e-08, "loss": 0.6342, "step": 37220, "vit_lr": 6.01337455418971e-09 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 2.917228125037641e-08, "loss": 0.6306, "step": 37240, "vit_lr": 5.834456250075281e-09 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 2.8291161116423272e-08, "loss": 0.6203, "step": 37260, "vit_lr": 5.658232223284653e-09 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 2.7423514757003445e-08, "loss": 0.6359, "step": 37280, "vit_lr": 5.4847029514006885e-09 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 2.6569344523516004e-08, "loss": 0.6282, "step": 37300, "vit_lr": 5.3138689047032e-09 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 2.5728652730837576e-08, "loss": 0.6391, "step": 37320, "vit_lr": 5.1457305461675146e-09 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 2.4901441657319002e-08, "loss": 0.6359, "step": 37340, "vit_lr": 4.9802883314638e-09 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 2.408771354477535e-08, "loss": 0.6305, "step": 37360, "vit_lr": 4.817542708955069e-09 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 2.3287470598482577e-08, "loss": 0.6365, "step": 37380, "vit_lr": 4.657494119696515e-09 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 2.250071498717088e-08, "loss": 0.6341, "step": 37400, "vit_lr": 4.500142997434176e-09 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 2.1727448843018028e-08, "loss": 0.641, "step": 37420, "vit_lr": 4.3454897686036055e-09 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 2.096767426164381e-08, "loss": 0.6329, "step": 37440, "vit_lr": 4.1935348523287616e-09 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 2.0221393302105596e-08, "loss": 0.629, "step": 37460, "vit_lr": 4.044278660421119e-09 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 1.948860798689056e-08, "loss": 0.6331, "step": 37480, "vit_lr": 3.897721597378112e-09 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 1.8769320301913473e-08, "loss": 0.6447, "step": 37500, "vit_lr": 3.753864060382695e-09 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 1.8063532196507804e-08, "loss": 0.6344, "step": 37520, "vit_lr": 3.6127064393015604e-09 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 1.737124558342129e-08, "loss": 0.6355, "step": 37540, "vit_lr": 3.474249116684258e-09 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 1.6692462338813718e-08, "loss": 0.6359, "step": 37560, "vit_lr": 3.3384924677627434e-09 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 1.6027184302248035e-08, "loss": 0.6424, "step": 37580, "vit_lr": 3.2054368604496063e-09 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 1.537541327668646e-08, "loss": 0.6537, "step": 37600, "vit_lr": 3.075082655337291e-09 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 1.4737151028486606e-08, "loss": 0.6459, "step": 37620, "vit_lr": 2.947430205697321e-09 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 1.4112399287396495e-08, "loss": 0.6297, "step": 37640, "vit_lr": 2.8224798574792984e-09 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 1.350115974654953e-08, "loss": 0.6496, "step": 37660, "vit_lr": 2.7002319493099056e-09 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 1.290343406245842e-08, "loss": 0.6466, "step": 37680, "vit_lr": 2.5806868124916837e-09 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 1.2319223855013496e-08, "loss": 0.6289, "step": 37700, "vit_lr": 2.463844771002699e-09 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.1748530707477168e-08, "loss": 0.6471, "step": 37720, "vit_lr": 2.3497061414954333e-09 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.1191356166477818e-08, "loss": 0.6242, "step": 37740, "vit_lr": 2.2382712332955635e-09 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.0647701742008686e-08, "loss": 0.6305, "step": 37760, "vit_lr": 2.129540348401737e-09 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.0117568907421216e-08, "loss": 0.6312, "step": 37780, "vit_lr": 2.023513781484243e-09 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 9.600959099423379e-09, "loss": 0.6413, "step": 37800, "vit_lr": 1.9201918198846757e-09 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 9.097873718074136e-09, "loss": 0.6328, "step": 37820, "vit_lr": 1.8195747436148268e-09 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 8.608314126778428e-09, "loss": 0.6393, "step": 37840, "vit_lr": 1.7216628253556853e-09 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 8.132281652287744e-09, "loss": 0.6376, "step": 37860, "vit_lr": 1.6264563304575485e-09 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 7.669777584691784e-09, "loss": 0.6268, "step": 37880, "vit_lr": 1.5339555169383567e-09 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 7.220803177418467e-09, "loss": 0.6447, "step": 37900, "vit_lr": 1.4441606354836932e-09 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 6.785359647227818e-09, "loss": 0.6336, "step": 37920, "vit_lr": 1.3570719294455634e-09 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 6.363448174210862e-09, "loss": 0.637, "step": 37940, "vit_lr": 1.2726896348421723e-09 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 5.9550699017857375e-09, "loss": 0.6418, "step": 37960, "vit_lr": 1.1910139803571474e-09 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 5.560225936693253e-09, "loss": 0.6122, "step": 37980, "vit_lr": 1.1120451873386506e-09 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 5.178917348996338e-09, "loss": 0.6295, "step": 38000, "vit_lr": 1.0357834697992673e-09 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 4.811145172075593e-09, "loss": 0.6305, "step": 38020, "vit_lr": 9.622290344151184e-10 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 4.4569104026254136e-09, "loss": 0.6428, "step": 38040, "vit_lr": 8.913820805250827e-10 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 4.1162140006545396e-09, "loss": 0.6291, "step": 38060, "vit_lr": 8.232428001309077e-10 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 3.789056889480503e-09, "loss": 0.6354, "step": 38080, "vit_lr": 7.578113778961004e-10 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 3.475439955727966e-09, "loss": 0.6362, "step": 38100, "vit_lr": 6.950879911455931e-10 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 3.1753640493265014e-09, "loss": 0.6311, "step": 38120, "vit_lr": 6.350728098653002e-10 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 2.8888299835089227e-09, "loss": 0.636, "step": 38140, "vit_lr": 5.777659967017845e-10 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 2.6158385348085124e-09, "loss": 0.6309, "step": 38160, "vit_lr": 5.231677069617024e-10 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 2.3563904430562445e-09, "loss": 0.6363, "step": 38180, "vit_lr": 4.712780886112489e-10 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 2.1104864113791203e-09, "loss": 0.6329, "step": 38200, "vit_lr": 4.22097282275824e-10 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 1.878127106199612e-09, "loss": 0.6242, "step": 38220, "vit_lr": 3.756254212399224e-10 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.6593131572317788e-09, "loss": 0.631, "step": 38240, "vit_lr": 3.318626314463557e-10 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.45404515748071e-09, "loss": 0.6248, "step": 38260, "vit_lr": 2.90809031496142e-10 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.262323663241971e-09, "loss": 0.6337, "step": 38280, "vit_lr": 2.524647326483942e-10 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.0841491940971616e-09, "loss": 0.6302, "step": 38300, "vit_lr": 2.168298388194323e-10 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 9.195222329150266e-10, "loss": 0.6466, "step": 38320, "vit_lr": 1.839044465830053e-10 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 7.684432258492358e-10, "loss": 0.6281, "step": 38340, "vit_lr": 1.5368864516984714e-10 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 6.309125823378282e-10, "loss": 0.6246, "step": 38360, "vit_lr": 1.261825164675656e-10 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 5.069306751004366e-10, "loss": 0.6297, "step": 38380, "vit_lr": 1.013861350200873e-10 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 3.964978401399533e-10, "loss": 0.6434, "step": 38400, "vit_lr": 7.929956802799065e-11 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 2.996143767386439e-10, "loss": 0.6188, "step": 38420, "vit_lr": 5.992287534772877e-11 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 2.1628054745925775e-10, "loss": 0.6404, "step": 38440, "vit_lr": 4.325610949185154e-11 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.4649657814447272e-10, "loss": 0.632, "step": 38460, "vit_lr": 2.929931562889454e-11 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 9.026265791467481e-11, "loss": 0.6211, "step": 38480, "vit_lr": 1.805253158293496e-11 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 4.7578939168513306e-11, "loss": 0.6324, "step": 38500, "vit_lr": 9.51578783370266e-12 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 1.8445537583455797e-11, "loss": 0.6606, "step": 38520, "vit_lr": 3.689107516691159e-12 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 2.862532113012684e-12, "loss": 0.6239, "step": 38540, "vit_lr": 5.725064226025367e-13 }, { "epoch": 3.0, "step": 38553, "total_flos": 6.147232682042248e+19, "train_loss": 0.7164545233597895, "train_runtime": 102404.8721, "train_samples_per_second": 48.187, "train_steps_per_second": 0.376, "vit_lr": 0.0 } ], "logging_steps": 20, "max_steps": 38553, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.147232682042248e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }