{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.8650519031141869,
"eval_steps": 500,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0005767012687427913,
"grad_norm": 0.5134636163711548,
"learning_rate": 0.0,
"loss": 1.6129628419876099,
"step": 1
},
{
"epoch": 0.0011534025374855825,
"grad_norm": 0.45678019523620605,
"learning_rate": 4e-05,
"loss": 1.713558554649353,
"step": 2
},
{
"epoch": 0.0017301038062283738,
"grad_norm": 0.6324027180671692,
"learning_rate": 8e-05,
"loss": 1.9871511459350586,
"step": 3
},
{
"epoch": 0.002306805074971165,
"grad_norm": 0.5307025909423828,
"learning_rate": 0.00012,
"loss": 1.6862211227416992,
"step": 4
},
{
"epoch": 0.0028835063437139563,
"grad_norm": 0.616538941860199,
"learning_rate": 0.00016,
"loss": 2.1033642292022705,
"step": 5
},
{
"epoch": 0.0034602076124567475,
"grad_norm": 0.7627953290939331,
"learning_rate": 0.0002,
"loss": 2.150984764099121,
"step": 6
},
{
"epoch": 0.004036908881199538,
"grad_norm": 0.8402333855628967,
"learning_rate": 0.00019996151625938042,
"loss": 2.0197458267211914,
"step": 7
},
{
"epoch": 0.00461361014994233,
"grad_norm": 3.813333034515381,
"learning_rate": 0.00019992303251876084,
"loss": 2.396656036376953,
"step": 8
},
{
"epoch": 0.005190311418685121,
"grad_norm": 0.9861733913421631,
"learning_rate": 0.00019988454877814126,
"loss": 2.1392970085144043,
"step": 9
},
{
"epoch": 0.0057670126874279125,
"grad_norm": 0.7931668758392334,
"learning_rate": 0.00019984606503752164,
"loss": 1.8062304258346558,
"step": 10
},
{
"epoch": 0.006343713956170703,
"grad_norm": 0.8828097581863403,
"learning_rate": 0.00019980758129690206,
"loss": 1.76358962059021,
"step": 11
},
{
"epoch": 0.006920415224913495,
"grad_norm": 0.7205682396888733,
"learning_rate": 0.00019976909755628247,
"loss": 1.3197358846664429,
"step": 12
},
{
"epoch": 0.007497116493656286,
"grad_norm": 1.2321408987045288,
"learning_rate": 0.0001997306138156629,
"loss": 1.7697328329086304,
"step": 13
},
{
"epoch": 0.008073817762399077,
"grad_norm": 0.9804911613464355,
"learning_rate": 0.0001996921300750433,
"loss": 1.7214155197143555,
"step": 14
},
{
"epoch": 0.00865051903114187,
"grad_norm": 0.9436901807785034,
"learning_rate": 0.00019965364633442372,
"loss": 1.6395944356918335,
"step": 15
},
{
"epoch": 0.00922722029988466,
"grad_norm": 1.6564269065856934,
"learning_rate": 0.00019961516259380414,
"loss": 1.8607707023620605,
"step": 16
},
{
"epoch": 0.00980392156862745,
"grad_norm": 1.0676305294036865,
"learning_rate": 0.00019957667885318455,
"loss": 1.4897263050079346,
"step": 17
},
{
"epoch": 0.010380622837370242,
"grad_norm": 0.9889469146728516,
"learning_rate": 0.00019953819511256494,
"loss": 1.7445942163467407,
"step": 18
},
{
"epoch": 0.010957324106113034,
"grad_norm": 0.8717456459999084,
"learning_rate": 0.00019949971137194535,
"loss": 1.4854474067687988,
"step": 19
},
{
"epoch": 0.011534025374855825,
"grad_norm": 1.110196590423584,
"learning_rate": 0.00019946122763132577,
"loss": 1.32136869430542,
"step": 20
},
{
"epoch": 0.012110726643598616,
"grad_norm": 0.7795314192771912,
"learning_rate": 0.00019942274389070618,
"loss": 1.7199318408966064,
"step": 21
},
{
"epoch": 0.012687427912341407,
"grad_norm": 0.7504187822341919,
"learning_rate": 0.0001993842601500866,
"loss": 1.2975201606750488,
"step": 22
},
{
"epoch": 0.0132641291810842,
"grad_norm": 0.8012252449989319,
"learning_rate": 0.00019934577640946702,
"loss": 1.2630457878112793,
"step": 23
},
{
"epoch": 0.01384083044982699,
"grad_norm": 0.9531145691871643,
"learning_rate": 0.00019930729266884743,
"loss": 1.6974424123764038,
"step": 24
},
{
"epoch": 0.01441753171856978,
"grad_norm": 1.020970106124878,
"learning_rate": 0.00019926880892822785,
"loss": 1.294957160949707,
"step": 25
},
{
"epoch": 0.014994232987312572,
"grad_norm": 1.7608129978179932,
"learning_rate": 0.00019923032518760823,
"loss": 1.801735520362854,
"step": 26
},
{
"epoch": 0.015570934256055362,
"grad_norm": 0.9601960182189941,
"learning_rate": 0.00019919184144698865,
"loss": 1.4538304805755615,
"step": 27
},
{
"epoch": 0.016147635524798153,
"grad_norm": 0.7025886178016663,
"learning_rate": 0.00019915335770636906,
"loss": 1.1746238470077515,
"step": 28
},
{
"epoch": 0.016724336793540944,
"grad_norm": 0.8506267666816711,
"learning_rate": 0.00019911487396574948,
"loss": 1.1891943216323853,
"step": 29
},
{
"epoch": 0.01730103806228374,
"grad_norm": 0.9117224216461182,
"learning_rate": 0.0001990763902251299,
"loss": 1.4325735569000244,
"step": 30
},
{
"epoch": 0.01787773933102653,
"grad_norm": 0.8756442070007324,
"learning_rate": 0.0001990379064845103,
"loss": 1.3962581157684326,
"step": 31
},
{
"epoch": 0.01845444059976932,
"grad_norm": 1.0293549299240112,
"learning_rate": 0.00019899942274389073,
"loss": 1.4936443567276,
"step": 32
},
{
"epoch": 0.01903114186851211,
"grad_norm": 0.8239012360572815,
"learning_rate": 0.00019896093900327114,
"loss": 1.1294159889221191,
"step": 33
},
{
"epoch": 0.0196078431372549,
"grad_norm": 0.6293753385543823,
"learning_rate": 0.00019892245526265153,
"loss": 1.219704031944275,
"step": 34
},
{
"epoch": 0.020184544405997693,
"grad_norm": 0.9778785109519958,
"learning_rate": 0.00019888397152203194,
"loss": 1.3405961990356445,
"step": 35
},
{
"epoch": 0.020761245674740483,
"grad_norm": 0.9916248917579651,
"learning_rate": 0.00019884548778141236,
"loss": 1.7191007137298584,
"step": 36
},
{
"epoch": 0.021337946943483274,
"grad_norm": 0.9758312106132507,
"learning_rate": 0.00019880700404079277,
"loss": 1.2949879169464111,
"step": 37
},
{
"epoch": 0.02191464821222607,
"grad_norm": 0.7310605645179749,
"learning_rate": 0.0001987685203001732,
"loss": 1.282931923866272,
"step": 38
},
{
"epoch": 0.02249134948096886,
"grad_norm": 0.6537899374961853,
"learning_rate": 0.0001987300365595536,
"loss": 1.4050456285476685,
"step": 39
},
{
"epoch": 0.02306805074971165,
"grad_norm": 0.6727839708328247,
"learning_rate": 0.00019869155281893402,
"loss": 1.3566672801971436,
"step": 40
},
{
"epoch": 0.02364475201845444,
"grad_norm": 0.6026540994644165,
"learning_rate": 0.00019865306907831444,
"loss": 1.6914572715759277,
"step": 41
},
{
"epoch": 0.02422145328719723,
"grad_norm": 0.7345203161239624,
"learning_rate": 0.00019861458533769482,
"loss": 1.3210856914520264,
"step": 42
},
{
"epoch": 0.024798154555940023,
"grad_norm": 1.7062476873397827,
"learning_rate": 0.00019857610159707524,
"loss": 1.6727783679962158,
"step": 43
},
{
"epoch": 0.025374855824682813,
"grad_norm": 0.7726621627807617,
"learning_rate": 0.00019853761785645565,
"loss": 1.7425484657287598,
"step": 44
},
{
"epoch": 0.025951557093425604,
"grad_norm": 0.6947644948959351,
"learning_rate": 0.00019849913411583607,
"loss": 1.0628504753112793,
"step": 45
},
{
"epoch": 0.0265282583621684,
"grad_norm": 0.7833652496337891,
"learning_rate": 0.00019846065037521649,
"loss": 1.4800021648406982,
"step": 46
},
{
"epoch": 0.02710495963091119,
"grad_norm": 0.8065851926803589,
"learning_rate": 0.0001984221666345969,
"loss": 1.2809616327285767,
"step": 47
},
{
"epoch": 0.02768166089965398,
"grad_norm": 1.044630527496338,
"learning_rate": 0.00019838368289397732,
"loss": 1.602962851524353,
"step": 48
},
{
"epoch": 0.02825836216839677,
"grad_norm": 0.5969672203063965,
"learning_rate": 0.00019834519915335773,
"loss": 1.5166534185409546,
"step": 49
},
{
"epoch": 0.02883506343713956,
"grad_norm": 0.848512589931488,
"learning_rate": 0.00019830671541273812,
"loss": 1.442568063735962,
"step": 50
},
{
"epoch": 0.029411764705882353,
"grad_norm": 0.5782500505447388,
"learning_rate": 0.00019826823167211853,
"loss": 1.3492627143859863,
"step": 51
},
{
"epoch": 0.029988465974625143,
"grad_norm": 0.850151777267456,
"learning_rate": 0.00019822974793149895,
"loss": 1.5313668251037598,
"step": 52
},
{
"epoch": 0.030565167243367934,
"grad_norm": 0.613896906375885,
"learning_rate": 0.00019819126419087937,
"loss": 1.0709185600280762,
"step": 53
},
{
"epoch": 0.031141868512110725,
"grad_norm": 0.9450347423553467,
"learning_rate": 0.00019815278045025978,
"loss": 1.5562160015106201,
"step": 54
},
{
"epoch": 0.031718569780853516,
"grad_norm": 0.9424428939819336,
"learning_rate": 0.0001981142967096402,
"loss": 1.764065146446228,
"step": 55
},
{
"epoch": 0.03229527104959631,
"grad_norm": 0.9744471311569214,
"learning_rate": 0.0001980758129690206,
"loss": 0.9400297403335571,
"step": 56
},
{
"epoch": 0.0328719723183391,
"grad_norm": 0.7247487902641296,
"learning_rate": 0.00019803732922840103,
"loss": 1.572107195854187,
"step": 57
},
{
"epoch": 0.03344867358708189,
"grad_norm": 0.6125597357749939,
"learning_rate": 0.00019799884548778141,
"loss": 1.2189209461212158,
"step": 58
},
{
"epoch": 0.034025374855824686,
"grad_norm": 1.0781699419021606,
"learning_rate": 0.00019796036174716183,
"loss": 1.3933414220809937,
"step": 59
},
{
"epoch": 0.03460207612456748,
"grad_norm": 0.8329439759254456,
"learning_rate": 0.00019792187800654224,
"loss": 1.4748475551605225,
"step": 60
},
{
"epoch": 0.03517877739331027,
"grad_norm": 0.7766849398612976,
"learning_rate": 0.00019788339426592266,
"loss": 1.4775745868682861,
"step": 61
},
{
"epoch": 0.03575547866205306,
"grad_norm": 0.7776947021484375,
"learning_rate": 0.00019784491052530308,
"loss": 1.4959548711776733,
"step": 62
},
{
"epoch": 0.03633217993079585,
"grad_norm": 0.7114179134368896,
"learning_rate": 0.0001978064267846835,
"loss": 1.4756664037704468,
"step": 63
},
{
"epoch": 0.03690888119953864,
"grad_norm": 0.675800621509552,
"learning_rate": 0.0001977679430440639,
"loss": 1.4753670692443848,
"step": 64
},
{
"epoch": 0.03748558246828143,
"grad_norm": 1.5709729194641113,
"learning_rate": 0.00019772945930344432,
"loss": 1.5947999954223633,
"step": 65
},
{
"epoch": 0.03806228373702422,
"grad_norm": 0.7363697290420532,
"learning_rate": 0.0001976909755628247,
"loss": 1.2786856889724731,
"step": 66
},
{
"epoch": 0.03863898500576701,
"grad_norm": 0.8212243318557739,
"learning_rate": 0.00019765249182220512,
"loss": 1.3553478717803955,
"step": 67
},
{
"epoch": 0.0392156862745098,
"grad_norm": 0.6724039912223816,
"learning_rate": 0.00019761400808158554,
"loss": 1.3045082092285156,
"step": 68
},
{
"epoch": 0.039792387543252594,
"grad_norm": 1.0372695922851562,
"learning_rate": 0.00019757552434096596,
"loss": 1.5149048566818237,
"step": 69
},
{
"epoch": 0.040369088811995385,
"grad_norm": 0.7058703303337097,
"learning_rate": 0.00019753704060034637,
"loss": 1.2227076292037964,
"step": 70
},
{
"epoch": 0.040945790080738176,
"grad_norm": 0.8637105226516724,
"learning_rate": 0.00019749855685972679,
"loss": 1.0762852430343628,
"step": 71
},
{
"epoch": 0.04152249134948097,
"grad_norm": 0.8108904957771301,
"learning_rate": 0.0001974600731191072,
"loss": 1.4130628108978271,
"step": 72
},
{
"epoch": 0.04209919261822376,
"grad_norm": 1.2491207122802734,
"learning_rate": 0.00019742158937848762,
"loss": 1.7983347177505493,
"step": 73
},
{
"epoch": 0.04267589388696655,
"grad_norm": 1.1523128747940063,
"learning_rate": 0.000197383105637868,
"loss": 1.5859603881835938,
"step": 74
},
{
"epoch": 0.04325259515570934,
"grad_norm": 0.7240892648696899,
"learning_rate": 0.00019734462189724842,
"loss": 1.4029178619384766,
"step": 75
},
{
"epoch": 0.04382929642445214,
"grad_norm": 0.7445366978645325,
"learning_rate": 0.00019730613815662884,
"loss": 1.351811170578003,
"step": 76
},
{
"epoch": 0.04440599769319493,
"grad_norm": 0.9881113767623901,
"learning_rate": 0.00019726765441600925,
"loss": 1.437370777130127,
"step": 77
},
{
"epoch": 0.04498269896193772,
"grad_norm": 1.0404249429702759,
"learning_rate": 0.00019722917067538967,
"loss": 1.0401325225830078,
"step": 78
},
{
"epoch": 0.04555940023068051,
"grad_norm": 0.998892605304718,
"learning_rate": 0.00019719068693477008,
"loss": 1.2733221054077148,
"step": 79
},
{
"epoch": 0.0461361014994233,
"grad_norm": 1.0299255847930908,
"learning_rate": 0.0001971522031941505,
"loss": 1.8878190517425537,
"step": 80
},
{
"epoch": 0.04671280276816609,
"grad_norm": 0.6168495416641235,
"learning_rate": 0.0001971137194535309,
"loss": 1.3375468254089355,
"step": 81
},
{
"epoch": 0.04728950403690888,
"grad_norm": 0.645830512046814,
"learning_rate": 0.0001970752357129113,
"loss": 0.986657440662384,
"step": 82
},
{
"epoch": 0.04786620530565167,
"grad_norm": 0.7971145510673523,
"learning_rate": 0.00019703675197229172,
"loss": 1.3205912113189697,
"step": 83
},
{
"epoch": 0.04844290657439446,
"grad_norm": 0.6297418475151062,
"learning_rate": 0.00019699826823167213,
"loss": 1.3360888957977295,
"step": 84
},
{
"epoch": 0.049019607843137254,
"grad_norm": 0.9845420718193054,
"learning_rate": 0.00019695978449105255,
"loss": 1.4006659984588623,
"step": 85
},
{
"epoch": 0.049596309111880045,
"grad_norm": 0.73700350522995,
"learning_rate": 0.00019692130075043296,
"loss": 1.1298922300338745,
"step": 86
},
{
"epoch": 0.050173010380622836,
"grad_norm": 0.7659608721733093,
"learning_rate": 0.00019688281700981338,
"loss": 1.2487225532531738,
"step": 87
},
{
"epoch": 0.05074971164936563,
"grad_norm": 0.7576966285705566,
"learning_rate": 0.0001968443332691938,
"loss": 1.346827507019043,
"step": 88
},
{
"epoch": 0.05132641291810842,
"grad_norm": 0.6777650117874146,
"learning_rate": 0.0001968058495285742,
"loss": 1.9484481811523438,
"step": 89
},
{
"epoch": 0.05190311418685121,
"grad_norm": 0.9935969114303589,
"learning_rate": 0.0001967673657879546,
"loss": 1.1737089157104492,
"step": 90
},
{
"epoch": 0.052479815455594,
"grad_norm": 1.0581051111221313,
"learning_rate": 0.000196728882047335,
"loss": 1.2755905389785767,
"step": 91
},
{
"epoch": 0.0530565167243368,
"grad_norm": 0.8372200131416321,
"learning_rate": 0.00019669039830671543,
"loss": 1.7988427877426147,
"step": 92
},
{
"epoch": 0.05363321799307959,
"grad_norm": 0.8300452828407288,
"learning_rate": 0.00019665191456609584,
"loss": 0.9904743432998657,
"step": 93
},
{
"epoch": 0.05420991926182238,
"grad_norm": 0.6703553199768066,
"learning_rate": 0.00019661343082547626,
"loss": 1.2092053890228271,
"step": 94
},
{
"epoch": 0.05478662053056517,
"grad_norm": 0.703804075717926,
"learning_rate": 0.00019657494708485667,
"loss": 1.1028215885162354,
"step": 95
},
{
"epoch": 0.05536332179930796,
"grad_norm": 0.8232657313346863,
"learning_rate": 0.0001965364633442371,
"loss": 1.3875727653503418,
"step": 96
},
{
"epoch": 0.05594002306805075,
"grad_norm": 0.6119164824485779,
"learning_rate": 0.00019649797960361747,
"loss": 1.161183476448059,
"step": 97
},
{
"epoch": 0.05651672433679354,
"grad_norm": 0.7460926175117493,
"learning_rate": 0.0001964594958629979,
"loss": 1.3667285442352295,
"step": 98
},
{
"epoch": 0.05709342560553633,
"grad_norm": 0.6345133185386658,
"learning_rate": 0.0001964210121223783,
"loss": 1.1740115880966187,
"step": 99
},
{
"epoch": 0.05767012687427912,
"grad_norm": 0.800463080406189,
"learning_rate": 0.00019638252838175872,
"loss": 1.1274670362472534,
"step": 100
},
{
"epoch": 0.058246828143021914,
"grad_norm": 0.6817663311958313,
"learning_rate": 0.00019634404464113914,
"loss": 1.2432150840759277,
"step": 101
},
{
"epoch": 0.058823529411764705,
"grad_norm": 0.7663673162460327,
"learning_rate": 0.00019630556090051955,
"loss": 1.2066948413848877,
"step": 102
},
{
"epoch": 0.059400230680507496,
"grad_norm": 1.0259535312652588,
"learning_rate": 0.00019626707715989997,
"loss": 1.3713116645812988,
"step": 103
},
{
"epoch": 0.05997693194925029,
"grad_norm": 0.6617158055305481,
"learning_rate": 0.00019622859341928038,
"loss": 1.0320123434066772,
"step": 104
},
{
"epoch": 0.06055363321799308,
"grad_norm": 1.0050235986709595,
"learning_rate": 0.00019619010967866077,
"loss": 1.5375267267227173,
"step": 105
},
{
"epoch": 0.06113033448673587,
"grad_norm": 0.5563177466392517,
"learning_rate": 0.00019615162593804119,
"loss": 0.9102802276611328,
"step": 106
},
{
"epoch": 0.06170703575547866,
"grad_norm": 0.9994164109230042,
"learning_rate": 0.0001961131421974216,
"loss": 1.6505589485168457,
"step": 107
},
{
"epoch": 0.06228373702422145,
"grad_norm": 0.907625675201416,
"learning_rate": 0.00019607465845680202,
"loss": 1.6013598442077637,
"step": 108
},
{
"epoch": 0.06286043829296424,
"grad_norm": 1.0009554624557495,
"learning_rate": 0.00019603617471618243,
"loss": 1.0403454303741455,
"step": 109
},
{
"epoch": 0.06343713956170703,
"grad_norm": 0.8243467807769775,
"learning_rate": 0.00019599769097556285,
"loss": 1.5382654666900635,
"step": 110
},
{
"epoch": 0.06401384083044982,
"grad_norm": 1.0160003900527954,
"learning_rate": 0.00019595920723494326,
"loss": 1.2732863426208496,
"step": 111
},
{
"epoch": 0.06459054209919261,
"grad_norm": 0.608269453048706,
"learning_rate": 0.00019592072349432368,
"loss": 1.070478916168213,
"step": 112
},
{
"epoch": 0.0651672433679354,
"grad_norm": 0.7176778316497803,
"learning_rate": 0.00019588223975370406,
"loss": 1.302718162536621,
"step": 113
},
{
"epoch": 0.0657439446366782,
"grad_norm": 0.551771879196167,
"learning_rate": 0.00019584375601308448,
"loss": 0.9242706894874573,
"step": 114
},
{
"epoch": 0.06632064590542099,
"grad_norm": 0.9680222868919373,
"learning_rate": 0.0001958052722724649,
"loss": 1.9658548831939697,
"step": 115
},
{
"epoch": 0.06689734717416378,
"grad_norm": 0.8025707602500916,
"learning_rate": 0.0001957667885318453,
"loss": 1.5753577947616577,
"step": 116
},
{
"epoch": 0.06747404844290658,
"grad_norm": 0.7211287021636963,
"learning_rate": 0.00019572830479122573,
"loss": 1.3677327632904053,
"step": 117
},
{
"epoch": 0.06805074971164937,
"grad_norm": 0.7547542452812195,
"learning_rate": 0.00019568982105060614,
"loss": 1.507096767425537,
"step": 118
},
{
"epoch": 0.06862745098039216,
"grad_norm": 0.6146650314331055,
"learning_rate": 0.00019565133730998656,
"loss": 1.1320711374282837,
"step": 119
},
{
"epoch": 0.06920415224913495,
"grad_norm": 0.7611070275306702,
"learning_rate": 0.00019561285356936697,
"loss": 1.207049012184143,
"step": 120
},
{
"epoch": 0.06978085351787774,
"grad_norm": 0.714883029460907,
"learning_rate": 0.00019557436982874736,
"loss": 1.3823729753494263,
"step": 121
},
{
"epoch": 0.07035755478662054,
"grad_norm": 0.6768732666969299,
"learning_rate": 0.00019553588608812778,
"loss": 1.3038188219070435,
"step": 122
},
{
"epoch": 0.07093425605536333,
"grad_norm": 0.6013675332069397,
"learning_rate": 0.0001954974023475082,
"loss": 1.056199073791504,
"step": 123
},
{
"epoch": 0.07151095732410612,
"grad_norm": 0.8240784406661987,
"learning_rate": 0.0001954589186068886,
"loss": 1.4242757558822632,
"step": 124
},
{
"epoch": 0.07208765859284891,
"grad_norm": 0.6539785265922546,
"learning_rate": 0.00019542043486626902,
"loss": 1.161075472831726,
"step": 125
},
{
"epoch": 0.0726643598615917,
"grad_norm": 0.6347744464874268,
"learning_rate": 0.00019538195112564944,
"loss": 1.179503321647644,
"step": 126
},
{
"epoch": 0.07324106113033449,
"grad_norm": 0.7294688820838928,
"learning_rate": 0.00019534346738502985,
"loss": 1.2521535158157349,
"step": 127
},
{
"epoch": 0.07381776239907728,
"grad_norm": 0.6087843179702759,
"learning_rate": 0.00019530498364441027,
"loss": 1.0938013792037964,
"step": 128
},
{
"epoch": 0.07439446366782007,
"grad_norm": 1.116716980934143,
"learning_rate": 0.00019526649990379066,
"loss": 1.74098539352417,
"step": 129
},
{
"epoch": 0.07497116493656286,
"grad_norm": 0.7590331435203552,
"learning_rate": 0.00019522801616317107,
"loss": 1.2943538427352905,
"step": 130
},
{
"epoch": 0.07554786620530565,
"grad_norm": 0.9142744541168213,
"learning_rate": 0.00019518953242255149,
"loss": 1.0948201417922974,
"step": 131
},
{
"epoch": 0.07612456747404844,
"grad_norm": 0.8165064454078674,
"learning_rate": 0.0001951510486819319,
"loss": 1.5152888298034668,
"step": 132
},
{
"epoch": 0.07670126874279123,
"grad_norm": 0.8904751539230347,
"learning_rate": 0.00019511256494131232,
"loss": 1.3492425680160522,
"step": 133
},
{
"epoch": 0.07727797001153403,
"grad_norm": 0.632338285446167,
"learning_rate": 0.00019507408120069273,
"loss": 1.1460604667663574,
"step": 134
},
{
"epoch": 0.07785467128027682,
"grad_norm": 0.6621445417404175,
"learning_rate": 0.00019503559746007315,
"loss": 1.153398871421814,
"step": 135
},
{
"epoch": 0.0784313725490196,
"grad_norm": 0.928593635559082,
"learning_rate": 0.00019499711371945356,
"loss": 1.4575080871582031,
"step": 136
},
{
"epoch": 0.0790080738177624,
"grad_norm": 0.9125704765319824,
"learning_rate": 0.00019495862997883395,
"loss": 1.176555871963501,
"step": 137
},
{
"epoch": 0.07958477508650519,
"grad_norm": 0.7735126614570618,
"learning_rate": 0.00019492014623821437,
"loss": 1.3028615713119507,
"step": 138
},
{
"epoch": 0.08016147635524798,
"grad_norm": 1.4182281494140625,
"learning_rate": 0.00019488166249759478,
"loss": 1.7123095989227295,
"step": 139
},
{
"epoch": 0.08073817762399077,
"grad_norm": 0.957777738571167,
"learning_rate": 0.0001948431787569752,
"loss": 1.2952847480773926,
"step": 140
},
{
"epoch": 0.08131487889273356,
"grad_norm": 0.6284865140914917,
"learning_rate": 0.0001948046950163556,
"loss": 1.063300609588623,
"step": 141
},
{
"epoch": 0.08189158016147635,
"grad_norm": 1.020240068435669,
"learning_rate": 0.00019476621127573603,
"loss": 1.0956578254699707,
"step": 142
},
{
"epoch": 0.08246828143021914,
"grad_norm": 0.9629870057106018,
"learning_rate": 0.00019472772753511644,
"loss": 1.6626744270324707,
"step": 143
},
{
"epoch": 0.08304498269896193,
"grad_norm": 0.723129391670227,
"learning_rate": 0.00019468924379449686,
"loss": 1.5930454730987549,
"step": 144
},
{
"epoch": 0.08362168396770472,
"grad_norm": 0.6031758785247803,
"learning_rate": 0.00019465076005387725,
"loss": 1.3550267219543457,
"step": 145
},
{
"epoch": 0.08419838523644751,
"grad_norm": 0.6608120799064636,
"learning_rate": 0.00019461227631325766,
"loss": 1.091226577758789,
"step": 146
},
{
"epoch": 0.0847750865051903,
"grad_norm": 0.8583825826644897,
"learning_rate": 0.00019457379257263808,
"loss": 1.2840064764022827,
"step": 147
},
{
"epoch": 0.0853517877739331,
"grad_norm": 0.6371753215789795,
"learning_rate": 0.0001945353088320185,
"loss": 1.0223405361175537,
"step": 148
},
{
"epoch": 0.08592848904267589,
"grad_norm": 0.6101475954055786,
"learning_rate": 0.0001944968250913989,
"loss": 1.2935165166854858,
"step": 149
},
{
"epoch": 0.08650519031141868,
"grad_norm": 0.8921840190887451,
"learning_rate": 0.00019445834135077932,
"loss": 1.3194819688796997,
"step": 150
},
{
"epoch": 0.08708189158016148,
"grad_norm": 1.0423651933670044,
"learning_rate": 0.0001944198576101597,
"loss": 1.162503957748413,
"step": 151
},
{
"epoch": 0.08765859284890427,
"grad_norm": 0.9011998772621155,
"learning_rate": 0.00019438137386954013,
"loss": 1.4854192733764648,
"step": 152
},
{
"epoch": 0.08823529411764706,
"grad_norm": 0.6850185990333557,
"learning_rate": 0.00019434289012892054,
"loss": 1.2653287649154663,
"step": 153
},
{
"epoch": 0.08881199538638986,
"grad_norm": 0.5742697715759277,
"learning_rate": 0.00019430440638830093,
"loss": 1.1639142036437988,
"step": 154
},
{
"epoch": 0.08938869665513265,
"grad_norm": 0.5625914931297302,
"learning_rate": 0.00019426592264768134,
"loss": 1.0387107133865356,
"step": 155
},
{
"epoch": 0.08996539792387544,
"grad_norm": 0.7183355689048767,
"learning_rate": 0.00019422743890706176,
"loss": 1.211965799331665,
"step": 156
},
{
"epoch": 0.09054209919261823,
"grad_norm": 0.8835011124610901,
"learning_rate": 0.00019418895516644217,
"loss": 1.0958670377731323,
"step": 157
},
{
"epoch": 0.09111880046136102,
"grad_norm": 0.6885069608688354,
"learning_rate": 0.0001941504714258226,
"loss": 1.297393798828125,
"step": 158
},
{
"epoch": 0.09169550173010381,
"grad_norm": 0.7518923878669739,
"learning_rate": 0.000194111987685203,
"loss": 1.1739790439605713,
"step": 159
},
{
"epoch": 0.0922722029988466,
"grad_norm": 0.8452180027961731,
"learning_rate": 0.00019407350394458342,
"loss": 1.2312185764312744,
"step": 160
},
{
"epoch": 0.09284890426758939,
"grad_norm": 0.8018324971199036,
"learning_rate": 0.00019403502020396384,
"loss": 1.392999291419983,
"step": 161
},
{
"epoch": 0.09342560553633218,
"grad_norm": 0.743302583694458,
"learning_rate": 0.00019399653646334422,
"loss": 1.1602349281311035,
"step": 162
},
{
"epoch": 0.09400230680507497,
"grad_norm": 0.551163911819458,
"learning_rate": 0.00019395805272272464,
"loss": 1.0061742067337036,
"step": 163
},
{
"epoch": 0.09457900807381776,
"grad_norm": 0.6732088327407837,
"learning_rate": 0.00019391956898210505,
"loss": 1.2422168254852295,
"step": 164
},
{
"epoch": 0.09515570934256055,
"grad_norm": 0.6432737708091736,
"learning_rate": 0.00019388108524148547,
"loss": 0.8992981910705566,
"step": 165
},
{
"epoch": 0.09573241061130335,
"grad_norm": 0.893099308013916,
"learning_rate": 0.00019384260150086589,
"loss": 1.4426004886627197,
"step": 166
},
{
"epoch": 0.09630911188004614,
"grad_norm": 0.7915064692497253,
"learning_rate": 0.0001938041177602463,
"loss": 1.1332988739013672,
"step": 167
},
{
"epoch": 0.09688581314878893,
"grad_norm": 0.7785482406616211,
"learning_rate": 0.00019376563401962672,
"loss": 1.1662797927856445,
"step": 168
},
{
"epoch": 0.09746251441753172,
"grad_norm": 0.7676025032997131,
"learning_rate": 0.00019372715027900713,
"loss": 1.276615858078003,
"step": 169
},
{
"epoch": 0.09803921568627451,
"grad_norm": 0.7058248519897461,
"learning_rate": 0.00019368866653838752,
"loss": 1.2280982732772827,
"step": 170
},
{
"epoch": 0.0986159169550173,
"grad_norm": 0.7814574241638184,
"learning_rate": 0.00019365018279776793,
"loss": 1.6545538902282715,
"step": 171
},
{
"epoch": 0.09919261822376009,
"grad_norm": 0.5429863333702087,
"learning_rate": 0.00019361169905714835,
"loss": 1.047904133796692,
"step": 172
},
{
"epoch": 0.09976931949250288,
"grad_norm": 0.7021914124488831,
"learning_rate": 0.00019357321531652876,
"loss": 1.3578035831451416,
"step": 173
},
{
"epoch": 0.10034602076124567,
"grad_norm": 0.7608473896980286,
"learning_rate": 0.00019353473157590918,
"loss": 1.3332273960113525,
"step": 174
},
{
"epoch": 0.10092272202998846,
"grad_norm": 0.8988219499588013,
"learning_rate": 0.0001934962478352896,
"loss": 1.5955560207366943,
"step": 175
},
{
"epoch": 0.10149942329873125,
"grad_norm": 0.8784334659576416,
"learning_rate": 0.00019345776409467,
"loss": 1.4267313480377197,
"step": 176
},
{
"epoch": 0.10207612456747404,
"grad_norm": 0.9006462097167969,
"learning_rate": 0.00019341928035405043,
"loss": 1.2960124015808105,
"step": 177
},
{
"epoch": 0.10265282583621683,
"grad_norm": 0.7736122608184814,
"learning_rate": 0.00019338079661343081,
"loss": 1.3841434717178345,
"step": 178
},
{
"epoch": 0.10322952710495963,
"grad_norm": 0.8202458620071411,
"learning_rate": 0.00019334231287281123,
"loss": 1.2962226867675781,
"step": 179
},
{
"epoch": 0.10380622837370242,
"grad_norm": 0.743390679359436,
"learning_rate": 0.00019330382913219164,
"loss": 1.010484218597412,
"step": 180
},
{
"epoch": 0.10438292964244521,
"grad_norm": 0.7926476001739502,
"learning_rate": 0.00019326534539157206,
"loss": 1.45333731174469,
"step": 181
},
{
"epoch": 0.104959630911188,
"grad_norm": 0.527367889881134,
"learning_rate": 0.00019322686165095248,
"loss": 0.7763160467147827,
"step": 182
},
{
"epoch": 0.10553633217993079,
"grad_norm": 1.0006170272827148,
"learning_rate": 0.0001931883779103329,
"loss": 1.089290738105774,
"step": 183
},
{
"epoch": 0.1061130334486736,
"grad_norm": 0.7497840523719788,
"learning_rate": 0.0001931498941697133,
"loss": 1.1641783714294434,
"step": 184
},
{
"epoch": 0.10668973471741638,
"grad_norm": 0.6732814908027649,
"learning_rate": 0.00019311141042909372,
"loss": 1.0954653024673462,
"step": 185
},
{
"epoch": 0.10726643598615918,
"grad_norm": 0.7817464470863342,
"learning_rate": 0.0001930729266884741,
"loss": 1.5050190687179565,
"step": 186
},
{
"epoch": 0.10784313725490197,
"grad_norm": 0.813869297504425,
"learning_rate": 0.00019303444294785452,
"loss": 1.5048751831054688,
"step": 187
},
{
"epoch": 0.10841983852364476,
"grad_norm": 0.6368386745452881,
"learning_rate": 0.00019299595920723494,
"loss": 1.0601242780685425,
"step": 188
},
{
"epoch": 0.10899653979238755,
"grad_norm": 0.817610502243042,
"learning_rate": 0.00019295747546661536,
"loss": 1.2267041206359863,
"step": 189
},
{
"epoch": 0.10957324106113034,
"grad_norm": 0.768892228603363,
"learning_rate": 0.00019291899172599577,
"loss": 1.0935152769088745,
"step": 190
},
{
"epoch": 0.11014994232987313,
"grad_norm": 0.8072124123573303,
"learning_rate": 0.00019288050798537619,
"loss": 1.5566798448562622,
"step": 191
},
{
"epoch": 0.11072664359861592,
"grad_norm": 0.7275574803352356,
"learning_rate": 0.0001928420242447566,
"loss": 1.5278323888778687,
"step": 192
},
{
"epoch": 0.11130334486735871,
"grad_norm": 0.6448370814323425,
"learning_rate": 0.00019280354050413702,
"loss": 1.2096084356307983,
"step": 193
},
{
"epoch": 0.1118800461361015,
"grad_norm": 0.9334590435028076,
"learning_rate": 0.0001927650567635174,
"loss": 1.2487378120422363,
"step": 194
},
{
"epoch": 0.11245674740484429,
"grad_norm": 0.6830427646636963,
"learning_rate": 0.00019272657302289782,
"loss": 1.3567012548446655,
"step": 195
},
{
"epoch": 0.11303344867358708,
"grad_norm": 0.9035089612007141,
"learning_rate": 0.00019268808928227823,
"loss": 1.1751577854156494,
"step": 196
},
{
"epoch": 0.11361014994232987,
"grad_norm": 0.5569579005241394,
"learning_rate": 0.00019264960554165865,
"loss": 1.0159823894500732,
"step": 197
},
{
"epoch": 0.11418685121107267,
"grad_norm": 0.6232113838195801,
"learning_rate": 0.00019261112180103907,
"loss": 1.0779603719711304,
"step": 198
},
{
"epoch": 0.11476355247981546,
"grad_norm": 0.7666590213775635,
"learning_rate": 0.00019257263806041948,
"loss": 1.2052793502807617,
"step": 199
},
{
"epoch": 0.11534025374855825,
"grad_norm": 0.6218665242195129,
"learning_rate": 0.0001925341543197999,
"loss": 1.2699958086013794,
"step": 200
},
{
"epoch": 0.11591695501730104,
"grad_norm": 0.6059345006942749,
"learning_rate": 0.0001924956705791803,
"loss": 1.0522977113723755,
"step": 201
},
{
"epoch": 0.11649365628604383,
"grad_norm": 0.6952403783798218,
"learning_rate": 0.0001924571868385607,
"loss": 1.3461261987686157,
"step": 202
},
{
"epoch": 0.11707035755478662,
"grad_norm": 0.7097076177597046,
"learning_rate": 0.00019241870309794111,
"loss": 1.0901520252227783,
"step": 203
},
{
"epoch": 0.11764705882352941,
"grad_norm": 1.3426554203033447,
"learning_rate": 0.00019238021935732153,
"loss": 1.8886399269104004,
"step": 204
},
{
"epoch": 0.1182237600922722,
"grad_norm": 1.00478196144104,
"learning_rate": 0.00019234173561670195,
"loss": 1.2172045707702637,
"step": 205
},
{
"epoch": 0.11880046136101499,
"grad_norm": 0.8586134314537048,
"learning_rate": 0.00019230325187608236,
"loss": 1.0469045639038086,
"step": 206
},
{
"epoch": 0.11937716262975778,
"grad_norm": 0.7872591018676758,
"learning_rate": 0.00019226476813546278,
"loss": 1.1137733459472656,
"step": 207
},
{
"epoch": 0.11995386389850057,
"grad_norm": 0.8721824884414673,
"learning_rate": 0.0001922262843948432,
"loss": 1.3743940591812134,
"step": 208
},
{
"epoch": 0.12053056516724336,
"grad_norm": 0.6212759613990784,
"learning_rate": 0.0001921878006542236,
"loss": 0.900457501411438,
"step": 209
},
{
"epoch": 0.12110726643598616,
"grad_norm": 1.0083750486373901,
"learning_rate": 0.000192149316913604,
"loss": 1.339089035987854,
"step": 210
},
{
"epoch": 0.12168396770472895,
"grad_norm": 0.794417142868042,
"learning_rate": 0.0001921108331729844,
"loss": 1.194704532623291,
"step": 211
},
{
"epoch": 0.12226066897347174,
"grad_norm": 1.1438184976577759,
"learning_rate": 0.00019207234943236483,
"loss": 1.3168675899505615,
"step": 212
},
{
"epoch": 0.12283737024221453,
"grad_norm": 0.5655554533004761,
"learning_rate": 0.00019203386569174524,
"loss": 1.008853793144226,
"step": 213
},
{
"epoch": 0.12341407151095732,
"grad_norm": 0.7868179082870483,
"learning_rate": 0.00019199538195112566,
"loss": 1.3174118995666504,
"step": 214
},
{
"epoch": 0.12399077277970011,
"grad_norm": 0.6736404299736023,
"learning_rate": 0.00019195689821050607,
"loss": 1.054055094718933,
"step": 215
},
{
"epoch": 0.1245674740484429,
"grad_norm": 0.7425172328948975,
"learning_rate": 0.00019191841446988649,
"loss": 1.2892072200775146,
"step": 216
},
{
"epoch": 0.1251441753171857,
"grad_norm": 0.7724793553352356,
"learning_rate": 0.00019187993072926687,
"loss": 1.3278907537460327,
"step": 217
},
{
"epoch": 0.12572087658592848,
"grad_norm": 0.7415600419044495,
"learning_rate": 0.0001918414469886473,
"loss": 1.1893579959869385,
"step": 218
},
{
"epoch": 0.12629757785467127,
"grad_norm": 0.8178536295890808,
"learning_rate": 0.0001918029632480277,
"loss": 1.3486452102661133,
"step": 219
},
{
"epoch": 0.12687427912341406,
"grad_norm": 0.803683340549469,
"learning_rate": 0.00019176447950740812,
"loss": 1.297539234161377,
"step": 220
},
{
"epoch": 0.12745098039215685,
"grad_norm": 0.6226982474327087,
"learning_rate": 0.00019172599576678854,
"loss": 1.0952314138412476,
"step": 221
},
{
"epoch": 0.12802768166089964,
"grad_norm": 0.652317225933075,
"learning_rate": 0.00019168751202616895,
"loss": 0.9360387325286865,
"step": 222
},
{
"epoch": 0.12860438292964244,
"grad_norm": 0.8147749900817871,
"learning_rate": 0.00019164902828554937,
"loss": 1.0632787942886353,
"step": 223
},
{
"epoch": 0.12918108419838523,
"grad_norm": 0.9202223420143127,
"learning_rate": 0.00019161054454492978,
"loss": 1.3678290843963623,
"step": 224
},
{
"epoch": 0.12975778546712802,
"grad_norm": 1.1951165199279785,
"learning_rate": 0.00019157206080431017,
"loss": 1.2670767307281494,
"step": 225
},
{
"epoch": 0.1303344867358708,
"grad_norm": 0.7266793847084045,
"learning_rate": 0.00019153357706369058,
"loss": 1.1158084869384766,
"step": 226
},
{
"epoch": 0.1309111880046136,
"grad_norm": 0.6181395649909973,
"learning_rate": 0.000191495093323071,
"loss": 1.1156044006347656,
"step": 227
},
{
"epoch": 0.1314878892733564,
"grad_norm": 0.7921776175498962,
"learning_rate": 0.00019145660958245142,
"loss": 1.001752257347107,
"step": 228
},
{
"epoch": 0.13206459054209918,
"grad_norm": 0.5998401045799255,
"learning_rate": 0.00019141812584183183,
"loss": 0.7688826322555542,
"step": 229
},
{
"epoch": 0.13264129181084197,
"grad_norm": 0.7660285234451294,
"learning_rate": 0.00019137964210121225,
"loss": 1.2462745904922485,
"step": 230
},
{
"epoch": 0.13321799307958476,
"grad_norm": 0.7925796508789062,
"learning_rate": 0.00019134115836059266,
"loss": 1.1053651571273804,
"step": 231
},
{
"epoch": 0.13379469434832755,
"grad_norm": 0.6407649517059326,
"learning_rate": 0.00019130267461997308,
"loss": 0.8710946440696716,
"step": 232
},
{
"epoch": 0.13437139561707034,
"grad_norm": 0.7516645789146423,
"learning_rate": 0.00019126419087935346,
"loss": 1.009436011314392,
"step": 233
},
{
"epoch": 0.13494809688581316,
"grad_norm": 0.5998948216438293,
"learning_rate": 0.00019122570713873388,
"loss": 1.0309457778930664,
"step": 234
},
{
"epoch": 0.13552479815455595,
"grad_norm": 1.1897567510604858,
"learning_rate": 0.0001911872233981143,
"loss": 0.9930981397628784,
"step": 235
},
{
"epoch": 0.13610149942329874,
"grad_norm": 0.7404462695121765,
"learning_rate": 0.0001911487396574947,
"loss": 1.1489670276641846,
"step": 236
},
{
"epoch": 0.13667820069204153,
"grad_norm": 0.7168471813201904,
"learning_rate": 0.00019111025591687513,
"loss": 1.202157735824585,
"step": 237
},
{
"epoch": 0.13725490196078433,
"grad_norm": 0.7502639293670654,
"learning_rate": 0.00019107177217625554,
"loss": 1.022951364517212,
"step": 238
},
{
"epoch": 0.13783160322952712,
"grad_norm": 0.6795151233673096,
"learning_rate": 0.00019103328843563596,
"loss": 1.1194236278533936,
"step": 239
},
{
"epoch": 0.1384083044982699,
"grad_norm": 0.7620200514793396,
"learning_rate": 0.00019099480469501637,
"loss": 0.8411365747451782,
"step": 240
},
{
"epoch": 0.1389850057670127,
"grad_norm": 0.6618032455444336,
"learning_rate": 0.00019095632095439676,
"loss": 0.7801553606987,
"step": 241
},
{
"epoch": 0.1395617070357555,
"grad_norm": 0.9366044402122498,
"learning_rate": 0.00019091783721377718,
"loss": 1.0621672868728638,
"step": 242
},
{
"epoch": 0.14013840830449828,
"grad_norm": 1.0874788761138916,
"learning_rate": 0.0001908793534731576,
"loss": 1.6787068843841553,
"step": 243
},
{
"epoch": 0.14071510957324107,
"grad_norm": 0.8962084054946899,
"learning_rate": 0.000190840869732538,
"loss": 1.1922732591629028,
"step": 244
},
{
"epoch": 0.14129181084198386,
"grad_norm": 0.7039315700531006,
"learning_rate": 0.00019080238599191842,
"loss": 1.177897334098816,
"step": 245
},
{
"epoch": 0.14186851211072665,
"grad_norm": 0.9172819256782532,
"learning_rate": 0.00019076390225129884,
"loss": 1.3276829719543457,
"step": 246
},
{
"epoch": 0.14244521337946944,
"grad_norm": 1.002533197402954,
"learning_rate": 0.00019072541851067925,
"loss": 1.11848783493042,
"step": 247
},
{
"epoch": 0.14302191464821223,
"grad_norm": 0.9164738059043884,
"learning_rate": 0.00019068693477005967,
"loss": 0.7153259515762329,
"step": 248
},
{
"epoch": 0.14359861591695502,
"grad_norm": 0.7163867354393005,
"learning_rate": 0.00019064845102944006,
"loss": 1.206921100616455,
"step": 249
},
{
"epoch": 0.14417531718569782,
"grad_norm": 0.8200199604034424,
"learning_rate": 0.00019060996728882047,
"loss": 0.9798004031181335,
"step": 250
},
{
"epoch": 0.1447520184544406,
"grad_norm": 0.9806034564971924,
"learning_rate": 0.00019057148354820089,
"loss": 1.0969898700714111,
"step": 251
},
{
"epoch": 0.1453287197231834,
"grad_norm": 1.0849624872207642,
"learning_rate": 0.0001905329998075813,
"loss": 1.2618253231048584,
"step": 252
},
{
"epoch": 0.1459054209919262,
"grad_norm": 0.8736698031425476,
"learning_rate": 0.00019049451606696172,
"loss": 1.1534979343414307,
"step": 253
},
{
"epoch": 0.14648212226066898,
"grad_norm": 0.6748337745666504,
"learning_rate": 0.00019045603232634213,
"loss": 0.9178370237350464,
"step": 254
},
{
"epoch": 0.14705882352941177,
"grad_norm": 0.8655548691749573,
"learning_rate": 0.00019041754858572255,
"loss": 1.157179355621338,
"step": 255
},
{
"epoch": 0.14763552479815456,
"grad_norm": 0.7558174133300781,
"learning_rate": 0.00019037906484510296,
"loss": 0.7844438552856445,
"step": 256
},
{
"epoch": 0.14821222606689735,
"grad_norm": 0.8278117179870605,
"learning_rate": 0.00019034058110448335,
"loss": 1.4085724353790283,
"step": 257
},
{
"epoch": 0.14878892733564014,
"grad_norm": 0.9563509225845337,
"learning_rate": 0.00019030209736386377,
"loss": 1.244802713394165,
"step": 258
},
{
"epoch": 0.14936562860438293,
"grad_norm": 0.8018333315849304,
"learning_rate": 0.00019026361362324418,
"loss": 0.801522970199585,
"step": 259
},
{
"epoch": 0.14994232987312572,
"grad_norm": 0.555248498916626,
"learning_rate": 0.0001902251298826246,
"loss": 0.8989696502685547,
"step": 260
},
{
"epoch": 0.15051903114186851,
"grad_norm": 0.5092940926551819,
"learning_rate": 0.000190186646142005,
"loss": 0.8229849338531494,
"step": 261
},
{
"epoch": 0.1510957324106113,
"grad_norm": 0.614162266254425,
"learning_rate": 0.00019014816240138543,
"loss": 1.14143705368042,
"step": 262
},
{
"epoch": 0.1516724336793541,
"grad_norm": 0.7050411701202393,
"learning_rate": 0.00019010967866076584,
"loss": 1.2602849006652832,
"step": 263
},
{
"epoch": 0.1522491349480969,
"grad_norm": 0.8917875289916992,
"learning_rate": 0.00019007119492014626,
"loss": 1.2684617042541504,
"step": 264
},
{
"epoch": 0.15282583621683968,
"grad_norm": 0.7177139520645142,
"learning_rate": 0.00019003271117952665,
"loss": 0.664681077003479,
"step": 265
},
{
"epoch": 0.15340253748558247,
"grad_norm": 0.7513463497161865,
"learning_rate": 0.00018999422743890706,
"loss": 0.9689874649047852,
"step": 266
},
{
"epoch": 0.15397923875432526,
"grad_norm": 0.8350100517272949,
"learning_rate": 0.00018995574369828748,
"loss": 1.222740888595581,
"step": 267
},
{
"epoch": 0.15455594002306805,
"grad_norm": 1.152787685394287,
"learning_rate": 0.0001899172599576679,
"loss": 1.0707926750183105,
"step": 268
},
{
"epoch": 0.15513264129181084,
"grad_norm": 0.7810789346694946,
"learning_rate": 0.0001898787762170483,
"loss": 1.1552890539169312,
"step": 269
},
{
"epoch": 0.15570934256055363,
"grad_norm": 0.864863395690918,
"learning_rate": 0.00018984029247642872,
"loss": 1.2455859184265137,
"step": 270
},
{
"epoch": 0.15628604382929642,
"grad_norm": 0.578794002532959,
"learning_rate": 0.00018980180873580914,
"loss": 0.9284070730209351,
"step": 271
},
{
"epoch": 0.1568627450980392,
"grad_norm": 0.9245108962059021,
"learning_rate": 0.00018976332499518955,
"loss": 0.8936307430267334,
"step": 272
},
{
"epoch": 0.157439446366782,
"grad_norm": 1.022964358329773,
"learning_rate": 0.00018972484125456994,
"loss": 1.2052812576293945,
"step": 273
},
{
"epoch": 0.1580161476355248,
"grad_norm": 0.6136555075645447,
"learning_rate": 0.00018968635751395036,
"loss": 0.9395220875740051,
"step": 274
},
{
"epoch": 0.15859284890426759,
"grad_norm": 0.49354949593544006,
"learning_rate": 0.00018964787377333077,
"loss": 0.7979940176010132,
"step": 275
},
{
"epoch": 0.15916955017301038,
"grad_norm": 0.8118260502815247,
"learning_rate": 0.00018960939003271119,
"loss": 1.3310189247131348,
"step": 276
},
{
"epoch": 0.15974625144175317,
"grad_norm": 0.7864040732383728,
"learning_rate": 0.0001895709062920916,
"loss": 0.995107889175415,
"step": 277
},
{
"epoch": 0.16032295271049596,
"grad_norm": 0.7795019149780273,
"learning_rate": 0.00018953242255147202,
"loss": 1.031097412109375,
"step": 278
},
{
"epoch": 0.16089965397923875,
"grad_norm": 0.7358199954032898,
"learning_rate": 0.00018949393881085243,
"loss": 1.2151832580566406,
"step": 279
},
{
"epoch": 0.16147635524798154,
"grad_norm": 0.592187225818634,
"learning_rate": 0.00018945545507023285,
"loss": 1.18082857131958,
"step": 280
},
{
"epoch": 0.16205305651672433,
"grad_norm": 0.6349275708198547,
"learning_rate": 0.00018941697132961324,
"loss": 1.0011241436004639,
"step": 281
},
{
"epoch": 0.16262975778546712,
"grad_norm": 0.827673614025116,
"learning_rate": 0.00018937848758899365,
"loss": 1.1634137630462646,
"step": 282
},
{
"epoch": 0.1632064590542099,
"grad_norm": 0.7459465861320496,
"learning_rate": 0.00018934000384837407,
"loss": 1.2054771184921265,
"step": 283
},
{
"epoch": 0.1637831603229527,
"grad_norm": 0.8688679337501526,
"learning_rate": 0.00018930152010775448,
"loss": 1.5523681640625,
"step": 284
},
{
"epoch": 0.1643598615916955,
"grad_norm": 0.5501953959465027,
"learning_rate": 0.0001892630363671349,
"loss": 0.8807846903800964,
"step": 285
},
{
"epoch": 0.16493656286043828,
"grad_norm": 0.9370623230934143,
"learning_rate": 0.0001892245526265153,
"loss": 1.480832815170288,
"step": 286
},
{
"epoch": 0.16551326412918108,
"grad_norm": 0.824664831161499,
"learning_rate": 0.00018918606888589573,
"loss": 1.1490377187728882,
"step": 287
},
{
"epoch": 0.16608996539792387,
"grad_norm": 0.6960827708244324,
"learning_rate": 0.00018914758514527614,
"loss": 0.9883493185043335,
"step": 288
},
{
"epoch": 0.16666666666666666,
"grad_norm": 0.5384089946746826,
"learning_rate": 0.00018910910140465653,
"loss": 0.9772455096244812,
"step": 289
},
{
"epoch": 0.16724336793540945,
"grad_norm": 0.5826528072357178,
"learning_rate": 0.00018907061766403695,
"loss": 0.80659019947052,
"step": 290
},
{
"epoch": 0.16782006920415224,
"grad_norm": 0.8662609457969666,
"learning_rate": 0.00018903213392341736,
"loss": 1.438920497894287,
"step": 291
},
{
"epoch": 0.16839677047289503,
"grad_norm": 0.8694437742233276,
"learning_rate": 0.00018899365018279778,
"loss": 1.594082236289978,
"step": 292
},
{
"epoch": 0.16897347174163782,
"grad_norm": 0.9895355701446533,
"learning_rate": 0.0001889551664421782,
"loss": 1.1623947620391846,
"step": 293
},
{
"epoch": 0.1695501730103806,
"grad_norm": 0.7757118940353394,
"learning_rate": 0.0001889166827015586,
"loss": 1.2969348430633545,
"step": 294
},
{
"epoch": 0.1701268742791234,
"grad_norm": 1.1235777139663696,
"learning_rate": 0.00018887819896093902,
"loss": 1.5447598695755005,
"step": 295
},
{
"epoch": 0.1707035755478662,
"grad_norm": 0.5995392799377441,
"learning_rate": 0.00018883971522031944,
"loss": 1.1860620975494385,
"step": 296
},
{
"epoch": 0.17128027681660898,
"grad_norm": 0.7350177764892578,
"learning_rate": 0.00018880123147969983,
"loss": 1.1964070796966553,
"step": 297
},
{
"epoch": 0.17185697808535177,
"grad_norm": 0.7769676446914673,
"learning_rate": 0.00018876274773908024,
"loss": 0.9732775688171387,
"step": 298
},
{
"epoch": 0.17243367935409457,
"grad_norm": 1.0317054986953735,
"learning_rate": 0.00018872426399846066,
"loss": 1.1931625604629517,
"step": 299
},
{
"epoch": 0.17301038062283736,
"grad_norm": 0.855571985244751,
"learning_rate": 0.00018868578025784107,
"loss": 1.2726032733917236,
"step": 300
},
{
"epoch": 0.17358708189158017,
"grad_norm": 1.0038337707519531,
"learning_rate": 0.0001886472965172215,
"loss": 1.3021737337112427,
"step": 301
},
{
"epoch": 0.17416378316032297,
"grad_norm": 1.05097496509552,
"learning_rate": 0.0001886088127766019,
"loss": 1.6369917392730713,
"step": 302
},
{
"epoch": 0.17474048442906576,
"grad_norm": 0.6620575189590454,
"learning_rate": 0.00018857032903598232,
"loss": 1.0873693227767944,
"step": 303
},
{
"epoch": 0.17531718569780855,
"grad_norm": 0.8430469036102295,
"learning_rate": 0.0001885318452953627,
"loss": 1.1750123500823975,
"step": 304
},
{
"epoch": 0.17589388696655134,
"grad_norm": 0.8181238174438477,
"learning_rate": 0.00018849336155474312,
"loss": 1.3522461652755737,
"step": 305
},
{
"epoch": 0.17647058823529413,
"grad_norm": 0.6994307041168213,
"learning_rate": 0.00018845487781412354,
"loss": 1.327797293663025,
"step": 306
},
{
"epoch": 0.17704728950403692,
"grad_norm": 0.7090145349502563,
"learning_rate": 0.00018841639407350395,
"loss": 1.3075491189956665,
"step": 307
},
{
"epoch": 0.1776239907727797,
"grad_norm": 0.7612029314041138,
"learning_rate": 0.00018837791033288437,
"loss": 1.0585792064666748,
"step": 308
},
{
"epoch": 0.1782006920415225,
"grad_norm": 0.8592241406440735,
"learning_rate": 0.00018833942659226478,
"loss": 0.6441008448600769,
"step": 309
},
{
"epoch": 0.1787773933102653,
"grad_norm": 1.0303255319595337,
"learning_rate": 0.0001883009428516452,
"loss": 1.520599365234375,
"step": 310
},
{
"epoch": 0.17935409457900808,
"grad_norm": 0.80874103307724,
"learning_rate": 0.0001882624591110256,
"loss": 0.902335524559021,
"step": 311
},
{
"epoch": 0.17993079584775087,
"grad_norm": 0.7039778232574463,
"learning_rate": 0.000188223975370406,
"loss": 1.0226070880889893,
"step": 312
},
{
"epoch": 0.18050749711649366,
"grad_norm": 0.7102690935134888,
"learning_rate": 0.00018818549162978642,
"loss": 1.0590555667877197,
"step": 313
},
{
"epoch": 0.18108419838523646,
"grad_norm": 1.0405141115188599,
"learning_rate": 0.00018814700788916683,
"loss": 1.4237335920333862,
"step": 314
},
{
"epoch": 0.18166089965397925,
"grad_norm": 0.6633170247077942,
"learning_rate": 0.00018810852414854725,
"loss": 0.9277420043945312,
"step": 315
},
{
"epoch": 0.18223760092272204,
"grad_norm": 0.6740328073501587,
"learning_rate": 0.00018807004040792766,
"loss": 1.053580403327942,
"step": 316
},
{
"epoch": 0.18281430219146483,
"grad_norm": 0.6842854619026184,
"learning_rate": 0.00018803155666730808,
"loss": 1.0379540920257568,
"step": 317
},
{
"epoch": 0.18339100346020762,
"grad_norm": 0.6766674518585205,
"learning_rate": 0.0001879930729266885,
"loss": 0.9214432835578918,
"step": 318
},
{
"epoch": 0.1839677047289504,
"grad_norm": 0.8358355164527893,
"learning_rate": 0.0001879545891860689,
"loss": 1.069684624671936,
"step": 319
},
{
"epoch": 0.1845444059976932,
"grad_norm": 0.9044516086578369,
"learning_rate": 0.0001879161054454493,
"loss": 1.4757916927337646,
"step": 320
},
{
"epoch": 0.185121107266436,
"grad_norm": 0.9662521481513977,
"learning_rate": 0.0001878776217048297,
"loss": 1.3449480533599854,
"step": 321
},
{
"epoch": 0.18569780853517878,
"grad_norm": 0.8681714534759521,
"learning_rate": 0.00018783913796421013,
"loss": 1.2057011127471924,
"step": 322
},
{
"epoch": 0.18627450980392157,
"grad_norm": 0.7318335175514221,
"learning_rate": 0.00018780065422359054,
"loss": 1.276970386505127,
"step": 323
},
{
"epoch": 0.18685121107266436,
"grad_norm": 0.798865556716919,
"learning_rate": 0.00018776217048297096,
"loss": 1.1334099769592285,
"step": 324
},
{
"epoch": 0.18742791234140715,
"grad_norm": 0.6787270903587341,
"learning_rate": 0.00018772368674235137,
"loss": 1.0829839706420898,
"step": 325
},
{
"epoch": 0.18800461361014995,
"grad_norm": 0.705894947052002,
"learning_rate": 0.0001876852030017318,
"loss": 1.3146710395812988,
"step": 326
},
{
"epoch": 0.18858131487889274,
"grad_norm": 0.7403978705406189,
"learning_rate": 0.0001876467192611122,
"loss": 0.7811852693557739,
"step": 327
},
{
"epoch": 0.18915801614763553,
"grad_norm": 0.8138331770896912,
"learning_rate": 0.0001876082355204926,
"loss": 1.3800559043884277,
"step": 328
},
{
"epoch": 0.18973471741637832,
"grad_norm": 1.0053505897521973,
"learning_rate": 0.000187569751779873,
"loss": 1.502892017364502,
"step": 329
},
{
"epoch": 0.1903114186851211,
"grad_norm": 1.2905986309051514,
"learning_rate": 0.00018753126803925342,
"loss": 1.6044906377792358,
"step": 330
},
{
"epoch": 0.1908881199538639,
"grad_norm": 0.7266846299171448,
"learning_rate": 0.00018749278429863384,
"loss": 0.8269582390785217,
"step": 331
},
{
"epoch": 0.1914648212226067,
"grad_norm": 0.9892683029174805,
"learning_rate": 0.00018745430055801425,
"loss": 1.2374012470245361,
"step": 332
},
{
"epoch": 0.19204152249134948,
"grad_norm": 0.8026344180107117,
"learning_rate": 0.00018741581681739467,
"loss": 0.9166598916053772,
"step": 333
},
{
"epoch": 0.19261822376009227,
"grad_norm": 0.7790790796279907,
"learning_rate": 0.00018737733307677508,
"loss": 0.8837241530418396,
"step": 334
},
{
"epoch": 0.19319492502883506,
"grad_norm": 0.8625907897949219,
"learning_rate": 0.0001873388493361555,
"loss": 1.0963804721832275,
"step": 335
},
{
"epoch": 0.19377162629757785,
"grad_norm": 0.8408490419387817,
"learning_rate": 0.00018730036559553589,
"loss": 1.2887423038482666,
"step": 336
},
{
"epoch": 0.19434832756632064,
"grad_norm": 0.8141940236091614,
"learning_rate": 0.0001872618818549163,
"loss": 1.234419584274292,
"step": 337
},
{
"epoch": 0.19492502883506344,
"grad_norm": 0.7913158535957336,
"learning_rate": 0.00018722339811429672,
"loss": 0.8931217193603516,
"step": 338
},
{
"epoch": 0.19550173010380623,
"grad_norm": 0.9377291202545166,
"learning_rate": 0.00018718491437367713,
"loss": 1.1958264112472534,
"step": 339
},
{
"epoch": 0.19607843137254902,
"grad_norm": 1.1096664667129517,
"learning_rate": 0.00018714643063305755,
"loss": 0.871677041053772,
"step": 340
},
{
"epoch": 0.1966551326412918,
"grad_norm": 0.7379001379013062,
"learning_rate": 0.00018710794689243796,
"loss": 0.9309886693954468,
"step": 341
},
{
"epoch": 0.1972318339100346,
"grad_norm": 0.738572895526886,
"learning_rate": 0.00018706946315181838,
"loss": 1.065298080444336,
"step": 342
},
{
"epoch": 0.1978085351787774,
"grad_norm": 0.8279491066932678,
"learning_rate": 0.0001870309794111988,
"loss": 1.0682514905929565,
"step": 343
},
{
"epoch": 0.19838523644752018,
"grad_norm": 0.9108213782310486,
"learning_rate": 0.00018699249567057918,
"loss": 1.2043181657791138,
"step": 344
},
{
"epoch": 0.19896193771626297,
"grad_norm": 0.9347065687179565,
"learning_rate": 0.0001869540119299596,
"loss": 1.5744340419769287,
"step": 345
},
{
"epoch": 0.19953863898500576,
"grad_norm": 0.5783383250236511,
"learning_rate": 0.00018691552818934,
"loss": 0.7808327674865723,
"step": 346
},
{
"epoch": 0.20011534025374855,
"grad_norm": 0.661321759223938,
"learning_rate": 0.00018687704444872043,
"loss": 0.9458237290382385,
"step": 347
},
{
"epoch": 0.20069204152249134,
"grad_norm": 0.5592895746231079,
"learning_rate": 0.00018683856070810084,
"loss": 0.8761368989944458,
"step": 348
},
{
"epoch": 0.20126874279123413,
"grad_norm": 0.6626494526863098,
"learning_rate": 0.00018680007696748126,
"loss": 0.9110841751098633,
"step": 349
},
{
"epoch": 0.20184544405997693,
"grad_norm": 0.8392354249954224,
"learning_rate": 0.00018676159322686167,
"loss": 1.234721302986145,
"step": 350
},
{
"epoch": 0.20242214532871972,
"grad_norm": 0.5596436262130737,
"learning_rate": 0.0001867231094862421,
"loss": 0.837221622467041,
"step": 351
},
{
"epoch": 0.2029988465974625,
"grad_norm": 0.5023308992385864,
"learning_rate": 0.00018668462574562248,
"loss": 0.7079763412475586,
"step": 352
},
{
"epoch": 0.2035755478662053,
"grad_norm": 0.7946610450744629,
"learning_rate": 0.0001866461420050029,
"loss": 1.3043620586395264,
"step": 353
},
{
"epoch": 0.2041522491349481,
"grad_norm": 0.8124772310256958,
"learning_rate": 0.0001866076582643833,
"loss": 1.1276662349700928,
"step": 354
},
{
"epoch": 0.20472895040369088,
"grad_norm": 0.5195242166519165,
"learning_rate": 0.00018656917452376372,
"loss": 0.737315833568573,
"step": 355
},
{
"epoch": 0.20530565167243367,
"grad_norm": 0.7146646976470947,
"learning_rate": 0.00018653069078314414,
"loss": 1.0838680267333984,
"step": 356
},
{
"epoch": 0.20588235294117646,
"grad_norm": 0.7928506135940552,
"learning_rate": 0.00018649220704252455,
"loss": 1.2697861194610596,
"step": 357
},
{
"epoch": 0.20645905420991925,
"grad_norm": 0.6152468919754028,
"learning_rate": 0.00018645372330190497,
"loss": 0.9355758428573608,
"step": 358
},
{
"epoch": 0.20703575547866204,
"grad_norm": 1.0809266567230225,
"learning_rate": 0.00018641523956128538,
"loss": 1.9420266151428223,
"step": 359
},
{
"epoch": 0.20761245674740483,
"grad_norm": 0.59016352891922,
"learning_rate": 0.00018637675582066577,
"loss": 0.9944459199905396,
"step": 360
},
{
"epoch": 0.20818915801614762,
"grad_norm": 0.7870339751243591,
"learning_rate": 0.0001863382720800462,
"loss": 1.0614302158355713,
"step": 361
},
{
"epoch": 0.20876585928489041,
"grad_norm": 0.7203708291053772,
"learning_rate": 0.0001862997883394266,
"loss": 0.9602723717689514,
"step": 362
},
{
"epoch": 0.2093425605536332,
"grad_norm": 0.532341480255127,
"learning_rate": 0.00018626130459880702,
"loss": 0.8718068599700928,
"step": 363
},
{
"epoch": 0.209919261822376,
"grad_norm": 0.9565883278846741,
"learning_rate": 0.00018622282085818743,
"loss": 1.278198480606079,
"step": 364
},
{
"epoch": 0.2104959630911188,
"grad_norm": 0.7197461724281311,
"learning_rate": 0.00018618433711756785,
"loss": 1.3148860931396484,
"step": 365
},
{
"epoch": 0.21107266435986158,
"grad_norm": 0.6119058728218079,
"learning_rate": 0.00018614585337694826,
"loss": 0.9266935586929321,
"step": 366
},
{
"epoch": 0.2116493656286044,
"grad_norm": 0.9047015309333801,
"learning_rate": 0.00018610736963632868,
"loss": 1.1473264694213867,
"step": 367
},
{
"epoch": 0.2122260668973472,
"grad_norm": 0.6796925663948059,
"learning_rate": 0.00018606888589570907,
"loss": 1.0393201112747192,
"step": 368
},
{
"epoch": 0.21280276816608998,
"grad_norm": 0.6059300303459167,
"learning_rate": 0.00018603040215508948,
"loss": 1.001380443572998,
"step": 369
},
{
"epoch": 0.21337946943483277,
"grad_norm": 0.6669148206710815,
"learning_rate": 0.0001859919184144699,
"loss": 0.8133573532104492,
"step": 370
},
{
"epoch": 0.21395617070357556,
"grad_norm": 0.6025424003601074,
"learning_rate": 0.0001859534346738503,
"loss": 0.9277598261833191,
"step": 371
},
{
"epoch": 0.21453287197231835,
"grad_norm": 0.8728757500648499,
"learning_rate": 0.00018591495093323073,
"loss": 1.1496421098709106,
"step": 372
},
{
"epoch": 0.21510957324106114,
"grad_norm": 0.587089478969574,
"learning_rate": 0.00018587646719261114,
"loss": 0.8672431707382202,
"step": 373
},
{
"epoch": 0.21568627450980393,
"grad_norm": 0.7482187747955322,
"learning_rate": 0.00018583798345199156,
"loss": 1.0713750123977661,
"step": 374
},
{
"epoch": 0.21626297577854672,
"grad_norm": 0.8591217398643494,
"learning_rate": 0.00018579949971137197,
"loss": 1.4045636653900146,
"step": 375
},
{
"epoch": 0.21683967704728951,
"grad_norm": 0.7630711793899536,
"learning_rate": 0.00018576101597075236,
"loss": 0.9842856526374817,
"step": 376
},
{
"epoch": 0.2174163783160323,
"grad_norm": 1.2762526273727417,
"learning_rate": 0.00018572253223013278,
"loss": 1.5381450653076172,
"step": 377
},
{
"epoch": 0.2179930795847751,
"grad_norm": 0.7234092950820923,
"learning_rate": 0.0001856840484895132,
"loss": 1.0782972574234009,
"step": 378
},
{
"epoch": 0.2185697808535179,
"grad_norm": 0.8868815898895264,
"learning_rate": 0.0001856455647488936,
"loss": 0.9910011291503906,
"step": 379
},
{
"epoch": 0.21914648212226068,
"grad_norm": 0.5880477428436279,
"learning_rate": 0.00018560708100827402,
"loss": 0.9178383946418762,
"step": 380
},
{
"epoch": 0.21972318339100347,
"grad_norm": 0.7115210294723511,
"learning_rate": 0.00018556859726765444,
"loss": 1.3695993423461914,
"step": 381
},
{
"epoch": 0.22029988465974626,
"grad_norm": 0.9036445617675781,
"learning_rate": 0.00018553011352703485,
"loss": 1.049261212348938,
"step": 382
},
{
"epoch": 0.22087658592848905,
"grad_norm": 1.044411540031433,
"learning_rate": 0.00018549162978641527,
"loss": 1.272240400314331,
"step": 383
},
{
"epoch": 0.22145328719723184,
"grad_norm": 0.6363574862480164,
"learning_rate": 0.00018545314604579566,
"loss": 1.0237360000610352,
"step": 384
},
{
"epoch": 0.22202998846597463,
"grad_norm": 0.7671105861663818,
"learning_rate": 0.00018541466230517607,
"loss": 0.9970401525497437,
"step": 385
},
{
"epoch": 0.22260668973471742,
"grad_norm": 1.170229434967041,
"learning_rate": 0.0001853761785645565,
"loss": 1.5654575824737549,
"step": 386
},
{
"epoch": 0.2231833910034602,
"grad_norm": 0.9486715793609619,
"learning_rate": 0.0001853376948239369,
"loss": 1.8445625305175781,
"step": 387
},
{
"epoch": 0.223760092272203,
"grad_norm": 0.7049561142921448,
"learning_rate": 0.00018529921108331732,
"loss": 1.147915005683899,
"step": 388
},
{
"epoch": 0.2243367935409458,
"grad_norm": 0.7626886963844299,
"learning_rate": 0.00018526072734269773,
"loss": 0.9354770183563232,
"step": 389
},
{
"epoch": 0.22491349480968859,
"grad_norm": 0.8018368482589722,
"learning_rate": 0.00018522224360207815,
"loss": 1.0617220401763916,
"step": 390
},
{
"epoch": 0.22549019607843138,
"grad_norm": 0.7590807676315308,
"learning_rate": 0.00018518375986145854,
"loss": 0.9120303988456726,
"step": 391
},
{
"epoch": 0.22606689734717417,
"grad_norm": 0.6623148918151855,
"learning_rate": 0.00018514527612083895,
"loss": 0.7569756507873535,
"step": 392
},
{
"epoch": 0.22664359861591696,
"grad_norm": 0.5547282099723816,
"learning_rate": 0.00018510679238021937,
"loss": 0.7989190816879272,
"step": 393
},
{
"epoch": 0.22722029988465975,
"grad_norm": 0.5765286087989807,
"learning_rate": 0.00018506830863959978,
"loss": 0.6133571863174438,
"step": 394
},
{
"epoch": 0.22779700115340254,
"grad_norm": 0.8331816792488098,
"learning_rate": 0.0001850298248989802,
"loss": 1.1577847003936768,
"step": 395
},
{
"epoch": 0.22837370242214533,
"grad_norm": 0.7655069231987,
"learning_rate": 0.0001849913411583606,
"loss": 1.0809553861618042,
"step": 396
},
{
"epoch": 0.22895040369088812,
"grad_norm": 0.7397854924201965,
"learning_rate": 0.00018495285741774103,
"loss": 0.9830250144004822,
"step": 397
},
{
"epoch": 0.2295271049596309,
"grad_norm": 0.6970857381820679,
"learning_rate": 0.00018491437367712144,
"loss": 0.8101853132247925,
"step": 398
},
{
"epoch": 0.2301038062283737,
"grad_norm": 0.5724602937698364,
"learning_rate": 0.00018487588993650183,
"loss": 0.70196932554245,
"step": 399
},
{
"epoch": 0.2306805074971165,
"grad_norm": 0.9593637585639954,
"learning_rate": 0.00018483740619588225,
"loss": 0.9378552436828613,
"step": 400
},
{
"epoch": 0.23125720876585928,
"grad_norm": 0.7079650163650513,
"learning_rate": 0.00018479892245526266,
"loss": 0.8764985799789429,
"step": 401
},
{
"epoch": 0.23183391003460208,
"grad_norm": 0.7374391555786133,
"learning_rate": 0.00018476043871464308,
"loss": 0.8556146025657654,
"step": 402
},
{
"epoch": 0.23241061130334487,
"grad_norm": 0.6992713809013367,
"learning_rate": 0.0001847219549740235,
"loss": 0.9657334089279175,
"step": 403
},
{
"epoch": 0.23298731257208766,
"grad_norm": 0.8299751281738281,
"learning_rate": 0.0001846834712334039,
"loss": 1.2171483039855957,
"step": 404
},
{
"epoch": 0.23356401384083045,
"grad_norm": 0.5866743922233582,
"learning_rate": 0.00018464498749278432,
"loss": 0.9809523820877075,
"step": 405
},
{
"epoch": 0.23414071510957324,
"grad_norm": 0.8412980437278748,
"learning_rate": 0.00018460650375216474,
"loss": 1.1848514080047607,
"step": 406
},
{
"epoch": 0.23471741637831603,
"grad_norm": 0.7566470503807068,
"learning_rate": 0.00018456802001154513,
"loss": 1.0939483642578125,
"step": 407
},
{
"epoch": 0.23529411764705882,
"grad_norm": 0.787800669670105,
"learning_rate": 0.00018452953627092554,
"loss": 1.2347867488861084,
"step": 408
},
{
"epoch": 0.2358708189158016,
"grad_norm": 0.8511201739311218,
"learning_rate": 0.00018449105253030596,
"loss": 0.9385696053504944,
"step": 409
},
{
"epoch": 0.2364475201845444,
"grad_norm": 0.9360937476158142,
"learning_rate": 0.00018445256878968637,
"loss": 1.3519483804702759,
"step": 410
},
{
"epoch": 0.2370242214532872,
"grad_norm": 0.556093692779541,
"learning_rate": 0.0001844140850490668,
"loss": 0.8482391238212585,
"step": 411
},
{
"epoch": 0.23760092272202998,
"grad_norm": 0.6390929818153381,
"learning_rate": 0.0001843756013084472,
"loss": 1.0374037027359009,
"step": 412
},
{
"epoch": 0.23817762399077277,
"grad_norm": 0.5385326743125916,
"learning_rate": 0.00018433711756782762,
"loss": 0.8951395750045776,
"step": 413
},
{
"epoch": 0.23875432525951557,
"grad_norm": 0.7417898774147034,
"learning_rate": 0.00018429863382720803,
"loss": 1.1854356527328491,
"step": 414
},
{
"epoch": 0.23933102652825836,
"grad_norm": 0.7092972993850708,
"learning_rate": 0.00018426015008658842,
"loss": 1.2556312084197998,
"step": 415
},
{
"epoch": 0.23990772779700115,
"grad_norm": 0.6026037931442261,
"learning_rate": 0.00018422166634596884,
"loss": 0.8205006718635559,
"step": 416
},
{
"epoch": 0.24048442906574394,
"grad_norm": 0.7460249662399292,
"learning_rate": 0.00018418318260534925,
"loss": 0.9955434203147888,
"step": 417
},
{
"epoch": 0.24106113033448673,
"grad_norm": 0.6313579082489014,
"learning_rate": 0.00018414469886472967,
"loss": 1.15024995803833,
"step": 418
},
{
"epoch": 0.24163783160322952,
"grad_norm": 0.7596423029899597,
"learning_rate": 0.00018410621512411008,
"loss": 1.196816325187683,
"step": 419
},
{
"epoch": 0.2422145328719723,
"grad_norm": 0.7336683869361877,
"learning_rate": 0.0001840677313834905,
"loss": 1.0791605710983276,
"step": 420
},
{
"epoch": 0.2427912341407151,
"grad_norm": 0.6802041530609131,
"learning_rate": 0.00018402924764287091,
"loss": 0.8439788222312927,
"step": 421
},
{
"epoch": 0.2433679354094579,
"grad_norm": 0.9311268329620361,
"learning_rate": 0.00018399076390225133,
"loss": 1.4188232421875,
"step": 422
},
{
"epoch": 0.24394463667820068,
"grad_norm": 0.9715989232063293,
"learning_rate": 0.00018395228016163172,
"loss": 1.149898648262024,
"step": 423
},
{
"epoch": 0.24452133794694347,
"grad_norm": 0.6722977161407471,
"learning_rate": 0.00018391379642101213,
"loss": 1.0626373291015625,
"step": 424
},
{
"epoch": 0.24509803921568626,
"grad_norm": 0.9417729377746582,
"learning_rate": 0.00018387531268039255,
"loss": 1.277899980545044,
"step": 425
},
{
"epoch": 0.24567474048442905,
"grad_norm": 0.8700136542320251,
"learning_rate": 0.00018383682893977296,
"loss": 1.106884479522705,
"step": 426
},
{
"epoch": 0.24625144175317185,
"grad_norm": 0.71380615234375,
"learning_rate": 0.00018379834519915338,
"loss": 1.1928266286849976,
"step": 427
},
{
"epoch": 0.24682814302191464,
"grad_norm": 0.7276275157928467,
"learning_rate": 0.0001837598614585338,
"loss": 1.2448585033416748,
"step": 428
},
{
"epoch": 0.24740484429065743,
"grad_norm": 0.8795212507247925,
"learning_rate": 0.0001837213777179142,
"loss": 1.317166805267334,
"step": 429
},
{
"epoch": 0.24798154555940022,
"grad_norm": 0.9904524087905884,
"learning_rate": 0.00018368289397729462,
"loss": 1.166348934173584,
"step": 430
},
{
"epoch": 0.248558246828143,
"grad_norm": 0.7632173299789429,
"learning_rate": 0.000183644410236675,
"loss": 1.5664170980453491,
"step": 431
},
{
"epoch": 0.2491349480968858,
"grad_norm": 0.8291054964065552,
"learning_rate": 0.00018360592649605543,
"loss": 1.4953291416168213,
"step": 432
},
{
"epoch": 0.2497116493656286,
"grad_norm": 0.6445023417472839,
"learning_rate": 0.00018356744275543584,
"loss": 0.8673335313796997,
"step": 433
},
{
"epoch": 0.2502883506343714,
"grad_norm": 1.2072186470031738,
"learning_rate": 0.00018352895901481626,
"loss": 1.59421968460083,
"step": 434
},
{
"epoch": 0.2508650519031142,
"grad_norm": 0.7409680485725403,
"learning_rate": 0.00018349047527419667,
"loss": 1.0224432945251465,
"step": 435
},
{
"epoch": 0.25144175317185696,
"grad_norm": 0.8207524418830872,
"learning_rate": 0.0001834519915335771,
"loss": 1.276658058166504,
"step": 436
},
{
"epoch": 0.2520184544405998,
"grad_norm": 0.8591949343681335,
"learning_rate": 0.0001834135077929575,
"loss": 1.1319093704223633,
"step": 437
},
{
"epoch": 0.25259515570934254,
"grad_norm": 0.6689372658729553,
"learning_rate": 0.00018337502405233792,
"loss": 0.9691576361656189,
"step": 438
},
{
"epoch": 0.25317185697808536,
"grad_norm": 0.9033296704292297,
"learning_rate": 0.0001833365403117183,
"loss": 1.4272680282592773,
"step": 439
},
{
"epoch": 0.2537485582468281,
"grad_norm": 0.6959604620933533,
"learning_rate": 0.0001832980565710987,
"loss": 1.1449182033538818,
"step": 440
},
{
"epoch": 0.25432525951557095,
"grad_norm": 0.6695550680160522,
"learning_rate": 0.0001832595728304791,
"loss": 1.0492792129516602,
"step": 441
},
{
"epoch": 0.2549019607843137,
"grad_norm": 0.710794985294342,
"learning_rate": 0.00018322108908985953,
"loss": 0.9534090757369995,
"step": 442
},
{
"epoch": 0.2554786620530565,
"grad_norm": 0.6955594420433044,
"learning_rate": 0.00018318260534923994,
"loss": 0.8743690252304077,
"step": 443
},
{
"epoch": 0.2560553633217993,
"grad_norm": 0.6831961274147034,
"learning_rate": 0.00018314412160862036,
"loss": 1.3500818014144897,
"step": 444
},
{
"epoch": 0.2566320645905421,
"grad_norm": 0.7839577198028564,
"learning_rate": 0.00018310563786800077,
"loss": 1.0105950832366943,
"step": 445
},
{
"epoch": 0.25720876585928487,
"grad_norm": 0.8791704773902893,
"learning_rate": 0.0001830671541273812,
"loss": 1.2243623733520508,
"step": 446
},
{
"epoch": 0.2577854671280277,
"grad_norm": 0.7005860209465027,
"learning_rate": 0.0001830286703867616,
"loss": 1.077842354774475,
"step": 447
},
{
"epoch": 0.25836216839677045,
"grad_norm": 0.822964072227478,
"learning_rate": 0.000182990186646142,
"loss": 1.2265344858169556,
"step": 448
},
{
"epoch": 0.25893886966551327,
"grad_norm": 0.773158609867096,
"learning_rate": 0.0001829517029055224,
"loss": 0.8715431690216064,
"step": 449
},
{
"epoch": 0.25951557093425603,
"grad_norm": 0.8603456616401672,
"learning_rate": 0.00018291321916490282,
"loss": 0.9889146089553833,
"step": 450
},
{
"epoch": 0.26009227220299885,
"grad_norm": 0.8188443779945374,
"learning_rate": 0.00018287473542428324,
"loss": 0.8885264992713928,
"step": 451
},
{
"epoch": 0.2606689734717416,
"grad_norm": 1.0877407789230347,
"learning_rate": 0.00018283625168366365,
"loss": 1.0748121738433838,
"step": 452
},
{
"epoch": 0.26124567474048443,
"grad_norm": 0.5481402277946472,
"learning_rate": 0.00018279776794304407,
"loss": 0.807957649230957,
"step": 453
},
{
"epoch": 0.2618223760092272,
"grad_norm": 0.8591419458389282,
"learning_rate": 0.00018275928420242448,
"loss": 1.3057336807250977,
"step": 454
},
{
"epoch": 0.26239907727797,
"grad_norm": 0.7936019897460938,
"learning_rate": 0.0001827208004618049,
"loss": 1.185962200164795,
"step": 455
},
{
"epoch": 0.2629757785467128,
"grad_norm": 0.6581904888153076,
"learning_rate": 0.00018268231672118529,
"loss": 0.8275895118713379,
"step": 456
},
{
"epoch": 0.2635524798154556,
"grad_norm": 0.831302285194397,
"learning_rate": 0.0001826438329805657,
"loss": 1.299217939376831,
"step": 457
},
{
"epoch": 0.26412918108419836,
"grad_norm": 0.6771467924118042,
"learning_rate": 0.00018260534923994612,
"loss": 0.8427085876464844,
"step": 458
},
{
"epoch": 0.2647058823529412,
"grad_norm": 0.7914313077926636,
"learning_rate": 0.00018256686549932653,
"loss": 1.369484305381775,
"step": 459
},
{
"epoch": 0.26528258362168394,
"grad_norm": 0.5916578769683838,
"learning_rate": 0.00018252838175870695,
"loss": 0.6241229772567749,
"step": 460
},
{
"epoch": 0.26585928489042676,
"grad_norm": 0.6836418509483337,
"learning_rate": 0.00018248989801808736,
"loss": 0.8050651550292969,
"step": 461
},
{
"epoch": 0.2664359861591695,
"grad_norm": 0.7545502185821533,
"learning_rate": 0.00018245141427746778,
"loss": 0.7911585569381714,
"step": 462
},
{
"epoch": 0.26701268742791234,
"grad_norm": 0.6010773181915283,
"learning_rate": 0.0001824129305368482,
"loss": 1.1161192655563354,
"step": 463
},
{
"epoch": 0.2675893886966551,
"grad_norm": 0.813204824924469,
"learning_rate": 0.00018237444679622858,
"loss": 1.096695065498352,
"step": 464
},
{
"epoch": 0.2681660899653979,
"grad_norm": 0.91140216588974,
"learning_rate": 0.000182335963055609,
"loss": 1.4385195970535278,
"step": 465
},
{
"epoch": 0.2687427912341407,
"grad_norm": 0.9745720624923706,
"learning_rate": 0.0001822974793149894,
"loss": 1.3157883882522583,
"step": 466
},
{
"epoch": 0.2693194925028835,
"grad_norm": 0.4999851584434509,
"learning_rate": 0.00018225899557436983,
"loss": 0.6729867458343506,
"step": 467
},
{
"epoch": 0.2698961937716263,
"grad_norm": 0.9021291732788086,
"learning_rate": 0.00018222051183375024,
"loss": 1.0553233623504639,
"step": 468
},
{
"epoch": 0.2704728950403691,
"grad_norm": 0.8061716556549072,
"learning_rate": 0.00018218202809313066,
"loss": 1.3081198930740356,
"step": 469
},
{
"epoch": 0.2710495963091119,
"grad_norm": 0.6820981502532959,
"learning_rate": 0.00018214354435251107,
"loss": 0.9388906359672546,
"step": 470
},
{
"epoch": 0.27162629757785467,
"grad_norm": 1.0991320610046387,
"learning_rate": 0.0001821050606118915,
"loss": 1.528028964996338,
"step": 471
},
{
"epoch": 0.2722029988465975,
"grad_norm": 0.7934592962265015,
"learning_rate": 0.00018206657687127188,
"loss": 1.2054097652435303,
"step": 472
},
{
"epoch": 0.27277970011534025,
"grad_norm": 0.7113450765609741,
"learning_rate": 0.0001820280931306523,
"loss": 1.0254576206207275,
"step": 473
},
{
"epoch": 0.27335640138408307,
"grad_norm": 0.7593767046928406,
"learning_rate": 0.0001819896093900327,
"loss": 1.284333348274231,
"step": 474
},
{
"epoch": 0.27393310265282583,
"grad_norm": 1.006116509437561,
"learning_rate": 0.00018195112564941312,
"loss": 1.3650097846984863,
"step": 475
},
{
"epoch": 0.27450980392156865,
"grad_norm": 0.8706763982772827,
"learning_rate": 0.00018191264190879354,
"loss": 1.6067880392074585,
"step": 476
},
{
"epoch": 0.2750865051903114,
"grad_norm": 0.7428901195526123,
"learning_rate": 0.00018187415816817395,
"loss": 1.373342514038086,
"step": 477
},
{
"epoch": 0.27566320645905423,
"grad_norm": 0.8846433162689209,
"learning_rate": 0.00018183567442755437,
"loss": 1.5520777702331543,
"step": 478
},
{
"epoch": 0.276239907727797,
"grad_norm": 0.8808581829071045,
"learning_rate": 0.00018179719068693478,
"loss": 1.1342291831970215,
"step": 479
},
{
"epoch": 0.2768166089965398,
"grad_norm": 0.7310512065887451,
"learning_rate": 0.00018175870694631517,
"loss": 0.7762906551361084,
"step": 480
},
{
"epoch": 0.2773933102652826,
"grad_norm": 0.8467727303504944,
"learning_rate": 0.0001817202232056956,
"loss": 0.990180253982544,
"step": 481
},
{
"epoch": 0.2779700115340254,
"grad_norm": 0.642230212688446,
"learning_rate": 0.000181681739465076,
"loss": 0.845292329788208,
"step": 482
},
{
"epoch": 0.27854671280276816,
"grad_norm": 0.7775582075119019,
"learning_rate": 0.00018164325572445642,
"loss": 1.279380202293396,
"step": 483
},
{
"epoch": 0.279123414071511,
"grad_norm": 0.6477130651473999,
"learning_rate": 0.00018160477198383683,
"loss": 0.8197907209396362,
"step": 484
},
{
"epoch": 0.27970011534025374,
"grad_norm": 0.6508778929710388,
"learning_rate": 0.00018156628824321725,
"loss": 0.9538026452064514,
"step": 485
},
{
"epoch": 0.28027681660899656,
"grad_norm": 0.9379159212112427,
"learning_rate": 0.00018152780450259766,
"loss": 1.2874410152435303,
"step": 486
},
{
"epoch": 0.2808535178777393,
"grad_norm": 0.8014243245124817,
"learning_rate": 0.00018148932076197808,
"loss": 1.364856481552124,
"step": 487
},
{
"epoch": 0.28143021914648214,
"grad_norm": 1.0049822330474854,
"learning_rate": 0.00018145083702135847,
"loss": 1.3461369276046753,
"step": 488
},
{
"epoch": 0.2820069204152249,
"grad_norm": 0.8764071464538574,
"learning_rate": 0.00018141235328073888,
"loss": 1.549091100692749,
"step": 489
},
{
"epoch": 0.2825836216839677,
"grad_norm": 0.6743770241737366,
"learning_rate": 0.0001813738695401193,
"loss": 0.8718385696411133,
"step": 490
},
{
"epoch": 0.2831603229527105,
"grad_norm": 0.8501721024513245,
"learning_rate": 0.0001813353857994997,
"loss": 0.9592713117599487,
"step": 491
},
{
"epoch": 0.2837370242214533,
"grad_norm": 0.6727166771888733,
"learning_rate": 0.00018129690205888013,
"loss": 1.0024611949920654,
"step": 492
},
{
"epoch": 0.28431372549019607,
"grad_norm": 0.7949026226997375,
"learning_rate": 0.00018125841831826054,
"loss": 0.889624297618866,
"step": 493
},
{
"epoch": 0.2848904267589389,
"grad_norm": 0.8814200758934021,
"learning_rate": 0.00018121993457764096,
"loss": 1.7483818531036377,
"step": 494
},
{
"epoch": 0.28546712802768165,
"grad_norm": 0.6116936206817627,
"learning_rate": 0.00018118145083702137,
"loss": 1.097643256187439,
"step": 495
},
{
"epoch": 0.28604382929642447,
"grad_norm": 0.6951889395713806,
"learning_rate": 0.00018114296709640176,
"loss": 0.9292160272598267,
"step": 496
},
{
"epoch": 0.28662053056516723,
"grad_norm": 0.9138390421867371,
"learning_rate": 0.00018110448335578218,
"loss": 1.174808144569397,
"step": 497
},
{
"epoch": 0.28719723183391005,
"grad_norm": 0.6442549824714661,
"learning_rate": 0.0001810659996151626,
"loss": 0.9390018582344055,
"step": 498
},
{
"epoch": 0.2877739331026528,
"grad_norm": 0.9683842658996582,
"learning_rate": 0.000181027515874543,
"loss": 1.4045450687408447,
"step": 499
},
{
"epoch": 0.28835063437139563,
"grad_norm": 0.7444068193435669,
"learning_rate": 0.00018098903213392342,
"loss": 0.9792321920394897,
"step": 500
},
{
"epoch": 0.2889273356401384,
"grad_norm": 0.7402380108833313,
"learning_rate": 0.00018095054839330384,
"loss": 1.231440782546997,
"step": 501
},
{
"epoch": 0.2895040369088812,
"grad_norm": 0.7022894024848938,
"learning_rate": 0.00018091206465268425,
"loss": 0.856300950050354,
"step": 502
},
{
"epoch": 0.290080738177624,
"grad_norm": 0.7641032338142395,
"learning_rate": 0.00018087358091206467,
"loss": 0.9729149341583252,
"step": 503
},
{
"epoch": 0.2906574394463668,
"grad_norm": 0.9500510096549988,
"learning_rate": 0.00018083509717144506,
"loss": 1.2449204921722412,
"step": 504
},
{
"epoch": 0.29123414071510956,
"grad_norm": 0.6954758763313293,
"learning_rate": 0.00018079661343082547,
"loss": 0.8000816106796265,
"step": 505
},
{
"epoch": 0.2918108419838524,
"grad_norm": 0.7313628196716309,
"learning_rate": 0.0001807581296902059,
"loss": 1.233512282371521,
"step": 506
},
{
"epoch": 0.29238754325259514,
"grad_norm": 0.8792680501937866,
"learning_rate": 0.0001807196459495863,
"loss": 1.092308521270752,
"step": 507
},
{
"epoch": 0.29296424452133796,
"grad_norm": 0.6230028867721558,
"learning_rate": 0.00018068116220896672,
"loss": 0.7719423174858093,
"step": 508
},
{
"epoch": 0.2935409457900807,
"grad_norm": 0.8965409398078918,
"learning_rate": 0.00018064267846834713,
"loss": 1.576930284500122,
"step": 509
},
{
"epoch": 0.29411764705882354,
"grad_norm": 0.756908118724823,
"learning_rate": 0.00018060419472772755,
"loss": 0.9762069582939148,
"step": 510
},
{
"epoch": 0.2946943483275663,
"grad_norm": 0.7524373531341553,
"learning_rate": 0.00018056571098710794,
"loss": 0.9206646680831909,
"step": 511
},
{
"epoch": 0.2952710495963091,
"grad_norm": 0.9292136430740356,
"learning_rate": 0.00018052722724648835,
"loss": 1.534470558166504,
"step": 512
},
{
"epoch": 0.2958477508650519,
"grad_norm": 1.0442750453948975,
"learning_rate": 0.00018048874350586877,
"loss": 1.2520341873168945,
"step": 513
},
{
"epoch": 0.2964244521337947,
"grad_norm": 0.8131316900253296,
"learning_rate": 0.00018045025976524918,
"loss": 1.5056309700012207,
"step": 514
},
{
"epoch": 0.29700115340253747,
"grad_norm": 0.7711693048477173,
"learning_rate": 0.0001804117760246296,
"loss": 1.2189143896102905,
"step": 515
},
{
"epoch": 0.2975778546712803,
"grad_norm": 0.6610523462295532,
"learning_rate": 0.00018037329228401,
"loss": 1.1120340824127197,
"step": 516
},
{
"epoch": 0.29815455594002305,
"grad_norm": 0.7343090772628784,
"learning_rate": 0.00018033480854339043,
"loss": 1.0496878623962402,
"step": 517
},
{
"epoch": 0.29873125720876587,
"grad_norm": 0.6952423453330994,
"learning_rate": 0.00018029632480277084,
"loss": 1.0725046396255493,
"step": 518
},
{
"epoch": 0.29930795847750863,
"grad_norm": 1.0385462045669556,
"learning_rate": 0.00018025784106215123,
"loss": 1.3104898929595947,
"step": 519
},
{
"epoch": 0.29988465974625145,
"grad_norm": 0.6035030484199524,
"learning_rate": 0.00018021935732153165,
"loss": 0.7342404127120972,
"step": 520
},
{
"epoch": 0.3004613610149942,
"grad_norm": 0.5726889371871948,
"learning_rate": 0.00018018087358091206,
"loss": 0.9352455139160156,
"step": 521
},
{
"epoch": 0.30103806228373703,
"grad_norm": 0.5148364305496216,
"learning_rate": 0.00018014238984029248,
"loss": 0.8527913093566895,
"step": 522
},
{
"epoch": 0.3016147635524798,
"grad_norm": 0.8307221531867981,
"learning_rate": 0.0001801039060996729,
"loss": 1.180746078491211,
"step": 523
},
{
"epoch": 0.3021914648212226,
"grad_norm": 0.8560492396354675,
"learning_rate": 0.0001800654223590533,
"loss": 1.4329997301101685,
"step": 524
},
{
"epoch": 0.3027681660899654,
"grad_norm": 0.5972908139228821,
"learning_rate": 0.00018002693861843372,
"loss": 0.7385514974594116,
"step": 525
},
{
"epoch": 0.3033448673587082,
"grad_norm": 0.5159963965415955,
"learning_rate": 0.00017998845487781414,
"loss": 0.646453320980072,
"step": 526
},
{
"epoch": 0.30392156862745096,
"grad_norm": 0.9237578511238098,
"learning_rate": 0.00017994997113719453,
"loss": 1.442482590675354,
"step": 527
},
{
"epoch": 0.3044982698961938,
"grad_norm": 0.9341033697128296,
"learning_rate": 0.00017991148739657494,
"loss": 1.3850878477096558,
"step": 528
},
{
"epoch": 0.30507497116493654,
"grad_norm": 0.5422039031982422,
"learning_rate": 0.00017987300365595536,
"loss": 0.6736562252044678,
"step": 529
},
{
"epoch": 0.30565167243367936,
"grad_norm": 0.6220455765724182,
"learning_rate": 0.00017983451991533577,
"loss": 0.7528645992279053,
"step": 530
},
{
"epoch": 0.3062283737024221,
"grad_norm": 0.8073663115501404,
"learning_rate": 0.0001797960361747162,
"loss": 1.2123267650604248,
"step": 531
},
{
"epoch": 0.30680507497116494,
"grad_norm": 0.5491252541542053,
"learning_rate": 0.0001797575524340966,
"loss": 0.5903505086898804,
"step": 532
},
{
"epoch": 0.3073817762399077,
"grad_norm": 1.9019479751586914,
"learning_rate": 0.00017971906869347702,
"loss": 1.4316587448120117,
"step": 533
},
{
"epoch": 0.3079584775086505,
"grad_norm": 0.45649734139442444,
"learning_rate": 0.00017968058495285743,
"loss": 0.659195065498352,
"step": 534
},
{
"epoch": 0.30853517877739334,
"grad_norm": 0.7406135201454163,
"learning_rate": 0.00017964210121223782,
"loss": 1.0346477031707764,
"step": 535
},
{
"epoch": 0.3091118800461361,
"grad_norm": 0.9768670201301575,
"learning_rate": 0.00017960361747161824,
"loss": 1.584676742553711,
"step": 536
},
{
"epoch": 0.3096885813148789,
"grad_norm": 0.7869756817817688,
"learning_rate": 0.00017956513373099865,
"loss": 1.0404967069625854,
"step": 537
},
{
"epoch": 0.3102652825836217,
"grad_norm": 0.6868966221809387,
"learning_rate": 0.00017952664999037907,
"loss": 0.8878238201141357,
"step": 538
},
{
"epoch": 0.3108419838523645,
"grad_norm": 0.7594157457351685,
"learning_rate": 0.00017948816624975948,
"loss": 1.0191287994384766,
"step": 539
},
{
"epoch": 0.31141868512110726,
"grad_norm": 0.8346229195594788,
"learning_rate": 0.0001794496825091399,
"loss": 1.021256923675537,
"step": 540
},
{
"epoch": 0.3119953863898501,
"grad_norm": 1.0493948459625244,
"learning_rate": 0.00017941119876852031,
"loss": 1.0015616416931152,
"step": 541
},
{
"epoch": 0.31257208765859285,
"grad_norm": 0.62034010887146,
"learning_rate": 0.00017937271502790073,
"loss": 0.9237149357795715,
"step": 542
},
{
"epoch": 0.31314878892733566,
"grad_norm": 0.7169587016105652,
"learning_rate": 0.00017933423128728112,
"loss": 0.8658795356750488,
"step": 543
},
{
"epoch": 0.3137254901960784,
"grad_norm": 0.7205992341041565,
"learning_rate": 0.00017929574754666153,
"loss": 1.1227588653564453,
"step": 544
},
{
"epoch": 0.31430219146482125,
"grad_norm": 0.7573957443237305,
"learning_rate": 0.00017925726380604195,
"loss": 0.9638352394104004,
"step": 545
},
{
"epoch": 0.314878892733564,
"grad_norm": 0.981253981590271,
"learning_rate": 0.00017921878006542236,
"loss": 1.0400216579437256,
"step": 546
},
{
"epoch": 0.3154555940023068,
"grad_norm": 0.6763452291488647,
"learning_rate": 0.00017918029632480278,
"loss": 1.0069935321807861,
"step": 547
},
{
"epoch": 0.3160322952710496,
"grad_norm": 0.5641304850578308,
"learning_rate": 0.0001791418125841832,
"loss": 0.7099517583847046,
"step": 548
},
{
"epoch": 0.3166089965397924,
"grad_norm": 0.542838454246521,
"learning_rate": 0.0001791033288435636,
"loss": 0.7347281575202942,
"step": 549
},
{
"epoch": 0.31718569780853517,
"grad_norm": 0.6865650415420532,
"learning_rate": 0.00017906484510294402,
"loss": 0.9269914031028748,
"step": 550
},
{
"epoch": 0.317762399077278,
"grad_norm": 0.6794233322143555,
"learning_rate": 0.0001790263613623244,
"loss": 0.8624827861785889,
"step": 551
},
{
"epoch": 0.31833910034602075,
"grad_norm": 0.9417468905448914,
"learning_rate": 0.00017898787762170483,
"loss": 1.2194072008132935,
"step": 552
},
{
"epoch": 0.31891580161476357,
"grad_norm": 0.8551915287971497,
"learning_rate": 0.00017894939388108524,
"loss": 1.1121107339859009,
"step": 553
},
{
"epoch": 0.31949250288350634,
"grad_norm": 1.0210304260253906,
"learning_rate": 0.00017891091014046566,
"loss": 1.3061752319335938,
"step": 554
},
{
"epoch": 0.32006920415224915,
"grad_norm": 0.9833082556724548,
"learning_rate": 0.00017887242639984607,
"loss": 1.3157097101211548,
"step": 555
},
{
"epoch": 0.3206459054209919,
"grad_norm": 0.8534771203994751,
"learning_rate": 0.0001788339426592265,
"loss": 1.1443736553192139,
"step": 556
},
{
"epoch": 0.32122260668973474,
"grad_norm": 0.5206373929977417,
"learning_rate": 0.0001787954589186069,
"loss": 0.9210702776908875,
"step": 557
},
{
"epoch": 0.3217993079584775,
"grad_norm": 0.9890329837799072,
"learning_rate": 0.00017875697517798732,
"loss": 1.1474642753601074,
"step": 558
},
{
"epoch": 0.3223760092272203,
"grad_norm": 1.033987045288086,
"learning_rate": 0.0001787184914373677,
"loss": 1.3469852209091187,
"step": 559
},
{
"epoch": 0.3229527104959631,
"grad_norm": 0.5397274494171143,
"learning_rate": 0.00017868000769674812,
"loss": 0.8606307506561279,
"step": 560
},
{
"epoch": 0.3235294117647059,
"grad_norm": 0.7607125639915466,
"learning_rate": 0.00017864152395612854,
"loss": 1.5313308238983154,
"step": 561
},
{
"epoch": 0.32410611303344866,
"grad_norm": 0.8187709450721741,
"learning_rate": 0.00017860304021550895,
"loss": 1.2671842575073242,
"step": 562
},
{
"epoch": 0.3246828143021915,
"grad_norm": 0.8652257919311523,
"learning_rate": 0.00017856455647488937,
"loss": 1.0011459589004517,
"step": 563
},
{
"epoch": 0.32525951557093424,
"grad_norm": 0.8205957412719727,
"learning_rate": 0.00017852607273426978,
"loss": 0.9995499849319458,
"step": 564
},
{
"epoch": 0.32583621683967706,
"grad_norm": 0.8630533814430237,
"learning_rate": 0.0001784875889936502,
"loss": 1.119580864906311,
"step": 565
},
{
"epoch": 0.3264129181084198,
"grad_norm": 0.6678904294967651,
"learning_rate": 0.00017844910525303061,
"loss": 0.9301247596740723,
"step": 566
},
{
"epoch": 0.32698961937716264,
"grad_norm": 0.7211806774139404,
"learning_rate": 0.000178410621512411,
"loss": 1.3346351385116577,
"step": 567
},
{
"epoch": 0.3275663206459054,
"grad_norm": 0.6392566561698914,
"learning_rate": 0.00017837213777179142,
"loss": 0.6997557878494263,
"step": 568
},
{
"epoch": 0.3281430219146482,
"grad_norm": 0.8357546329498291,
"learning_rate": 0.00017833365403117183,
"loss": 1.3044462203979492,
"step": 569
},
{
"epoch": 0.328719723183391,
"grad_norm": 0.7778827548027039,
"learning_rate": 0.00017829517029055225,
"loss": 0.9234685897827148,
"step": 570
},
{
"epoch": 0.3292964244521338,
"grad_norm": 0.7168182730674744,
"learning_rate": 0.00017825668654993266,
"loss": 1.532446265220642,
"step": 571
},
{
"epoch": 0.32987312572087657,
"grad_norm": 1.016398549079895,
"learning_rate": 0.00017821820280931308,
"loss": 1.4056748151779175,
"step": 572
},
{
"epoch": 0.3304498269896194,
"grad_norm": 0.8056113719940186,
"learning_rate": 0.0001781797190686935,
"loss": 1.0595710277557373,
"step": 573
},
{
"epoch": 0.33102652825836215,
"grad_norm": 0.6588327884674072,
"learning_rate": 0.0001781412353280739,
"loss": 0.849087655544281,
"step": 574
},
{
"epoch": 0.33160322952710497,
"grad_norm": 0.7659177184104919,
"learning_rate": 0.0001781027515874543,
"loss": 1.1442945003509521,
"step": 575
},
{
"epoch": 0.33217993079584773,
"grad_norm": 0.8960584402084351,
"learning_rate": 0.0001780642678468347,
"loss": 1.2777467966079712,
"step": 576
},
{
"epoch": 0.33275663206459055,
"grad_norm": 0.8990175724029541,
"learning_rate": 0.00017802578410621513,
"loss": 1.0199333429336548,
"step": 577
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.7010089159011841,
"learning_rate": 0.00017798730036559554,
"loss": 1.2177313566207886,
"step": 578
},
{
"epoch": 0.33391003460207613,
"grad_norm": 0.8779993057250977,
"learning_rate": 0.00017794881662497596,
"loss": 0.8511064648628235,
"step": 579
},
{
"epoch": 0.3344867358708189,
"grad_norm": 0.8380318880081177,
"learning_rate": 0.00017791033288435637,
"loss": 1.0792275667190552,
"step": 580
},
{
"epoch": 0.3350634371395617,
"grad_norm": 0.7335569858551025,
"learning_rate": 0.0001778718491437368,
"loss": 1.0502758026123047,
"step": 581
},
{
"epoch": 0.3356401384083045,
"grad_norm": 0.7759366631507874,
"learning_rate": 0.0001778333654031172,
"loss": 1.000847578048706,
"step": 582
},
{
"epoch": 0.3362168396770473,
"grad_norm": 0.565648078918457,
"learning_rate": 0.0001777948816624976,
"loss": 0.7337483167648315,
"step": 583
},
{
"epoch": 0.33679354094579006,
"grad_norm": 0.8646697998046875,
"learning_rate": 0.000177756397921878,
"loss": 1.2806568145751953,
"step": 584
},
{
"epoch": 0.3373702422145329,
"grad_norm": 0.9556112289428711,
"learning_rate": 0.00017771791418125842,
"loss": 1.1648443937301636,
"step": 585
},
{
"epoch": 0.33794694348327564,
"grad_norm": 0.6629974842071533,
"learning_rate": 0.00017767943044063884,
"loss": 1.0415198802947998,
"step": 586
},
{
"epoch": 0.33852364475201846,
"grad_norm": 0.5972018837928772,
"learning_rate": 0.00017764094670001925,
"loss": 0.6916914582252502,
"step": 587
},
{
"epoch": 0.3391003460207612,
"grad_norm": 0.7391757965087891,
"learning_rate": 0.00017760246295939967,
"loss": 1.194846510887146,
"step": 588
},
{
"epoch": 0.33967704728950404,
"grad_norm": 0.7234671711921692,
"learning_rate": 0.00017756397921878008,
"loss": 0.9572672247886658,
"step": 589
},
{
"epoch": 0.3402537485582468,
"grad_norm": 0.6949688792228699,
"learning_rate": 0.0001775254954781605,
"loss": 0.9968490600585938,
"step": 590
},
{
"epoch": 0.3408304498269896,
"grad_norm": 0.9384737610816956,
"learning_rate": 0.0001774870117375409,
"loss": 1.106278896331787,
"step": 591
},
{
"epoch": 0.3414071510957324,
"grad_norm": 0.8691385388374329,
"learning_rate": 0.0001774485279969213,
"loss": 0.8517290353775024,
"step": 592
},
{
"epoch": 0.3419838523644752,
"grad_norm": 0.6864728331565857,
"learning_rate": 0.00017741004425630172,
"loss": 0.9280612468719482,
"step": 593
},
{
"epoch": 0.34256055363321797,
"grad_norm": 0.7656051516532898,
"learning_rate": 0.00017737156051568213,
"loss": 1.0975104570388794,
"step": 594
},
{
"epoch": 0.3431372549019608,
"grad_norm": 0.6587508916854858,
"learning_rate": 0.00017733307677506255,
"loss": 0.9575508236885071,
"step": 595
},
{
"epoch": 0.34371395617070355,
"grad_norm": 0.8466372489929199,
"learning_rate": 0.00017729459303444296,
"loss": 1.2343617677688599,
"step": 596
},
{
"epoch": 0.34429065743944637,
"grad_norm": 1.0839906930923462,
"learning_rate": 0.00017725610929382338,
"loss": 1.3552396297454834,
"step": 597
},
{
"epoch": 0.34486735870818913,
"grad_norm": 0.7300306558609009,
"learning_rate": 0.00017721762555320377,
"loss": 1.0701713562011719,
"step": 598
},
{
"epoch": 0.34544405997693195,
"grad_norm": 0.737766683101654,
"learning_rate": 0.00017717914181258418,
"loss": 1.0968977212905884,
"step": 599
},
{
"epoch": 0.3460207612456747,
"grad_norm": 0.749933660030365,
"learning_rate": 0.0001771406580719646,
"loss": 1.3320926427841187,
"step": 600
},
{
"epoch": 0.34659746251441753,
"grad_norm": 1.0226854085922241,
"learning_rate": 0.000177102174331345,
"loss": 1.5281516313552856,
"step": 601
},
{
"epoch": 0.34717416378316035,
"grad_norm": 0.5458315014839172,
"learning_rate": 0.00017706369059072543,
"loss": 0.6243756413459778,
"step": 602
},
{
"epoch": 0.3477508650519031,
"grad_norm": 0.6592231392860413,
"learning_rate": 0.00017702520685010584,
"loss": 1.007111668586731,
"step": 603
},
{
"epoch": 0.34832756632064593,
"grad_norm": 0.7599675059318542,
"learning_rate": 0.00017698672310948626,
"loss": 1.059772253036499,
"step": 604
},
{
"epoch": 0.3489042675893887,
"grad_norm": 0.7249642610549927,
"learning_rate": 0.00017694823936886667,
"loss": 1.0405762195587158,
"step": 605
},
{
"epoch": 0.3494809688581315,
"grad_norm": 0.6669758558273315,
"learning_rate": 0.00017690975562824706,
"loss": 0.8157357573509216,
"step": 606
},
{
"epoch": 0.3500576701268743,
"grad_norm": 1.0521658658981323,
"learning_rate": 0.00017687127188762748,
"loss": 1.3226133584976196,
"step": 607
},
{
"epoch": 0.3506343713956171,
"grad_norm": 1.190586805343628,
"learning_rate": 0.0001768327881470079,
"loss": 0.9668002724647522,
"step": 608
},
{
"epoch": 0.35121107266435986,
"grad_norm": 0.7342950105667114,
"learning_rate": 0.0001767943044063883,
"loss": 1.0137907266616821,
"step": 609
},
{
"epoch": 0.3517877739331027,
"grad_norm": 0.8390425443649292,
"learning_rate": 0.00017675582066576872,
"loss": 1.2452900409698486,
"step": 610
},
{
"epoch": 0.35236447520184544,
"grad_norm": 0.7040269374847412,
"learning_rate": 0.00017671733692514914,
"loss": 1.1274709701538086,
"step": 611
},
{
"epoch": 0.35294117647058826,
"grad_norm": 0.6184991002082825,
"learning_rate": 0.00017667885318452955,
"loss": 0.8320228457450867,
"step": 612
},
{
"epoch": 0.353517877739331,
"grad_norm": 0.9174041748046875,
"learning_rate": 0.00017664036944390997,
"loss": 1.0515730381011963,
"step": 613
},
{
"epoch": 0.35409457900807384,
"grad_norm": 0.8032795786857605,
"learning_rate": 0.00017660188570329036,
"loss": 0.9692851901054382,
"step": 614
},
{
"epoch": 0.3546712802768166,
"grad_norm": 0.854794979095459,
"learning_rate": 0.00017656340196267077,
"loss": 0.9672110676765442,
"step": 615
},
{
"epoch": 0.3552479815455594,
"grad_norm": 0.8945924043655396,
"learning_rate": 0.0001765249182220512,
"loss": 1.1629329919815063,
"step": 616
},
{
"epoch": 0.3558246828143022,
"grad_norm": 0.8737151622772217,
"learning_rate": 0.0001764864344814316,
"loss": 1.022585153579712,
"step": 617
},
{
"epoch": 0.356401384083045,
"grad_norm": 0.7043283581733704,
"learning_rate": 0.00017644795074081202,
"loss": 1.825275182723999,
"step": 618
},
{
"epoch": 0.35697808535178777,
"grad_norm": 0.81025230884552,
"learning_rate": 0.00017640946700019243,
"loss": 1.1937224864959717,
"step": 619
},
{
"epoch": 0.3575547866205306,
"grad_norm": 0.6064541339874268,
"learning_rate": 0.00017637098325957285,
"loss": 1.144992709159851,
"step": 620
},
{
"epoch": 0.35813148788927335,
"grad_norm": 0.7281432747840881,
"learning_rate": 0.00017633249951895326,
"loss": 0.8976823091506958,
"step": 621
},
{
"epoch": 0.35870818915801617,
"grad_norm": 0.7124044895172119,
"learning_rate": 0.00017629401577833365,
"loss": 0.9814664721488953,
"step": 622
},
{
"epoch": 0.35928489042675893,
"grad_norm": 0.7080062031745911,
"learning_rate": 0.00017625553203771407,
"loss": 0.8040327429771423,
"step": 623
},
{
"epoch": 0.35986159169550175,
"grad_norm": 0.9307262897491455,
"learning_rate": 0.00017621704829709448,
"loss": 1.1769636869430542,
"step": 624
},
{
"epoch": 0.3604382929642445,
"grad_norm": 0.6040496230125427,
"learning_rate": 0.0001761785645564749,
"loss": 0.8058497905731201,
"step": 625
},
{
"epoch": 0.36101499423298733,
"grad_norm": 0.6352747678756714,
"learning_rate": 0.00017614008081585531,
"loss": 1.0901957750320435,
"step": 626
},
{
"epoch": 0.3615916955017301,
"grad_norm": 1.0686722993850708,
"learning_rate": 0.00017610159707523573,
"loss": 1.0280206203460693,
"step": 627
},
{
"epoch": 0.3621683967704729,
"grad_norm": 0.823551595211029,
"learning_rate": 0.00017606311333461614,
"loss": 1.1255362033843994,
"step": 628
},
{
"epoch": 0.3627450980392157,
"grad_norm": 0.8719285726547241,
"learning_rate": 0.00017602462959399656,
"loss": 1.1470766067504883,
"step": 629
},
{
"epoch": 0.3633217993079585,
"grad_norm": 0.8169400691986084,
"learning_rate": 0.00017598614585337695,
"loss": 1.0567045211791992,
"step": 630
},
{
"epoch": 0.36389850057670126,
"grad_norm": 1.0707166194915771,
"learning_rate": 0.00017594766211275736,
"loss": 1.3314507007598877,
"step": 631
},
{
"epoch": 0.3644752018454441,
"grad_norm": 0.6268380284309387,
"learning_rate": 0.00017590917837213778,
"loss": 1.100555419921875,
"step": 632
},
{
"epoch": 0.36505190311418684,
"grad_norm": 0.7382054328918457,
"learning_rate": 0.0001758706946315182,
"loss": 0.9670585989952087,
"step": 633
},
{
"epoch": 0.36562860438292966,
"grad_norm": 1.193224310874939,
"learning_rate": 0.0001758322108908986,
"loss": 1.3042614459991455,
"step": 634
},
{
"epoch": 0.3662053056516724,
"grad_norm": 1.0091503858566284,
"learning_rate": 0.00017579372715027902,
"loss": 1.3520644903182983,
"step": 635
},
{
"epoch": 0.36678200692041524,
"grad_norm": 0.6810548901557922,
"learning_rate": 0.00017575524340965944,
"loss": 0.8741036653518677,
"step": 636
},
{
"epoch": 0.367358708189158,
"grad_norm": 0.7155483365058899,
"learning_rate": 0.00017571675966903986,
"loss": 0.8751124143600464,
"step": 637
},
{
"epoch": 0.3679354094579008,
"grad_norm": 1.0436261892318726,
"learning_rate": 0.00017567827592842024,
"loss": 1.5696821212768555,
"step": 638
},
{
"epoch": 0.3685121107266436,
"grad_norm": 0.9394407868385315,
"learning_rate": 0.00017563979218780066,
"loss": 0.8675939440727234,
"step": 639
},
{
"epoch": 0.3690888119953864,
"grad_norm": 1.4341135025024414,
"learning_rate": 0.00017560130844718107,
"loss": 1.498160481452942,
"step": 640
},
{
"epoch": 0.36966551326412916,
"grad_norm": 1.006375789642334,
"learning_rate": 0.0001755628247065615,
"loss": 1.2490055561065674,
"step": 641
},
{
"epoch": 0.370242214532872,
"grad_norm": 0.6104082465171814,
"learning_rate": 0.0001755243409659419,
"loss": 0.8300263285636902,
"step": 642
},
{
"epoch": 0.37081891580161475,
"grad_norm": 0.8571838736534119,
"learning_rate": 0.00017548585722532232,
"loss": 1.146481990814209,
"step": 643
},
{
"epoch": 0.37139561707035756,
"grad_norm": 0.6824607253074646,
"learning_rate": 0.00017544737348470273,
"loss": 1.2418452501296997,
"step": 644
},
{
"epoch": 0.3719723183391003,
"grad_norm": 1.0891611576080322,
"learning_rate": 0.00017540888974408315,
"loss": 1.2160457372665405,
"step": 645
},
{
"epoch": 0.37254901960784315,
"grad_norm": 0.6260281801223755,
"learning_rate": 0.00017537040600346354,
"loss": 0.8934881091117859,
"step": 646
},
{
"epoch": 0.3731257208765859,
"grad_norm": 0.8351913690567017,
"learning_rate": 0.00017533192226284395,
"loss": 1.5422282218933105,
"step": 647
},
{
"epoch": 0.3737024221453287,
"grad_norm": 0.7572267055511475,
"learning_rate": 0.00017529343852222437,
"loss": 1.2659950256347656,
"step": 648
},
{
"epoch": 0.3742791234140715,
"grad_norm": 0.7712565064430237,
"learning_rate": 0.00017525495478160478,
"loss": 1.2143782377243042,
"step": 649
},
{
"epoch": 0.3748558246828143,
"grad_norm": 0.6880773305892944,
"learning_rate": 0.0001752164710409852,
"loss": 1.0878217220306396,
"step": 650
},
{
"epoch": 0.3754325259515571,
"grad_norm": 0.8996551632881165,
"learning_rate": 0.00017517798730036561,
"loss": 0.9668335914611816,
"step": 651
},
{
"epoch": 0.3760092272202999,
"grad_norm": 0.921444296836853,
"learning_rate": 0.00017513950355974603,
"loss": 1.1585900783538818,
"step": 652
},
{
"epoch": 0.37658592848904265,
"grad_norm": 0.8658480048179626,
"learning_rate": 0.00017510101981912645,
"loss": 1.1533393859863281,
"step": 653
},
{
"epoch": 0.3771626297577855,
"grad_norm": 0.6665229797363281,
"learning_rate": 0.00017506253607850683,
"loss": 0.8233336210250854,
"step": 654
},
{
"epoch": 0.37773933102652824,
"grad_norm": 0.746337890625,
"learning_rate": 0.00017502405233788725,
"loss": 1.099341630935669,
"step": 655
},
{
"epoch": 0.37831603229527105,
"grad_norm": 0.8498716354370117,
"learning_rate": 0.00017498556859726766,
"loss": 1.333115577697754,
"step": 656
},
{
"epoch": 0.3788927335640138,
"grad_norm": 0.7371817827224731,
"learning_rate": 0.00017494708485664808,
"loss": 1.05489182472229,
"step": 657
},
{
"epoch": 0.37946943483275664,
"grad_norm": 0.7369913458824158,
"learning_rate": 0.0001749086011160285,
"loss": 0.7275075912475586,
"step": 658
},
{
"epoch": 0.3800461361014994,
"grad_norm": 1.4918899536132812,
"learning_rate": 0.0001748701173754089,
"loss": 1.2430638074874878,
"step": 659
},
{
"epoch": 0.3806228373702422,
"grad_norm": 0.686100423336029,
"learning_rate": 0.00017483163363478933,
"loss": 0.7841339707374573,
"step": 660
},
{
"epoch": 0.381199538638985,
"grad_norm": 0.7799985408782959,
"learning_rate": 0.00017479314989416974,
"loss": 1.1784673929214478,
"step": 661
},
{
"epoch": 0.3817762399077278,
"grad_norm": 0.7435747385025024,
"learning_rate": 0.00017475466615355013,
"loss": 1.180450439453125,
"step": 662
},
{
"epoch": 0.38235294117647056,
"grad_norm": 0.7358818650245667,
"learning_rate": 0.00017471618241293054,
"loss": 0.9987742900848389,
"step": 663
},
{
"epoch": 0.3829296424452134,
"grad_norm": 0.7353511452674866,
"learning_rate": 0.00017467769867231096,
"loss": 1.1325185298919678,
"step": 664
},
{
"epoch": 0.38350634371395614,
"grad_norm": 0.7735626697540283,
"learning_rate": 0.00017463921493169137,
"loss": 1.0828659534454346,
"step": 665
},
{
"epoch": 0.38408304498269896,
"grad_norm": 0.6293249130249023,
"learning_rate": 0.0001746007311910718,
"loss": 0.9253727793693542,
"step": 666
},
{
"epoch": 0.3846597462514418,
"grad_norm": 0.6271319389343262,
"learning_rate": 0.0001745622474504522,
"loss": 0.7645162343978882,
"step": 667
},
{
"epoch": 0.38523644752018454,
"grad_norm": 0.6632966995239258,
"learning_rate": 0.00017452376370983262,
"loss": 0.9796670079231262,
"step": 668
},
{
"epoch": 0.38581314878892736,
"grad_norm": 0.8829965591430664,
"learning_rate": 0.00017448527996921304,
"loss": 0.9777094721794128,
"step": 669
},
{
"epoch": 0.3863898500576701,
"grad_norm": 0.7675085663795471,
"learning_rate": 0.00017444679622859342,
"loss": 1.0497252941131592,
"step": 670
},
{
"epoch": 0.38696655132641294,
"grad_norm": 0.9194138050079346,
"learning_rate": 0.00017440831248797384,
"loss": 1.0992257595062256,
"step": 671
},
{
"epoch": 0.3875432525951557,
"grad_norm": 1.0398883819580078,
"learning_rate": 0.00017436982874735425,
"loss": 1.25284743309021,
"step": 672
},
{
"epoch": 0.3881199538638985,
"grad_norm": 0.5921796560287476,
"learning_rate": 0.00017433134500673467,
"loss": 0.6763097047805786,
"step": 673
},
{
"epoch": 0.3886966551326413,
"grad_norm": 1.0226387977600098,
"learning_rate": 0.00017429286126611508,
"loss": 1.3273173570632935,
"step": 674
},
{
"epoch": 0.3892733564013841,
"grad_norm": 0.5252590179443359,
"learning_rate": 0.0001742543775254955,
"loss": 0.6646312475204468,
"step": 675
},
{
"epoch": 0.38985005767012687,
"grad_norm": 0.600639820098877,
"learning_rate": 0.00017421589378487592,
"loss": 0.7095688581466675,
"step": 676
},
{
"epoch": 0.3904267589388697,
"grad_norm": 0.7131365537643433,
"learning_rate": 0.00017417741004425633,
"loss": 1.2200595140457153,
"step": 677
},
{
"epoch": 0.39100346020761245,
"grad_norm": 0.9018159508705139,
"learning_rate": 0.00017413892630363672,
"loss": 0.9669409394264221,
"step": 678
},
{
"epoch": 0.39158016147635527,
"grad_norm": 0.9841684103012085,
"learning_rate": 0.00017410044256301713,
"loss": 1.028241515159607,
"step": 679
},
{
"epoch": 0.39215686274509803,
"grad_norm": 0.9678821563720703,
"learning_rate": 0.00017406195882239755,
"loss": 1.3122403621673584,
"step": 680
},
{
"epoch": 0.39273356401384085,
"grad_norm": 0.6439565420150757,
"learning_rate": 0.00017402347508177796,
"loss": 0.8441326022148132,
"step": 681
},
{
"epoch": 0.3933102652825836,
"grad_norm": 0.8460219502449036,
"learning_rate": 0.00017398499134115838,
"loss": 1.193575382232666,
"step": 682
},
{
"epoch": 0.39388696655132643,
"grad_norm": 0.8068860769271851,
"learning_rate": 0.0001739465076005388,
"loss": 1.209285020828247,
"step": 683
},
{
"epoch": 0.3944636678200692,
"grad_norm": 0.6420811414718628,
"learning_rate": 0.0001739080238599192,
"loss": 0.9203285574913025,
"step": 684
},
{
"epoch": 0.395040369088812,
"grad_norm": 1.1171250343322754,
"learning_rate": 0.0001738695401192996,
"loss": 1.5638062953948975,
"step": 685
},
{
"epoch": 0.3956170703575548,
"grad_norm": 0.7218726873397827,
"learning_rate": 0.00017383105637868001,
"loss": 1.1434835195541382,
"step": 686
},
{
"epoch": 0.3961937716262976,
"grad_norm": 0.9958249926567078,
"learning_rate": 0.00017379257263806043,
"loss": 0.7441573143005371,
"step": 687
},
{
"epoch": 0.39677047289504036,
"grad_norm": 0.8222061395645142,
"learning_rate": 0.00017375408889744084,
"loss": 1.2088245153427124,
"step": 688
},
{
"epoch": 0.3973471741637832,
"grad_norm": 0.5759637355804443,
"learning_rate": 0.00017371560515682126,
"loss": 0.9504674077033997,
"step": 689
},
{
"epoch": 0.39792387543252594,
"grad_norm": 0.8157130479812622,
"learning_rate": 0.00017367712141620168,
"loss": 1.319948673248291,
"step": 690
},
{
"epoch": 0.39850057670126876,
"grad_norm": 0.7266381978988647,
"learning_rate": 0.0001736386376755821,
"loss": 0.8739478588104248,
"step": 691
},
{
"epoch": 0.3990772779700115,
"grad_norm": 0.644598126411438,
"learning_rate": 0.0001736001539349625,
"loss": 0.9521651864051819,
"step": 692
},
{
"epoch": 0.39965397923875434,
"grad_norm": 0.5922922492027283,
"learning_rate": 0.0001735616701943429,
"loss": 0.7051569223403931,
"step": 693
},
{
"epoch": 0.4002306805074971,
"grad_norm": 0.6880702972412109,
"learning_rate": 0.0001735231864537233,
"loss": 1.1202598810195923,
"step": 694
},
{
"epoch": 0.4008073817762399,
"grad_norm": 1.1836776733398438,
"learning_rate": 0.00017348470271310372,
"loss": 1.2588169574737549,
"step": 695
},
{
"epoch": 0.4013840830449827,
"grad_norm": 0.965606689453125,
"learning_rate": 0.00017344621897248414,
"loss": 0.7970831990242004,
"step": 696
},
{
"epoch": 0.4019607843137255,
"grad_norm": 0.8883787989616394,
"learning_rate": 0.00017340773523186456,
"loss": 1.6653708219528198,
"step": 697
},
{
"epoch": 0.40253748558246827,
"grad_norm": 0.7349938750267029,
"learning_rate": 0.00017336925149124497,
"loss": 0.7324041724205017,
"step": 698
},
{
"epoch": 0.4031141868512111,
"grad_norm": 1.0731885433197021,
"learning_rate": 0.00017333076775062539,
"loss": 0.9731301069259644,
"step": 699
},
{
"epoch": 0.40369088811995385,
"grad_norm": 0.8691738843917847,
"learning_rate": 0.0001732922840100058,
"loss": 1.0968525409698486,
"step": 700
},
{
"epoch": 0.40426758938869667,
"grad_norm": 0.921116292476654,
"learning_rate": 0.0001732538002693862,
"loss": 1.3427119255065918,
"step": 701
},
{
"epoch": 0.40484429065743943,
"grad_norm": 0.8539203405380249,
"learning_rate": 0.0001732153165287666,
"loss": 1.2618871927261353,
"step": 702
},
{
"epoch": 0.40542099192618225,
"grad_norm": 0.6238696575164795,
"learning_rate": 0.00017317683278814702,
"loss": 0.7679486274719238,
"step": 703
},
{
"epoch": 0.405997693194925,
"grad_norm": 0.6827321648597717,
"learning_rate": 0.00017313834904752743,
"loss": 0.9498722553253174,
"step": 704
},
{
"epoch": 0.40657439446366783,
"grad_norm": 0.9637985229492188,
"learning_rate": 0.00017309986530690785,
"loss": 1.2945339679718018,
"step": 705
},
{
"epoch": 0.4071510957324106,
"grad_norm": 0.6361503601074219,
"learning_rate": 0.00017306138156628827,
"loss": 1.2040516138076782,
"step": 706
},
{
"epoch": 0.4077277970011534,
"grad_norm": 0.713758647441864,
"learning_rate": 0.00017302289782566868,
"loss": 1.1285666227340698,
"step": 707
},
{
"epoch": 0.4083044982698962,
"grad_norm": 1.0620390176773071,
"learning_rate": 0.0001729844140850491,
"loss": 1.2117018699645996,
"step": 708
},
{
"epoch": 0.408881199538639,
"grad_norm": 0.6957300305366516,
"learning_rate": 0.00017294593034442948,
"loss": 1.2091706991195679,
"step": 709
},
{
"epoch": 0.40945790080738176,
"grad_norm": 0.4594845771789551,
"learning_rate": 0.0001729074466038099,
"loss": 3.3324732780456543,
"step": 710
},
{
"epoch": 0.4100346020761246,
"grad_norm": 0.8902932405471802,
"learning_rate": 0.00017286896286319031,
"loss": 1.1579055786132812,
"step": 711
},
{
"epoch": 0.41061130334486734,
"grad_norm": 0.7140578031539917,
"learning_rate": 0.00017283047912257073,
"loss": 0.877116858959198,
"step": 712
},
{
"epoch": 0.41118800461361016,
"grad_norm": 0.8449535369873047,
"learning_rate": 0.00017279199538195115,
"loss": 1.2400063276290894,
"step": 713
},
{
"epoch": 0.4117647058823529,
"grad_norm": 1.0700358152389526,
"learning_rate": 0.00017275351164133156,
"loss": 1.1401453018188477,
"step": 714
},
{
"epoch": 0.41234140715109574,
"grad_norm": 0.6705982685089111,
"learning_rate": 0.00017271502790071198,
"loss": 0.8326209783554077,
"step": 715
},
{
"epoch": 0.4129181084198385,
"grad_norm": 0.7149010896682739,
"learning_rate": 0.0001726765441600924,
"loss": 1.0872998237609863,
"step": 716
},
{
"epoch": 0.4134948096885813,
"grad_norm": 0.46808966994285583,
"learning_rate": 0.00017263806041947278,
"loss": 0.6795035004615784,
"step": 717
},
{
"epoch": 0.4140715109573241,
"grad_norm": 0.8606752157211304,
"learning_rate": 0.0001725995766788532,
"loss": 1.0544252395629883,
"step": 718
},
{
"epoch": 0.4146482122260669,
"grad_norm": 0.5839232802391052,
"learning_rate": 0.0001725610929382336,
"loss": 0.7785719633102417,
"step": 719
},
{
"epoch": 0.41522491349480967,
"grad_norm": 0.8700772523880005,
"learning_rate": 0.00017252260919761403,
"loss": 0.988602340221405,
"step": 720
},
{
"epoch": 0.4158016147635525,
"grad_norm": 0.9886090159416199,
"learning_rate": 0.00017248412545699444,
"loss": 1.3493539094924927,
"step": 721
},
{
"epoch": 0.41637831603229525,
"grad_norm": 0.9088316559791565,
"learning_rate": 0.00017244564171637486,
"loss": 1.0131090879440308,
"step": 722
},
{
"epoch": 0.41695501730103807,
"grad_norm": 0.9066189527511597,
"learning_rate": 0.00017240715797575527,
"loss": 1.2530944347381592,
"step": 723
},
{
"epoch": 0.41753171856978083,
"grad_norm": 0.7733665704727173,
"learning_rate": 0.00017236867423513569,
"loss": 1.1255629062652588,
"step": 724
},
{
"epoch": 0.41810841983852365,
"grad_norm": 0.609832763671875,
"learning_rate": 0.00017233019049451607,
"loss": 0.7514859437942505,
"step": 725
},
{
"epoch": 0.4186851211072664,
"grad_norm": 0.6903802752494812,
"learning_rate": 0.0001722917067538965,
"loss": 0.8925538063049316,
"step": 726
},
{
"epoch": 0.41926182237600923,
"grad_norm": 0.7692581415176392,
"learning_rate": 0.0001722532230132769,
"loss": 1.103420376777649,
"step": 727
},
{
"epoch": 0.419838523644752,
"grad_norm": 0.7881311774253845,
"learning_rate": 0.0001722147392726573,
"loss": 1.3109550476074219,
"step": 728
},
{
"epoch": 0.4204152249134948,
"grad_norm": 0.6949164271354675,
"learning_rate": 0.0001721762555320377,
"loss": 1.0904300212860107,
"step": 729
},
{
"epoch": 0.4209919261822376,
"grad_norm": 0.6746834516525269,
"learning_rate": 0.00017213777179141812,
"loss": 1.240382194519043,
"step": 730
},
{
"epoch": 0.4215686274509804,
"grad_norm": 0.8831079602241516,
"learning_rate": 0.00017209928805079854,
"loss": 1.546260118484497,
"step": 731
},
{
"epoch": 0.42214532871972316,
"grad_norm": 0.917523205280304,
"learning_rate": 0.00017206080431017895,
"loss": 1.3464173078536987,
"step": 732
},
{
"epoch": 0.422722029988466,
"grad_norm": 0.729640007019043,
"learning_rate": 0.00017202232056955937,
"loss": 0.9092597961425781,
"step": 733
},
{
"epoch": 0.4232987312572088,
"grad_norm": 0.9597057104110718,
"learning_rate": 0.00017198383682893976,
"loss": 1.449595332145691,
"step": 734
},
{
"epoch": 0.42387543252595156,
"grad_norm": 0.570996880531311,
"learning_rate": 0.00017194535308832017,
"loss": 0.660990297794342,
"step": 735
},
{
"epoch": 0.4244521337946944,
"grad_norm": 0.8485130071640015,
"learning_rate": 0.0001719068693477006,
"loss": 1.009351372718811,
"step": 736
},
{
"epoch": 0.42502883506343714,
"grad_norm": 1.1340487003326416,
"learning_rate": 0.000171868385607081,
"loss": 1.186898946762085,
"step": 737
},
{
"epoch": 0.42560553633217996,
"grad_norm": 0.9666796326637268,
"learning_rate": 0.00017182990186646142,
"loss": 1.3713027238845825,
"step": 738
},
{
"epoch": 0.4261822376009227,
"grad_norm": 0.8104447722434998,
"learning_rate": 0.00017179141812584183,
"loss": 0.7822756767272949,
"step": 739
},
{
"epoch": 0.42675893886966554,
"grad_norm": 0.7587509155273438,
"learning_rate": 0.00017175293438522225,
"loss": 1.1129992008209229,
"step": 740
},
{
"epoch": 0.4273356401384083,
"grad_norm": 0.854256272315979,
"learning_rate": 0.00017171445064460266,
"loss": 1.1753698587417603,
"step": 741
},
{
"epoch": 0.4279123414071511,
"grad_norm": 0.7335513234138489,
"learning_rate": 0.00017167596690398305,
"loss": 1.1233677864074707,
"step": 742
},
{
"epoch": 0.4284890426758939,
"grad_norm": 1.1383814811706543,
"learning_rate": 0.00017163748316336347,
"loss": 1.6328407526016235,
"step": 743
},
{
"epoch": 0.4290657439446367,
"grad_norm": 0.5805800557136536,
"learning_rate": 0.00017159899942274388,
"loss": 0.8374234437942505,
"step": 744
},
{
"epoch": 0.42964244521337946,
"grad_norm": 0.5744853615760803,
"learning_rate": 0.0001715605156821243,
"loss": 0.7072418332099915,
"step": 745
},
{
"epoch": 0.4302191464821223,
"grad_norm": 1.0968151092529297,
"learning_rate": 0.00017152203194150471,
"loss": 0.9308477640151978,
"step": 746
},
{
"epoch": 0.43079584775086505,
"grad_norm": 0.7771037220954895,
"learning_rate": 0.00017148354820088513,
"loss": 1.0803910493850708,
"step": 747
},
{
"epoch": 0.43137254901960786,
"grad_norm": 0.760296106338501,
"learning_rate": 0.00017144506446026554,
"loss": 0.9416469931602478,
"step": 748
},
{
"epoch": 0.43194925028835063,
"grad_norm": 0.8478863835334778,
"learning_rate": 0.00017140658071964596,
"loss": 1.0037909746170044,
"step": 749
},
{
"epoch": 0.43252595155709345,
"grad_norm": 0.802010715007782,
"learning_rate": 0.00017136809697902635,
"loss": 1.2789827585220337,
"step": 750
},
{
"epoch": 0.4331026528258362,
"grad_norm": 0.7146703004837036,
"learning_rate": 0.00017132961323840676,
"loss": 0.925313413143158,
"step": 751
},
{
"epoch": 0.43367935409457903,
"grad_norm": 1.1419707536697388,
"learning_rate": 0.00017129112949778718,
"loss": 1.3266316652297974,
"step": 752
},
{
"epoch": 0.4342560553633218,
"grad_norm": 0.5337522029876709,
"learning_rate": 0.0001712526457571676,
"loss": 0.8182927966117859,
"step": 753
},
{
"epoch": 0.4348327566320646,
"grad_norm": 0.7067147493362427,
"learning_rate": 0.000171214162016548,
"loss": 1.01529061794281,
"step": 754
},
{
"epoch": 0.4354094579008074,
"grad_norm": 0.8742361664772034,
"learning_rate": 0.00017117567827592842,
"loss": 0.9216449856758118,
"step": 755
},
{
"epoch": 0.4359861591695502,
"grad_norm": 1.0121413469314575,
"learning_rate": 0.00017113719453530884,
"loss": 1.5315768718719482,
"step": 756
},
{
"epoch": 0.43656286043829295,
"grad_norm": 0.970582127571106,
"learning_rate": 0.00017109871079468925,
"loss": 1.1701881885528564,
"step": 757
},
{
"epoch": 0.4371395617070358,
"grad_norm": 0.8317894339561462,
"learning_rate": 0.00017106022705406964,
"loss": 1.1619702577590942,
"step": 758
},
{
"epoch": 0.43771626297577854,
"grad_norm": 0.6935670375823975,
"learning_rate": 0.00017102174331345006,
"loss": 1.0018664598464966,
"step": 759
},
{
"epoch": 0.43829296424452135,
"grad_norm": 1.0123279094696045,
"learning_rate": 0.00017098325957283047,
"loss": 1.1231794357299805,
"step": 760
},
{
"epoch": 0.4388696655132641,
"grad_norm": 0.7619280219078064,
"learning_rate": 0.0001709447758322109,
"loss": 1.0395662784576416,
"step": 761
},
{
"epoch": 0.43944636678200694,
"grad_norm": 0.8570308089256287,
"learning_rate": 0.0001709062920915913,
"loss": 1.4022446870803833,
"step": 762
},
{
"epoch": 0.4400230680507497,
"grad_norm": 1.178285837173462,
"learning_rate": 0.00017086780835097172,
"loss": 1.5245153903961182,
"step": 763
},
{
"epoch": 0.4405997693194925,
"grad_norm": 0.876589298248291,
"learning_rate": 0.00017082932461035213,
"loss": 1.482165813446045,
"step": 764
},
{
"epoch": 0.4411764705882353,
"grad_norm": 0.8614532947540283,
"learning_rate": 0.00017079084086973255,
"loss": 1.312232255935669,
"step": 765
},
{
"epoch": 0.4417531718569781,
"grad_norm": 0.6772201061248779,
"learning_rate": 0.00017075235712911294,
"loss": 1.1610076427459717,
"step": 766
},
{
"epoch": 0.44232987312572086,
"grad_norm": 0.805927038192749,
"learning_rate": 0.00017071387338849335,
"loss": 1.3874244689941406,
"step": 767
},
{
"epoch": 0.4429065743944637,
"grad_norm": 0.5419954061508179,
"learning_rate": 0.00017067538964787377,
"loss": 0.7610808610916138,
"step": 768
},
{
"epoch": 0.44348327566320644,
"grad_norm": 0.773598313331604,
"learning_rate": 0.00017063690590725418,
"loss": 0.8612810373306274,
"step": 769
},
{
"epoch": 0.44405997693194926,
"grad_norm": 0.6376165151596069,
"learning_rate": 0.0001705984221666346,
"loss": 0.8417828679084778,
"step": 770
},
{
"epoch": 0.444636678200692,
"grad_norm": 0.6870789527893066,
"learning_rate": 0.00017055993842601501,
"loss": 1.1764918565750122,
"step": 771
},
{
"epoch": 0.44521337946943484,
"grad_norm": 0.5562968254089355,
"learning_rate": 0.00017052145468539543,
"loss": 0.8358933925628662,
"step": 772
},
{
"epoch": 0.4457900807381776,
"grad_norm": 0.602963924407959,
"learning_rate": 0.00017048297094477585,
"loss": 1.197677731513977,
"step": 773
},
{
"epoch": 0.4463667820069204,
"grad_norm": 1.0190907716751099,
"learning_rate": 0.00017044448720415623,
"loss": 1.4355199337005615,
"step": 774
},
{
"epoch": 0.4469434832756632,
"grad_norm": 0.633346676826477,
"learning_rate": 0.00017040600346353665,
"loss": 0.7924656867980957,
"step": 775
},
{
"epoch": 0.447520184544406,
"grad_norm": 0.797099232673645,
"learning_rate": 0.00017036751972291706,
"loss": 1.2302619218826294,
"step": 776
},
{
"epoch": 0.44809688581314877,
"grad_norm": 0.7166492938995361,
"learning_rate": 0.00017032903598229748,
"loss": 1.063340187072754,
"step": 777
},
{
"epoch": 0.4486735870818916,
"grad_norm": 0.9511370062828064,
"learning_rate": 0.0001702905522416779,
"loss": 0.8998168706893921,
"step": 778
},
{
"epoch": 0.44925028835063435,
"grad_norm": 0.8487029075622559,
"learning_rate": 0.0001702520685010583,
"loss": 1.1850653886795044,
"step": 779
},
{
"epoch": 0.44982698961937717,
"grad_norm": 1.0267854928970337,
"learning_rate": 0.00017021358476043873,
"loss": 1.246724009513855,
"step": 780
},
{
"epoch": 0.45040369088811993,
"grad_norm": 1.155428409576416,
"learning_rate": 0.00017017510101981914,
"loss": 1.539854884147644,
"step": 781
},
{
"epoch": 0.45098039215686275,
"grad_norm": 0.6774823069572449,
"learning_rate": 0.00017013661727919953,
"loss": 0.7472063302993774,
"step": 782
},
{
"epoch": 0.4515570934256055,
"grad_norm": 0.7500667572021484,
"learning_rate": 0.00017009813353857994,
"loss": 0.9946876168251038,
"step": 783
},
{
"epoch": 0.45213379469434833,
"grad_norm": 0.7643426656723022,
"learning_rate": 0.00017005964979796036,
"loss": 0.8451071977615356,
"step": 784
},
{
"epoch": 0.4527104959630911,
"grad_norm": 0.721379816532135,
"learning_rate": 0.00017002116605734077,
"loss": 0.9988998174667358,
"step": 785
},
{
"epoch": 0.4532871972318339,
"grad_norm": 0.8850287199020386,
"learning_rate": 0.0001699826823167212,
"loss": 0.9789897203445435,
"step": 786
},
{
"epoch": 0.4538638985005767,
"grad_norm": 1.0076375007629395,
"learning_rate": 0.0001699441985761016,
"loss": 1.3830417394638062,
"step": 787
},
{
"epoch": 0.4544405997693195,
"grad_norm": 0.6105207204818726,
"learning_rate": 0.00016990571483548202,
"loss": 0.8870081901550293,
"step": 788
},
{
"epoch": 0.45501730103806226,
"grad_norm": 0.7732753157615662,
"learning_rate": 0.00016986723109486244,
"loss": 0.9958963990211487,
"step": 789
},
{
"epoch": 0.4555940023068051,
"grad_norm": 0.9871165156364441,
"learning_rate": 0.00016982874735424282,
"loss": 1.1141139268875122,
"step": 790
},
{
"epoch": 0.45617070357554784,
"grad_norm": 0.7117231488227844,
"learning_rate": 0.00016979026361362324,
"loss": 1.0168585777282715,
"step": 791
},
{
"epoch": 0.45674740484429066,
"grad_norm": 0.6954454183578491,
"learning_rate": 0.00016975177987300365,
"loss": 0.9319931268692017,
"step": 792
},
{
"epoch": 0.4573241061130334,
"grad_norm": 0.6463753581047058,
"learning_rate": 0.00016971329613238407,
"loss": 0.9734832644462585,
"step": 793
},
{
"epoch": 0.45790080738177624,
"grad_norm": 0.7156365513801575,
"learning_rate": 0.00016967481239176448,
"loss": 1.0014495849609375,
"step": 794
},
{
"epoch": 0.458477508650519,
"grad_norm": 0.8648508787155151,
"learning_rate": 0.0001696363286511449,
"loss": 1.3907616138458252,
"step": 795
},
{
"epoch": 0.4590542099192618,
"grad_norm": 0.8066338300704956,
"learning_rate": 0.00016959784491052532,
"loss": 1.0530327558517456,
"step": 796
},
{
"epoch": 0.4596309111880046,
"grad_norm": 0.8617266416549683,
"learning_rate": 0.00016955936116990573,
"loss": 1.7989249229431152,
"step": 797
},
{
"epoch": 0.4602076124567474,
"grad_norm": 0.7956259250640869,
"learning_rate": 0.00016952087742928612,
"loss": 0.928198516368866,
"step": 798
},
{
"epoch": 0.46078431372549017,
"grad_norm": 0.8778709173202515,
"learning_rate": 0.00016948239368866653,
"loss": 0.9466978907585144,
"step": 799
},
{
"epoch": 0.461361014994233,
"grad_norm": 0.8518659472465515,
"learning_rate": 0.00016944390994804695,
"loss": 1.0593540668487549,
"step": 800
},
{
"epoch": 0.4619377162629758,
"grad_norm": 0.79550701379776,
"learning_rate": 0.00016940542620742736,
"loss": 1.1164321899414062,
"step": 801
},
{
"epoch": 0.46251441753171857,
"grad_norm": 1.0006239414215088,
"learning_rate": 0.00016936694246680778,
"loss": 1.160499930381775,
"step": 802
},
{
"epoch": 0.4630911188004614,
"grad_norm": 0.8525403738021851,
"learning_rate": 0.0001693284587261882,
"loss": 1.0770652294158936,
"step": 803
},
{
"epoch": 0.46366782006920415,
"grad_norm": 0.6851354837417603,
"learning_rate": 0.0001692899749855686,
"loss": 1.0310590267181396,
"step": 804
},
{
"epoch": 0.46424452133794697,
"grad_norm": 0.6831552386283875,
"learning_rate": 0.000169251491244949,
"loss": 1.0782524347305298,
"step": 805
},
{
"epoch": 0.46482122260668973,
"grad_norm": 0.8892863988876343,
"learning_rate": 0.00016921300750432941,
"loss": 1.3154478073120117,
"step": 806
},
{
"epoch": 0.46539792387543255,
"grad_norm": 0.6863577961921692,
"learning_rate": 0.00016917452376370983,
"loss": 0.5912436842918396,
"step": 807
},
{
"epoch": 0.4659746251441753,
"grad_norm": 0.8612192869186401,
"learning_rate": 0.00016913604002309024,
"loss": 1.0140503644943237,
"step": 808
},
{
"epoch": 0.46655132641291813,
"grad_norm": 0.6565495729446411,
"learning_rate": 0.00016909755628247066,
"loss": 0.8388250470161438,
"step": 809
},
{
"epoch": 0.4671280276816609,
"grad_norm": 0.5729434490203857,
"learning_rate": 0.00016905907254185107,
"loss": 0.8662521839141846,
"step": 810
},
{
"epoch": 0.4677047289504037,
"grad_norm": 0.8261442184448242,
"learning_rate": 0.0001690205888012315,
"loss": 1.1527458429336548,
"step": 811
},
{
"epoch": 0.4682814302191465,
"grad_norm": 0.6182582974433899,
"learning_rate": 0.0001689821050606119,
"loss": 0.7817882895469666,
"step": 812
},
{
"epoch": 0.4688581314878893,
"grad_norm": 0.5987662672996521,
"learning_rate": 0.0001689436213199923,
"loss": 0.864625871181488,
"step": 813
},
{
"epoch": 0.46943483275663206,
"grad_norm": 0.8617327809333801,
"learning_rate": 0.0001689051375793727,
"loss": 1.1531751155853271,
"step": 814
},
{
"epoch": 0.4700115340253749,
"grad_norm": 0.8277755379676819,
"learning_rate": 0.00016886665383875312,
"loss": 0.928108811378479,
"step": 815
},
{
"epoch": 0.47058823529411764,
"grad_norm": 0.7510029673576355,
"learning_rate": 0.00016882817009813354,
"loss": 1.0068414211273193,
"step": 816
},
{
"epoch": 0.47116493656286046,
"grad_norm": 0.8691316246986389,
"learning_rate": 0.00016878968635751395,
"loss": 1.0941516160964966,
"step": 817
},
{
"epoch": 0.4717416378316032,
"grad_norm": 0.581984281539917,
"learning_rate": 0.00016875120261689437,
"loss": 0.6039727926254272,
"step": 818
},
{
"epoch": 0.47231833910034604,
"grad_norm": 0.7486310005187988,
"learning_rate": 0.00016871271887627479,
"loss": 1.140452265739441,
"step": 819
},
{
"epoch": 0.4728950403690888,
"grad_norm": 0.8794305324554443,
"learning_rate": 0.0001686742351356552,
"loss": 1.2717854976654053,
"step": 820
},
{
"epoch": 0.4734717416378316,
"grad_norm": 0.8812481164932251,
"learning_rate": 0.0001686357513950356,
"loss": 0.9813717007637024,
"step": 821
},
{
"epoch": 0.4740484429065744,
"grad_norm": 0.9091891646385193,
"learning_rate": 0.000168597267654416,
"loss": 1.2938401699066162,
"step": 822
},
{
"epoch": 0.4746251441753172,
"grad_norm": 0.9045780301094055,
"learning_rate": 0.00016855878391379642,
"loss": 1.312792181968689,
"step": 823
},
{
"epoch": 0.47520184544405997,
"grad_norm": 0.8430265784263611,
"learning_rate": 0.00016852030017317683,
"loss": 1.2679914236068726,
"step": 824
},
{
"epoch": 0.4757785467128028,
"grad_norm": 0.6870001554489136,
"learning_rate": 0.00016848181643255725,
"loss": 0.970576822757721,
"step": 825
},
{
"epoch": 0.47635524798154555,
"grad_norm": 0.8256406188011169,
"learning_rate": 0.00016844333269193767,
"loss": 1.302760362625122,
"step": 826
},
{
"epoch": 0.47693194925028837,
"grad_norm": 0.7057660222053528,
"learning_rate": 0.00016840484895131808,
"loss": 0.9811574220657349,
"step": 827
},
{
"epoch": 0.47750865051903113,
"grad_norm": 0.8487821817398071,
"learning_rate": 0.0001683663652106985,
"loss": 1.0537941455841064,
"step": 828
},
{
"epoch": 0.47808535178777395,
"grad_norm": 0.7474492788314819,
"learning_rate": 0.00016832788147007888,
"loss": 0.856541633605957,
"step": 829
},
{
"epoch": 0.4786620530565167,
"grad_norm": 0.9228368401527405,
"learning_rate": 0.0001682893977294593,
"loss": 1.0505741834640503,
"step": 830
},
{
"epoch": 0.47923875432525953,
"grad_norm": 0.9288182854652405,
"learning_rate": 0.00016825091398883971,
"loss": 1.3584654331207275,
"step": 831
},
{
"epoch": 0.4798154555940023,
"grad_norm": 1.4403129816055298,
"learning_rate": 0.00016821243024822013,
"loss": 1.911801815032959,
"step": 832
},
{
"epoch": 0.4803921568627451,
"grad_norm": 0.6283893585205078,
"learning_rate": 0.00016817394650760055,
"loss": 0.8583131432533264,
"step": 833
},
{
"epoch": 0.4809688581314879,
"grad_norm": 0.6910902261734009,
"learning_rate": 0.00016813546276698096,
"loss": 1.3508315086364746,
"step": 834
},
{
"epoch": 0.4815455594002307,
"grad_norm": 0.6606875658035278,
"learning_rate": 0.00016809697902636138,
"loss": 1.0815465450286865,
"step": 835
},
{
"epoch": 0.48212226066897346,
"grad_norm": 0.8546112775802612,
"learning_rate": 0.0001680584952857418,
"loss": 1.2201032638549805,
"step": 836
},
{
"epoch": 0.4826989619377163,
"grad_norm": 0.9130816459655762,
"learning_rate": 0.00016802001154512218,
"loss": 1.208343744277954,
"step": 837
},
{
"epoch": 0.48327566320645904,
"grad_norm": 0.7690496444702148,
"learning_rate": 0.0001679815278045026,
"loss": 1.0452954769134521,
"step": 838
},
{
"epoch": 0.48385236447520186,
"grad_norm": 0.7210266590118408,
"learning_rate": 0.000167943044063883,
"loss": 0.7897384166717529,
"step": 839
},
{
"epoch": 0.4844290657439446,
"grad_norm": 0.5705054402351379,
"learning_rate": 0.00016790456032326342,
"loss": 0.8288441896438599,
"step": 840
},
{
"epoch": 0.48500576701268744,
"grad_norm": 0.6143510341644287,
"learning_rate": 0.00016786607658264384,
"loss": 0.8081311583518982,
"step": 841
},
{
"epoch": 0.4855824682814302,
"grad_norm": 0.7222305536270142,
"learning_rate": 0.00016782759284202426,
"loss": 1.1107532978057861,
"step": 842
},
{
"epoch": 0.486159169550173,
"grad_norm": 0.6712546944618225,
"learning_rate": 0.00016778910910140467,
"loss": 0.8375999927520752,
"step": 843
},
{
"epoch": 0.4867358708189158,
"grad_norm": 0.9085020422935486,
"learning_rate": 0.00016775062536078509,
"loss": 0.9624453186988831,
"step": 844
},
{
"epoch": 0.4873125720876586,
"grad_norm": 0.773102879524231,
"learning_rate": 0.00016771214162016547,
"loss": 1.0454928874969482,
"step": 845
},
{
"epoch": 0.48788927335640137,
"grad_norm": 0.5635338425636292,
"learning_rate": 0.0001676736578795459,
"loss": 0.7329631447792053,
"step": 846
},
{
"epoch": 0.4884659746251442,
"grad_norm": 0.8183399438858032,
"learning_rate": 0.0001676351741389263,
"loss": 0.859244704246521,
"step": 847
},
{
"epoch": 0.48904267589388695,
"grad_norm": 0.7920128107070923,
"learning_rate": 0.00016759669039830672,
"loss": 0.9889219403266907,
"step": 848
},
{
"epoch": 0.48961937716262977,
"grad_norm": 1.1391570568084717,
"learning_rate": 0.00016755820665768714,
"loss": 1.146942138671875,
"step": 849
},
{
"epoch": 0.49019607843137253,
"grad_norm": 0.6648845076560974,
"learning_rate": 0.00016751972291706755,
"loss": 0.7090552449226379,
"step": 850
},
{
"epoch": 0.49077277970011535,
"grad_norm": 0.7156478762626648,
"learning_rate": 0.00016748123917644797,
"loss": 0.7772218585014343,
"step": 851
},
{
"epoch": 0.4913494809688581,
"grad_norm": 0.7279021739959717,
"learning_rate": 0.00016744275543582838,
"loss": 1.0468722581863403,
"step": 852
},
{
"epoch": 0.49192618223760093,
"grad_norm": 1.0862352848052979,
"learning_rate": 0.00016740427169520877,
"loss": 1.3199949264526367,
"step": 853
},
{
"epoch": 0.4925028835063437,
"grad_norm": 0.5989871025085449,
"learning_rate": 0.00016736578795458918,
"loss": 0.7066143751144409,
"step": 854
},
{
"epoch": 0.4930795847750865,
"grad_norm": 0.88418048620224,
"learning_rate": 0.0001673273042139696,
"loss": 0.9679941534996033,
"step": 855
},
{
"epoch": 0.4936562860438293,
"grad_norm": 0.7538619637489319,
"learning_rate": 0.00016728882047335002,
"loss": 0.906350314617157,
"step": 856
},
{
"epoch": 0.4942329873125721,
"grad_norm": 1.0406384468078613,
"learning_rate": 0.00016725033673273043,
"loss": 1.0761326551437378,
"step": 857
},
{
"epoch": 0.49480968858131485,
"grad_norm": 0.9118819236755371,
"learning_rate": 0.00016721185299211085,
"loss": 1.449715495109558,
"step": 858
},
{
"epoch": 0.4953863898500577,
"grad_norm": 0.7859880328178406,
"learning_rate": 0.00016717336925149126,
"loss": 1.0066848993301392,
"step": 859
},
{
"epoch": 0.49596309111880044,
"grad_norm": 0.7971929907798767,
"learning_rate": 0.00016713488551087168,
"loss": 1.0836429595947266,
"step": 860
},
{
"epoch": 0.49653979238754326,
"grad_norm": 0.7688129544258118,
"learning_rate": 0.00016709640177025206,
"loss": 0.8990678191184998,
"step": 861
},
{
"epoch": 0.497116493656286,
"grad_norm": 0.6911450028419495,
"learning_rate": 0.00016705791802963248,
"loss": 0.9118435382843018,
"step": 862
},
{
"epoch": 0.49769319492502884,
"grad_norm": 0.9296817183494568,
"learning_rate": 0.0001670194342890129,
"loss": 1.0580615997314453,
"step": 863
},
{
"epoch": 0.4982698961937716,
"grad_norm": 0.5820940732955933,
"learning_rate": 0.0001669809505483933,
"loss": 0.6944743394851685,
"step": 864
},
{
"epoch": 0.4988465974625144,
"grad_norm": 0.9766574501991272,
"learning_rate": 0.00016694246680777373,
"loss": 1.4097439050674438,
"step": 865
},
{
"epoch": 0.4994232987312572,
"grad_norm": 0.658211350440979,
"learning_rate": 0.00016690398306715414,
"loss": 0.7773644924163818,
"step": 866
},
{
"epoch": 0.5,
"grad_norm": 0.7480500340461731,
"learning_rate": 0.00016686549932653456,
"loss": 1.1536113023757935,
"step": 867
},
{
"epoch": 0.5005767012687428,
"grad_norm": 0.5885343551635742,
"learning_rate": 0.00016682701558591497,
"loss": 0.5359970927238464,
"step": 868
},
{
"epoch": 0.5011534025374856,
"grad_norm": 0.7808444499969482,
"learning_rate": 0.00016678853184529536,
"loss": 0.6940274834632874,
"step": 869
},
{
"epoch": 0.5017301038062284,
"grad_norm": 0.8007370233535767,
"learning_rate": 0.00016675004810467577,
"loss": 1.3268241882324219,
"step": 870
},
{
"epoch": 0.5023068050749712,
"grad_norm": 0.6729685068130493,
"learning_rate": 0.0001667115643640562,
"loss": 0.9482746124267578,
"step": 871
},
{
"epoch": 0.5028835063437139,
"grad_norm": 0.648239016532898,
"learning_rate": 0.0001666730806234366,
"loss": 0.9904931783676147,
"step": 872
},
{
"epoch": 0.5034602076124568,
"grad_norm": 0.7997180223464966,
"learning_rate": 0.00016663459688281702,
"loss": 1.0594019889831543,
"step": 873
},
{
"epoch": 0.5040369088811996,
"grad_norm": 0.8298223614692688,
"learning_rate": 0.00016659611314219744,
"loss": 0.9604882597923279,
"step": 874
},
{
"epoch": 0.5046136101499423,
"grad_norm": 0.8724483251571655,
"learning_rate": 0.00016655762940157785,
"loss": 1.0515791177749634,
"step": 875
},
{
"epoch": 0.5051903114186851,
"grad_norm": 0.7477858662605286,
"learning_rate": 0.00016651914566095827,
"loss": 1.0346887111663818,
"step": 876
},
{
"epoch": 0.505767012687428,
"grad_norm": 0.6524494886398315,
"learning_rate": 0.00016648066192033865,
"loss": 0.8699806928634644,
"step": 877
},
{
"epoch": 0.5063437139561707,
"grad_norm": 0.7959410548210144,
"learning_rate": 0.00016644217817971907,
"loss": 1.0138338804244995,
"step": 878
},
{
"epoch": 0.5069204152249135,
"grad_norm": 0.7872818112373352,
"learning_rate": 0.00016640369443909949,
"loss": 1.0084038972854614,
"step": 879
},
{
"epoch": 0.5074971164936563,
"grad_norm": 0.9153385758399963,
"learning_rate": 0.0001663652106984799,
"loss": 0.9120053052902222,
"step": 880
},
{
"epoch": 0.5080738177623991,
"grad_norm": 0.8691549301147461,
"learning_rate": 0.00016632672695786032,
"loss": 0.9792031645774841,
"step": 881
},
{
"epoch": 0.5086505190311419,
"grad_norm": 0.7193480730056763,
"learning_rate": 0.00016628824321724073,
"loss": 0.9441159963607788,
"step": 882
},
{
"epoch": 0.5092272202998847,
"grad_norm": 0.5675065517425537,
"learning_rate": 0.00016624975947662115,
"loss": 0.7550349235534668,
"step": 883
},
{
"epoch": 0.5098039215686274,
"grad_norm": 0.45122864842414856,
"learning_rate": 0.00016621127573600156,
"loss": 0.494687020778656,
"step": 884
},
{
"epoch": 0.5103806228373703,
"grad_norm": 0.5535047650337219,
"learning_rate": 0.00016617279199538195,
"loss": 1.0048768520355225,
"step": 885
},
{
"epoch": 0.510957324106113,
"grad_norm": 1.1627446413040161,
"learning_rate": 0.00016613430825476237,
"loss": 1.3231415748596191,
"step": 886
},
{
"epoch": 0.5115340253748558,
"grad_norm": 0.5924594402313232,
"learning_rate": 0.00016609582451414278,
"loss": 0.8373284339904785,
"step": 887
},
{
"epoch": 0.5121107266435986,
"grad_norm": 1.071594476699829,
"learning_rate": 0.0001660573407735232,
"loss": 1.1695808172225952,
"step": 888
},
{
"epoch": 0.5126874279123415,
"grad_norm": 0.7243885397911072,
"learning_rate": 0.0001660188570329036,
"loss": 0.9688019156455994,
"step": 889
},
{
"epoch": 0.5132641291810842,
"grad_norm": 0.7857576012611389,
"learning_rate": 0.00016598037329228403,
"loss": 0.9062821269035339,
"step": 890
},
{
"epoch": 0.513840830449827,
"grad_norm": 0.6501168012619019,
"learning_rate": 0.00016594188955166444,
"loss": 0.7230191230773926,
"step": 891
},
{
"epoch": 0.5144175317185697,
"grad_norm": 0.7679166197776794,
"learning_rate": 0.00016590340581104483,
"loss": 0.9849987030029297,
"step": 892
},
{
"epoch": 0.5149942329873126,
"grad_norm": 0.5687773823738098,
"learning_rate": 0.00016586492207042524,
"loss": 0.5315793752670288,
"step": 893
},
{
"epoch": 0.5155709342560554,
"grad_norm": 0.5201639533042908,
"learning_rate": 0.00016582643832980566,
"loss": 0.833229660987854,
"step": 894
},
{
"epoch": 0.5161476355247981,
"grad_norm": 0.9703792333602905,
"learning_rate": 0.00016578795458918608,
"loss": 1.2787346839904785,
"step": 895
},
{
"epoch": 0.5167243367935409,
"grad_norm": 0.5964572429656982,
"learning_rate": 0.0001657494708485665,
"loss": 0.8054360151290894,
"step": 896
},
{
"epoch": 0.5173010380622838,
"grad_norm": 0.8156993389129639,
"learning_rate": 0.0001657109871079469,
"loss": 1.1183547973632812,
"step": 897
},
{
"epoch": 0.5178777393310265,
"grad_norm": 0.9944779276847839,
"learning_rate": 0.00016567250336732732,
"loss": 1.4230319261550903,
"step": 898
},
{
"epoch": 0.5184544405997693,
"grad_norm": 0.6466273069381714,
"learning_rate": 0.00016563401962670774,
"loss": 0.9248323440551758,
"step": 899
},
{
"epoch": 0.5190311418685121,
"grad_norm": 0.6486216187477112,
"learning_rate": 0.00016559553588608812,
"loss": 0.8279266357421875,
"step": 900
},
{
"epoch": 0.5196078431372549,
"grad_norm": 0.8492687940597534,
"learning_rate": 0.00016555705214546854,
"loss": 1.1167151927947998,
"step": 901
},
{
"epoch": 0.5201845444059977,
"grad_norm": 0.7403521537780762,
"learning_rate": 0.00016551856840484896,
"loss": 0.9129210710525513,
"step": 902
},
{
"epoch": 0.5207612456747405,
"grad_norm": 0.9525539875030518,
"learning_rate": 0.00016548008466422937,
"loss": 1.0805696249008179,
"step": 903
},
{
"epoch": 0.5213379469434832,
"grad_norm": 0.6410759091377258,
"learning_rate": 0.00016544160092360979,
"loss": 0.7183154821395874,
"step": 904
},
{
"epoch": 0.5219146482122261,
"grad_norm": 0.9240155816078186,
"learning_rate": 0.0001654031171829902,
"loss": 1.2977594137191772,
"step": 905
},
{
"epoch": 0.5224913494809689,
"grad_norm": 0.5909906625747681,
"learning_rate": 0.00016536463344237062,
"loss": 0.8771336078643799,
"step": 906
},
{
"epoch": 0.5230680507497116,
"grad_norm": 0.6739245653152466,
"learning_rate": 0.00016532614970175103,
"loss": 0.9435271620750427,
"step": 907
},
{
"epoch": 0.5236447520184544,
"grad_norm": 0.7840787172317505,
"learning_rate": 0.00016528766596113142,
"loss": 0.9116816520690918,
"step": 908
},
{
"epoch": 0.5242214532871973,
"grad_norm": 0.7001404762268066,
"learning_rate": 0.00016524918222051184,
"loss": 0.7686711549758911,
"step": 909
},
{
"epoch": 0.52479815455594,
"grad_norm": 0.7492363452911377,
"learning_rate": 0.00016521069847989225,
"loss": 0.894406795501709,
"step": 910
},
{
"epoch": 0.5253748558246828,
"grad_norm": 0.6643780469894409,
"learning_rate": 0.00016517221473927267,
"loss": 0.9077553153038025,
"step": 911
},
{
"epoch": 0.5259515570934256,
"grad_norm": 0.6426498889923096,
"learning_rate": 0.00016513373099865308,
"loss": 0.7784804701805115,
"step": 912
},
{
"epoch": 0.5265282583621684,
"grad_norm": 0.6445097923278809,
"learning_rate": 0.0001650952472580335,
"loss": 0.8351481556892395,
"step": 913
},
{
"epoch": 0.5271049596309112,
"grad_norm": 0.9749622344970703,
"learning_rate": 0.0001650567635174139,
"loss": 1.3779326677322388,
"step": 914
},
{
"epoch": 0.527681660899654,
"grad_norm": 1.0297281742095947,
"learning_rate": 0.00016501827977679433,
"loss": 1.4258373975753784,
"step": 915
},
{
"epoch": 0.5282583621683967,
"grad_norm": 0.8116568326950073,
"learning_rate": 0.00016497979603617472,
"loss": 1.120481252670288,
"step": 916
},
{
"epoch": 0.5288350634371396,
"grad_norm": 0.8832195401191711,
"learning_rate": 0.00016494131229555513,
"loss": 1.0475956201553345,
"step": 917
},
{
"epoch": 0.5294117647058824,
"grad_norm": 0.7668746709823608,
"learning_rate": 0.00016490282855493555,
"loss": 0.9356057643890381,
"step": 918
},
{
"epoch": 0.5299884659746251,
"grad_norm": 0.7938312292098999,
"learning_rate": 0.00016486434481431596,
"loss": 1.0766160488128662,
"step": 919
},
{
"epoch": 0.5305651672433679,
"grad_norm": 0.6379091739654541,
"learning_rate": 0.00016482586107369638,
"loss": 0.8664296865463257,
"step": 920
},
{
"epoch": 0.5311418685121108,
"grad_norm": 0.5966930389404297,
"learning_rate": 0.0001647873773330768,
"loss": 0.7848939299583435,
"step": 921
},
{
"epoch": 0.5317185697808535,
"grad_norm": 0.7270369529724121,
"learning_rate": 0.0001647488935924572,
"loss": 0.8690502643585205,
"step": 922
},
{
"epoch": 0.5322952710495963,
"grad_norm": 0.7373891472816467,
"learning_rate": 0.00016471040985183762,
"loss": 0.9187401533126831,
"step": 923
},
{
"epoch": 0.532871972318339,
"grad_norm": 0.6114344596862793,
"learning_rate": 0.000164671926111218,
"loss": 0.7336284518241882,
"step": 924
},
{
"epoch": 0.5334486735870819,
"grad_norm": 0.7629640102386475,
"learning_rate": 0.00016463344237059843,
"loss": 1.0568023920059204,
"step": 925
},
{
"epoch": 0.5340253748558247,
"grad_norm": 0.5172185897827148,
"learning_rate": 0.00016459495862997884,
"loss": 0.6043404936790466,
"step": 926
},
{
"epoch": 0.5346020761245674,
"grad_norm": 0.6732125282287598,
"learning_rate": 0.00016455647488935926,
"loss": 0.7869133353233337,
"step": 927
},
{
"epoch": 0.5351787773933102,
"grad_norm": 0.993881344795227,
"learning_rate": 0.00016451799114873967,
"loss": 1.3750996589660645,
"step": 928
},
{
"epoch": 0.5357554786620531,
"grad_norm": 0.6748846173286438,
"learning_rate": 0.0001644795074081201,
"loss": 0.7957302331924438,
"step": 929
},
{
"epoch": 0.5363321799307958,
"grad_norm": 0.5961597561836243,
"learning_rate": 0.0001644410236675005,
"loss": 0.817986786365509,
"step": 930
},
{
"epoch": 0.5369088811995386,
"grad_norm": 0.8336942195892334,
"learning_rate": 0.00016440253992688092,
"loss": 1.071876883506775,
"step": 931
},
{
"epoch": 0.5374855824682814,
"grad_norm": 0.8322470784187317,
"learning_rate": 0.0001643640561862613,
"loss": 0.9675548672676086,
"step": 932
},
{
"epoch": 0.5380622837370242,
"grad_norm": 0.8054575324058533,
"learning_rate": 0.00016432557244564172,
"loss": 1.0018256902694702,
"step": 933
},
{
"epoch": 0.538638985005767,
"grad_norm": 0.7546166181564331,
"learning_rate": 0.00016428708870502214,
"loss": 0.9199832677841187,
"step": 934
},
{
"epoch": 0.5392156862745098,
"grad_norm": 0.6384134292602539,
"learning_rate": 0.00016424860496440255,
"loss": 0.5693946480751038,
"step": 935
},
{
"epoch": 0.5397923875432526,
"grad_norm": 0.8509575128555298,
"learning_rate": 0.00016421012122378297,
"loss": 1.3604402542114258,
"step": 936
},
{
"epoch": 0.5403690888119954,
"grad_norm": 1.0863171815872192,
"learning_rate": 0.00016417163748316338,
"loss": 1.441767692565918,
"step": 937
},
{
"epoch": 0.5409457900807382,
"grad_norm": 0.7680332064628601,
"learning_rate": 0.0001641331537425438,
"loss": 0.8990482091903687,
"step": 938
},
{
"epoch": 0.5415224913494809,
"grad_norm": 0.9804447889328003,
"learning_rate": 0.0001640946700019242,
"loss": 1.0421537160873413,
"step": 939
},
{
"epoch": 0.5420991926182238,
"grad_norm": 1.0693145990371704,
"learning_rate": 0.0001640561862613046,
"loss": 1.1600146293640137,
"step": 940
},
{
"epoch": 0.5426758938869666,
"grad_norm": 0.8488958477973938,
"learning_rate": 0.00016401770252068502,
"loss": 1.2710307836532593,
"step": 941
},
{
"epoch": 0.5432525951557093,
"grad_norm": 1.048317313194275,
"learning_rate": 0.00016397921878006543,
"loss": 0.8453274369239807,
"step": 942
},
{
"epoch": 0.5438292964244521,
"grad_norm": 0.7326422929763794,
"learning_rate": 0.00016394073503944585,
"loss": 1.0167326927185059,
"step": 943
},
{
"epoch": 0.544405997693195,
"grad_norm": 0.877862274646759,
"learning_rate": 0.00016390225129882626,
"loss": 0.9589974880218506,
"step": 944
},
{
"epoch": 0.5449826989619377,
"grad_norm": 0.8096463680267334,
"learning_rate": 0.00016386376755820668,
"loss": 0.8364965915679932,
"step": 945
},
{
"epoch": 0.5455594002306805,
"grad_norm": 0.9232637882232666,
"learning_rate": 0.0001638252838175871,
"loss": 0.9332213997840881,
"step": 946
},
{
"epoch": 0.5461361014994233,
"grad_norm": 0.7885507941246033,
"learning_rate": 0.0001637868000769675,
"loss": 1.0532820224761963,
"step": 947
},
{
"epoch": 0.5467128027681661,
"grad_norm": 0.914097249507904,
"learning_rate": 0.0001637483163363479,
"loss": 0.8059665560722351,
"step": 948
},
{
"epoch": 0.5472895040369089,
"grad_norm": 0.8124399781227112,
"learning_rate": 0.0001637098325957283,
"loss": 0.7342300415039062,
"step": 949
},
{
"epoch": 0.5478662053056517,
"grad_norm": 0.8677952289581299,
"learning_rate": 0.00016367134885510873,
"loss": 1.2200864553451538,
"step": 950
},
{
"epoch": 0.5484429065743944,
"grad_norm": 0.8235622048377991,
"learning_rate": 0.00016363286511448914,
"loss": 1.2276276350021362,
"step": 951
},
{
"epoch": 0.5490196078431373,
"grad_norm": 0.8734779953956604,
"learning_rate": 0.00016359438137386956,
"loss": 1.481785535812378,
"step": 952
},
{
"epoch": 0.5495963091118801,
"grad_norm": 0.7058696746826172,
"learning_rate": 0.00016355589763324997,
"loss": 0.8971320390701294,
"step": 953
},
{
"epoch": 0.5501730103806228,
"grad_norm": 0.7818495035171509,
"learning_rate": 0.0001635174138926304,
"loss": 0.9900298118591309,
"step": 954
},
{
"epoch": 0.5507497116493656,
"grad_norm": 0.9933992028236389,
"learning_rate": 0.0001634789301520108,
"loss": 1.377812147140503,
"step": 955
},
{
"epoch": 0.5513264129181085,
"grad_norm": 0.6487358808517456,
"learning_rate": 0.0001634404464113912,
"loss": 0.8082116842269897,
"step": 956
},
{
"epoch": 0.5519031141868512,
"grad_norm": 0.7896233201026917,
"learning_rate": 0.0001634019626707716,
"loss": 0.8894538879394531,
"step": 957
},
{
"epoch": 0.552479815455594,
"grad_norm": 0.5499460697174072,
"learning_rate": 0.00016336347893015202,
"loss": 0.7779909372329712,
"step": 958
},
{
"epoch": 0.5530565167243368,
"grad_norm": 0.7304683327674866,
"learning_rate": 0.00016332499518953244,
"loss": 0.9466789960861206,
"step": 959
},
{
"epoch": 0.5536332179930796,
"grad_norm": 0.8766285181045532,
"learning_rate": 0.00016328651144891285,
"loss": 0.654015064239502,
"step": 960
},
{
"epoch": 0.5542099192618224,
"grad_norm": 0.5168980956077576,
"learning_rate": 0.00016324802770829327,
"loss": 0.7942756414413452,
"step": 961
},
{
"epoch": 0.5547866205305652,
"grad_norm": 0.8975361585617065,
"learning_rate": 0.00016320954396767368,
"loss": 1.1166660785675049,
"step": 962
},
{
"epoch": 0.5553633217993079,
"grad_norm": 0.559033215045929,
"learning_rate": 0.0001631710602270541,
"loss": 0.7238450050354004,
"step": 963
},
{
"epoch": 0.5559400230680508,
"grad_norm": 0.5114202499389648,
"learning_rate": 0.00016313257648643449,
"loss": 0.8229402303695679,
"step": 964
},
{
"epoch": 0.5565167243367936,
"grad_norm": 0.8146692514419556,
"learning_rate": 0.0001630940927458149,
"loss": 0.9510258436203003,
"step": 965
},
{
"epoch": 0.5570934256055363,
"grad_norm": 0.7686490416526794,
"learning_rate": 0.00016305560900519532,
"loss": 1.3754280805587769,
"step": 966
},
{
"epoch": 0.5576701268742791,
"grad_norm": 0.6895797252655029,
"learning_rate": 0.00016301712526457573,
"loss": 0.9850455522537231,
"step": 967
},
{
"epoch": 0.558246828143022,
"grad_norm": 0.6049807667732239,
"learning_rate": 0.00016297864152395615,
"loss": 0.6829259395599365,
"step": 968
},
{
"epoch": 0.5588235294117647,
"grad_norm": 0.7376249432563782,
"learning_rate": 0.00016294015778333656,
"loss": 0.7787905931472778,
"step": 969
},
{
"epoch": 0.5594002306805075,
"grad_norm": 0.5940505862236023,
"learning_rate": 0.00016290167404271698,
"loss": 0.7658302783966064,
"step": 970
},
{
"epoch": 0.5599769319492502,
"grad_norm": 0.8353221416473389,
"learning_rate": 0.0001628631903020974,
"loss": 1.0191570520401,
"step": 971
},
{
"epoch": 0.5605536332179931,
"grad_norm": 0.6136527061462402,
"learning_rate": 0.00016282470656147778,
"loss": 0.9413414001464844,
"step": 972
},
{
"epoch": 0.5611303344867359,
"grad_norm": 0.64887535572052,
"learning_rate": 0.0001627862228208582,
"loss": 0.763261616230011,
"step": 973
},
{
"epoch": 0.5617070357554786,
"grad_norm": 0.8027318716049194,
"learning_rate": 0.0001627477390802386,
"loss": 1.1142311096191406,
"step": 974
},
{
"epoch": 0.5622837370242214,
"grad_norm": 0.6630944609642029,
"learning_rate": 0.00016270925533961903,
"loss": 0.8240130543708801,
"step": 975
},
{
"epoch": 0.5628604382929643,
"grad_norm": 0.7404500246047974,
"learning_rate": 0.00016267077159899944,
"loss": 0.9690840244293213,
"step": 976
},
{
"epoch": 0.563437139561707,
"grad_norm": 1.0134172439575195,
"learning_rate": 0.00016263228785837986,
"loss": 1.4774882793426514,
"step": 977
},
{
"epoch": 0.5640138408304498,
"grad_norm": 0.8651242256164551,
"learning_rate": 0.00016259380411776027,
"loss": 0.898904025554657,
"step": 978
},
{
"epoch": 0.5645905420991926,
"grad_norm": 0.6225872039794922,
"learning_rate": 0.00016255532037714066,
"loss": 1.149839162826538,
"step": 979
},
{
"epoch": 0.5651672433679354,
"grad_norm": 0.5773558020591736,
"learning_rate": 0.00016251683663652108,
"loss": 0.516633152961731,
"step": 980
},
{
"epoch": 0.5657439446366782,
"grad_norm": 0.6350861191749573,
"learning_rate": 0.0001624783528959015,
"loss": 1.0271410942077637,
"step": 981
},
{
"epoch": 0.566320645905421,
"grad_norm": 0.8134899139404297,
"learning_rate": 0.0001624398691552819,
"loss": 0.8847084045410156,
"step": 982
},
{
"epoch": 0.5668973471741637,
"grad_norm": 0.793136477470398,
"learning_rate": 0.00016240138541466232,
"loss": 1.0517855882644653,
"step": 983
},
{
"epoch": 0.5674740484429066,
"grad_norm": 0.6838855743408203,
"learning_rate": 0.00016236290167404274,
"loss": 0.9592060446739197,
"step": 984
},
{
"epoch": 0.5680507497116494,
"grad_norm": 0.77060467004776,
"learning_rate": 0.00016232441793342315,
"loss": 1.1476876735687256,
"step": 985
},
{
"epoch": 0.5686274509803921,
"grad_norm": 0.6759986281394958,
"learning_rate": 0.00016228593419280357,
"loss": 0.9518548846244812,
"step": 986
},
{
"epoch": 0.5692041522491349,
"grad_norm": 0.6088658571243286,
"learning_rate": 0.00016224745045218396,
"loss": 0.6659010648727417,
"step": 987
},
{
"epoch": 0.5697808535178778,
"grad_norm": 0.9436719417572021,
"learning_rate": 0.00016220896671156437,
"loss": 1.1346865892410278,
"step": 988
},
{
"epoch": 0.5703575547866205,
"grad_norm": 1.0091006755828857,
"learning_rate": 0.0001621704829709448,
"loss": 1.1687716245651245,
"step": 989
},
{
"epoch": 0.5709342560553633,
"grad_norm": 0.9080367684364319,
"learning_rate": 0.0001621319992303252,
"loss": 1.0989638566970825,
"step": 990
},
{
"epoch": 0.5715109573241061,
"grad_norm": 0.7519204020500183,
"learning_rate": 0.00016209351548970562,
"loss": 1.3017445802688599,
"step": 991
},
{
"epoch": 0.5720876585928489,
"grad_norm": 0.545911431312561,
"learning_rate": 0.00016205503174908603,
"loss": 0.7622886300086975,
"step": 992
},
{
"epoch": 0.5726643598615917,
"grad_norm": 0.9163870215415955,
"learning_rate": 0.00016201654800846645,
"loss": 1.2744814157485962,
"step": 993
},
{
"epoch": 0.5732410611303345,
"grad_norm": 0.7644914388656616,
"learning_rate": 0.00016197806426784686,
"loss": 0.9071030616760254,
"step": 994
},
{
"epoch": 0.5738177623990772,
"grad_norm": 0.761933445930481,
"learning_rate": 0.00016193958052722725,
"loss": 1.0261884927749634,
"step": 995
},
{
"epoch": 0.5743944636678201,
"grad_norm": 0.5850253701210022,
"learning_rate": 0.00016190109678660767,
"loss": 0.8700547814369202,
"step": 996
},
{
"epoch": 0.5749711649365629,
"grad_norm": 0.8303119540214539,
"learning_rate": 0.00016186261304598808,
"loss": 0.7401360273361206,
"step": 997
},
{
"epoch": 0.5755478662053056,
"grad_norm": 0.8335464000701904,
"learning_rate": 0.0001618241293053685,
"loss": 1.058925986289978,
"step": 998
},
{
"epoch": 0.5761245674740484,
"grad_norm": 0.6967325806617737,
"learning_rate": 0.0001617856455647489,
"loss": 1.3550879955291748,
"step": 999
},
{
"epoch": 0.5767012687427913,
"grad_norm": 1.0509662628173828,
"learning_rate": 0.00016174716182412933,
"loss": 1.3809900283813477,
"step": 1000
},
{
"epoch": 0.577277970011534,
"grad_norm": 0.7688459157943726,
"learning_rate": 0.00016170867808350974,
"loss": 0.7888709306716919,
"step": 1001
},
{
"epoch": 0.5778546712802768,
"grad_norm": 1.4081027507781982,
"learning_rate": 0.00016167019434289016,
"loss": 0.8922286033630371,
"step": 1002
},
{
"epoch": 0.5784313725490197,
"grad_norm": 0.8513575196266174,
"learning_rate": 0.00016163171060227055,
"loss": 0.9064381718635559,
"step": 1003
},
{
"epoch": 0.5790080738177624,
"grad_norm": 0.8020631670951843,
"learning_rate": 0.00016159322686165096,
"loss": 1.0038318634033203,
"step": 1004
},
{
"epoch": 0.5795847750865052,
"grad_norm": 0.6308439373970032,
"learning_rate": 0.00016155474312103138,
"loss": 1.0535993576049805,
"step": 1005
},
{
"epoch": 0.580161476355248,
"grad_norm": 0.9487643837928772,
"learning_rate": 0.0001615162593804118,
"loss": 1.0733325481414795,
"step": 1006
},
{
"epoch": 0.5807381776239908,
"grad_norm": 0.5813226699829102,
"learning_rate": 0.0001614777756397922,
"loss": 0.6475256085395813,
"step": 1007
},
{
"epoch": 0.5813148788927336,
"grad_norm": 0.8787825703620911,
"learning_rate": 0.00016143929189917262,
"loss": 1.2669293880462646,
"step": 1008
},
{
"epoch": 0.5818915801614764,
"grad_norm": 0.5114219784736633,
"learning_rate": 0.00016140080815855304,
"loss": 0.5243850946426392,
"step": 1009
},
{
"epoch": 0.5824682814302191,
"grad_norm": 0.9315117597579956,
"learning_rate": 0.00016136232441793345,
"loss": 1.0958704948425293,
"step": 1010
},
{
"epoch": 0.583044982698962,
"grad_norm": 0.7866684794425964,
"learning_rate": 0.00016132384067731384,
"loss": 1.0202006101608276,
"step": 1011
},
{
"epoch": 0.5836216839677048,
"grad_norm": 0.9690834283828735,
"learning_rate": 0.00016128535693669426,
"loss": 0.7898403406143188,
"step": 1012
},
{
"epoch": 0.5841983852364475,
"grad_norm": 1.17559015750885,
"learning_rate": 0.00016124687319607467,
"loss": 1.0564637184143066,
"step": 1013
},
{
"epoch": 0.5847750865051903,
"grad_norm": 0.9403568506240845,
"learning_rate": 0.0001612083894554551,
"loss": 1.1451847553253174,
"step": 1014
},
{
"epoch": 0.5853517877739332,
"grad_norm": 0.7303722500801086,
"learning_rate": 0.0001611699057148355,
"loss": 1.143730878829956,
"step": 1015
},
{
"epoch": 0.5859284890426759,
"grad_norm": 0.9661723375320435,
"learning_rate": 0.00016113142197421592,
"loss": 1.1612937450408936,
"step": 1016
},
{
"epoch": 0.5865051903114187,
"grad_norm": 0.9506820440292358,
"learning_rate": 0.0001610929382335963,
"loss": 1.3300495147705078,
"step": 1017
},
{
"epoch": 0.5870818915801614,
"grad_norm": 0.9524713754653931,
"learning_rate": 0.00016105445449297672,
"loss": 1.4797887802124023,
"step": 1018
},
{
"epoch": 0.5876585928489043,
"grad_norm": 0.8756133317947388,
"learning_rate": 0.00016101597075235714,
"loss": 1.0017035007476807,
"step": 1019
},
{
"epoch": 0.5882352941176471,
"grad_norm": 0.8561094403266907,
"learning_rate": 0.00016097748701173752,
"loss": 1.4500423669815063,
"step": 1020
},
{
"epoch": 0.5888119953863898,
"grad_norm": 0.7503087520599365,
"learning_rate": 0.00016093900327111794,
"loss": 1.0606659650802612,
"step": 1021
},
{
"epoch": 0.5893886966551326,
"grad_norm": 0.5415161848068237,
"learning_rate": 0.00016090051953049836,
"loss": 0.6421483159065247,
"step": 1022
},
{
"epoch": 0.5899653979238755,
"grad_norm": 0.6148718595504761,
"learning_rate": 0.00016086203578987877,
"loss": 0.94537353515625,
"step": 1023
},
{
"epoch": 0.5905420991926182,
"grad_norm": 0.7274061441421509,
"learning_rate": 0.00016082355204925919,
"loss": 1.1045122146606445,
"step": 1024
},
{
"epoch": 0.591118800461361,
"grad_norm": 1.0995570421218872,
"learning_rate": 0.0001607850683086396,
"loss": 1.0006502866744995,
"step": 1025
},
{
"epoch": 0.5916955017301038,
"grad_norm": 0.6411669850349426,
"learning_rate": 0.00016074658456802002,
"loss": 0.8185054063796997,
"step": 1026
},
{
"epoch": 0.5922722029988466,
"grad_norm": 0.8972517848014832,
"learning_rate": 0.00016070810082740043,
"loss": 1.0834156274795532,
"step": 1027
},
{
"epoch": 0.5928489042675894,
"grad_norm": 1.3362998962402344,
"learning_rate": 0.00016066961708678082,
"loss": 1.3157958984375,
"step": 1028
},
{
"epoch": 0.5934256055363322,
"grad_norm": 0.9085165858268738,
"learning_rate": 0.00016063113334616124,
"loss": 1.0817850828170776,
"step": 1029
},
{
"epoch": 0.5940023068050749,
"grad_norm": 1.028162956237793,
"learning_rate": 0.00016059264960554165,
"loss": 1.324896216392517,
"step": 1030
},
{
"epoch": 0.5945790080738178,
"grad_norm": 0.6264161467552185,
"learning_rate": 0.00016055416586492207,
"loss": 0.7769796848297119,
"step": 1031
},
{
"epoch": 0.5951557093425606,
"grad_norm": 0.6027923822402954,
"learning_rate": 0.00016051568212430248,
"loss": 0.7691771388053894,
"step": 1032
},
{
"epoch": 0.5957324106113033,
"grad_norm": 1.1957632303237915,
"learning_rate": 0.0001604771983836829,
"loss": 1.5915735960006714,
"step": 1033
},
{
"epoch": 0.5963091118800461,
"grad_norm": 0.8243029713630676,
"learning_rate": 0.0001604387146430633,
"loss": 1.4467861652374268,
"step": 1034
},
{
"epoch": 0.596885813148789,
"grad_norm": 0.9241074919700623,
"learning_rate": 0.00016040023090244373,
"loss": 1.2037115097045898,
"step": 1035
},
{
"epoch": 0.5974625144175317,
"grad_norm": 0.7573208212852478,
"learning_rate": 0.00016036174716182411,
"loss": 1.111187219619751,
"step": 1036
},
{
"epoch": 0.5980392156862745,
"grad_norm": 0.9766779541969299,
"learning_rate": 0.00016032326342120453,
"loss": 1.3394712209701538,
"step": 1037
},
{
"epoch": 0.5986159169550173,
"grad_norm": 0.7223910093307495,
"learning_rate": 0.00016028477968058495,
"loss": 0.9714270830154419,
"step": 1038
},
{
"epoch": 0.5991926182237601,
"grad_norm": 0.8372020721435547,
"learning_rate": 0.00016024629593996536,
"loss": 0.9755414724349976,
"step": 1039
},
{
"epoch": 0.5997693194925029,
"grad_norm": 1.060224175453186,
"learning_rate": 0.00016020781219934578,
"loss": 1.0653870105743408,
"step": 1040
},
{
"epoch": 0.6003460207612457,
"grad_norm": 1.0068564414978027,
"learning_rate": 0.0001601693284587262,
"loss": 1.1695475578308105,
"step": 1041
},
{
"epoch": 0.6009227220299884,
"grad_norm": 0.8202903866767883,
"learning_rate": 0.0001601308447181066,
"loss": 1.430415391921997,
"step": 1042
},
{
"epoch": 0.6014994232987313,
"grad_norm": 0.6556461453437805,
"learning_rate": 0.00016009236097748702,
"loss": 0.6565566658973694,
"step": 1043
},
{
"epoch": 0.6020761245674741,
"grad_norm": 1.0711745023727417,
"learning_rate": 0.0001600538772368674,
"loss": 1.4629727602005005,
"step": 1044
},
{
"epoch": 0.6026528258362168,
"grad_norm": 0.857792317867279,
"learning_rate": 0.00016001539349624783,
"loss": 1.375361442565918,
"step": 1045
},
{
"epoch": 0.6032295271049596,
"grad_norm": 0.8610656261444092,
"learning_rate": 0.00015997690975562824,
"loss": 1.319663166999817,
"step": 1046
},
{
"epoch": 0.6038062283737025,
"grad_norm": 0.5466272830963135,
"learning_rate": 0.00015993842601500866,
"loss": 0.9326815009117126,
"step": 1047
},
{
"epoch": 0.6043829296424452,
"grad_norm": 0.5424578189849854,
"learning_rate": 0.00015989994227438907,
"loss": 0.8943756818771362,
"step": 1048
},
{
"epoch": 0.604959630911188,
"grad_norm": 1.0392166376113892,
"learning_rate": 0.00015986145853376949,
"loss": 1.1610779762268066,
"step": 1049
},
{
"epoch": 0.6055363321799307,
"grad_norm": 0.7397944331169128,
"learning_rate": 0.0001598229747931499,
"loss": 0.9297494888305664,
"step": 1050
},
{
"epoch": 0.6061130334486736,
"grad_norm": 0.7921435832977295,
"learning_rate": 0.00015978449105253032,
"loss": 0.9271104335784912,
"step": 1051
},
{
"epoch": 0.6066897347174164,
"grad_norm": 1.0713645219802856,
"learning_rate": 0.0001597460073119107,
"loss": 1.429350733757019,
"step": 1052
},
{
"epoch": 0.6072664359861591,
"grad_norm": 0.7312497496604919,
"learning_rate": 0.00015970752357129112,
"loss": 0.9167627096176147,
"step": 1053
},
{
"epoch": 0.6078431372549019,
"grad_norm": 0.7499086260795593,
"learning_rate": 0.00015966903983067154,
"loss": 0.7258137464523315,
"step": 1054
},
{
"epoch": 0.6084198385236448,
"grad_norm": 0.7300564646720886,
"learning_rate": 0.00015963055609005195,
"loss": 1.058071494102478,
"step": 1055
},
{
"epoch": 0.6089965397923875,
"grad_norm": 0.652527928352356,
"learning_rate": 0.00015959207234943237,
"loss": 0.6544615030288696,
"step": 1056
},
{
"epoch": 0.6095732410611303,
"grad_norm": 0.7193166613578796,
"learning_rate": 0.00015955358860881278,
"loss": 0.7395502328872681,
"step": 1057
},
{
"epoch": 0.6101499423298731,
"grad_norm": 0.7402684092521667,
"learning_rate": 0.0001595151048681932,
"loss": 0.8958665728569031,
"step": 1058
},
{
"epoch": 0.610726643598616,
"grad_norm": 1.0471738576889038,
"learning_rate": 0.0001594766211275736,
"loss": 1.383862018585205,
"step": 1059
},
{
"epoch": 0.6113033448673587,
"grad_norm": 0.926358699798584,
"learning_rate": 0.000159438137386954,
"loss": 1.3329360485076904,
"step": 1060
},
{
"epoch": 0.6118800461361015,
"grad_norm": 1.3576291799545288,
"learning_rate": 0.00015939965364633442,
"loss": 1.4153847694396973,
"step": 1061
},
{
"epoch": 0.6124567474048442,
"grad_norm": 1.043614387512207,
"learning_rate": 0.00015936116990571483,
"loss": 1.1355584859848022,
"step": 1062
},
{
"epoch": 0.6130334486735871,
"grad_norm": 0.6180047988891602,
"learning_rate": 0.00015932268616509525,
"loss": 0.7877006530761719,
"step": 1063
},
{
"epoch": 0.6136101499423299,
"grad_norm": 1.188005805015564,
"learning_rate": 0.00015928420242447566,
"loss": 1.185757040977478,
"step": 1064
},
{
"epoch": 0.6141868512110726,
"grad_norm": 0.6937184929847717,
"learning_rate": 0.00015924571868385608,
"loss": 0.8133529424667358,
"step": 1065
},
{
"epoch": 0.6147635524798154,
"grad_norm": 0.5152422785758972,
"learning_rate": 0.0001592072349432365,
"loss": 0.6955524682998657,
"step": 1066
},
{
"epoch": 0.6153402537485583,
"grad_norm": 0.8295215964317322,
"learning_rate": 0.0001591687512026169,
"loss": 0.9180642366409302,
"step": 1067
},
{
"epoch": 0.615916955017301,
"grad_norm": 1.131622314453125,
"learning_rate": 0.0001591302674619973,
"loss": 1.2194663286209106,
"step": 1068
},
{
"epoch": 0.6164936562860438,
"grad_norm": 0.744301438331604,
"learning_rate": 0.0001590917837213777,
"loss": 0.9852138161659241,
"step": 1069
},
{
"epoch": 0.6170703575547867,
"grad_norm": 0.7841970920562744,
"learning_rate": 0.00015905329998075813,
"loss": 1.302487850189209,
"step": 1070
},
{
"epoch": 0.6176470588235294,
"grad_norm": 0.6610711216926575,
"learning_rate": 0.00015901481624013854,
"loss": 0.8427870273590088,
"step": 1071
},
{
"epoch": 0.6182237600922722,
"grad_norm": 0.9735661745071411,
"learning_rate": 0.00015897633249951896,
"loss": 1.1720025539398193,
"step": 1072
},
{
"epoch": 0.618800461361015,
"grad_norm": 0.6673301458358765,
"learning_rate": 0.00015893784875889937,
"loss": 1.0172441005706787,
"step": 1073
},
{
"epoch": 0.6193771626297578,
"grad_norm": 1.0327497720718384,
"learning_rate": 0.0001588993650182798,
"loss": 1.168729305267334,
"step": 1074
},
{
"epoch": 0.6199538638985006,
"grad_norm": 0.6887943744659424,
"learning_rate": 0.0001588608812776602,
"loss": 0.9284838438034058,
"step": 1075
},
{
"epoch": 0.6205305651672434,
"grad_norm": 0.6660910844802856,
"learning_rate": 0.0001588223975370406,
"loss": 1.1769919395446777,
"step": 1076
},
{
"epoch": 0.6211072664359861,
"grad_norm": 0.7416674494743347,
"learning_rate": 0.000158783913796421,
"loss": 0.750725269317627,
"step": 1077
},
{
"epoch": 0.621683967704729,
"grad_norm": 0.6302111148834229,
"learning_rate": 0.00015874543005580142,
"loss": 0.8207563161849976,
"step": 1078
},
{
"epoch": 0.6222606689734718,
"grad_norm": 0.720021665096283,
"learning_rate": 0.00015870694631518184,
"loss": 1.133636474609375,
"step": 1079
},
{
"epoch": 0.6228373702422145,
"grad_norm": 0.9188029170036316,
"learning_rate": 0.00015866846257456225,
"loss": 1.5215458869934082,
"step": 1080
},
{
"epoch": 0.6234140715109573,
"grad_norm": 0.7337254881858826,
"learning_rate": 0.00015862997883394267,
"loss": 0.9544572830200195,
"step": 1081
},
{
"epoch": 0.6239907727797002,
"grad_norm": 1.0431314706802368,
"learning_rate": 0.00015859149509332308,
"loss": 1.0790281295776367,
"step": 1082
},
{
"epoch": 0.6245674740484429,
"grad_norm": 0.6344501376152039,
"learning_rate": 0.0001585530113527035,
"loss": 0.9151628017425537,
"step": 1083
},
{
"epoch": 0.6251441753171857,
"grad_norm": 1.332190752029419,
"learning_rate": 0.00015851452761208389,
"loss": 1.5466241836547852,
"step": 1084
},
{
"epoch": 0.6257208765859285,
"grad_norm": 0.7802074551582336,
"learning_rate": 0.0001584760438714643,
"loss": 1.1575053930282593,
"step": 1085
},
{
"epoch": 0.6262975778546713,
"grad_norm": 0.5755362510681152,
"learning_rate": 0.00015843756013084472,
"loss": 0.6923443078994751,
"step": 1086
},
{
"epoch": 0.6268742791234141,
"grad_norm": 0.8710469007492065,
"learning_rate": 0.00015839907639022513,
"loss": 1.0893003940582275,
"step": 1087
},
{
"epoch": 0.6274509803921569,
"grad_norm": 0.6689137816429138,
"learning_rate": 0.00015836059264960555,
"loss": 0.9777762293815613,
"step": 1088
},
{
"epoch": 0.6280276816608996,
"grad_norm": 0.9923802614212036,
"learning_rate": 0.00015832210890898596,
"loss": 1.2578145265579224,
"step": 1089
},
{
"epoch": 0.6286043829296425,
"grad_norm": 0.7596067190170288,
"learning_rate": 0.00015828362516836638,
"loss": 1.0804511308670044,
"step": 1090
},
{
"epoch": 0.6291810841983853,
"grad_norm": 0.9255754947662354,
"learning_rate": 0.0001582451414277468,
"loss": 1.2536742687225342,
"step": 1091
},
{
"epoch": 0.629757785467128,
"grad_norm": 0.6089752912521362,
"learning_rate": 0.00015820665768712718,
"loss": 0.8234043121337891,
"step": 1092
},
{
"epoch": 0.6303344867358708,
"grad_norm": 0.8412203192710876,
"learning_rate": 0.0001581681739465076,
"loss": 0.8689320683479309,
"step": 1093
},
{
"epoch": 0.6309111880046137,
"grad_norm": 0.6300414204597473,
"learning_rate": 0.000158129690205888,
"loss": 0.8836315274238586,
"step": 1094
},
{
"epoch": 0.6314878892733564,
"grad_norm": 0.8622999787330627,
"learning_rate": 0.00015809120646526843,
"loss": 0.8355990648269653,
"step": 1095
},
{
"epoch": 0.6320645905420992,
"grad_norm": 1.0277838706970215,
"learning_rate": 0.00015805272272464884,
"loss": 1.0228278636932373,
"step": 1096
},
{
"epoch": 0.6326412918108419,
"grad_norm": 0.7297544479370117,
"learning_rate": 0.00015801423898402926,
"loss": 0.9207032918930054,
"step": 1097
},
{
"epoch": 0.6332179930795848,
"grad_norm": 0.6923787593841553,
"learning_rate": 0.00015797575524340967,
"loss": 0.8914310932159424,
"step": 1098
},
{
"epoch": 0.6337946943483276,
"grad_norm": 0.984605073928833,
"learning_rate": 0.00015793727150279006,
"loss": 1.030419945716858,
"step": 1099
},
{
"epoch": 0.6343713956170703,
"grad_norm": 0.7933477759361267,
"learning_rate": 0.00015789878776217048,
"loss": 0.8263508081436157,
"step": 1100
},
{
"epoch": 0.6349480968858131,
"grad_norm": 0.6690862774848938,
"learning_rate": 0.0001578603040215509,
"loss": 0.8062323927879333,
"step": 1101
},
{
"epoch": 0.635524798154556,
"grad_norm": 1.1080838441848755,
"learning_rate": 0.0001578218202809313,
"loss": 1.0695234537124634,
"step": 1102
},
{
"epoch": 0.6361014994232987,
"grad_norm": 0.7373805046081543,
"learning_rate": 0.00015778333654031172,
"loss": 0.7782353162765503,
"step": 1103
},
{
"epoch": 0.6366782006920415,
"grad_norm": 0.9623069167137146,
"learning_rate": 0.00015774485279969214,
"loss": 1.299721121788025,
"step": 1104
},
{
"epoch": 0.6372549019607843,
"grad_norm": 0.8447510004043579,
"learning_rate": 0.00015770636905907255,
"loss": 0.751670241355896,
"step": 1105
},
{
"epoch": 0.6378316032295271,
"grad_norm": 0.7200034260749817,
"learning_rate": 0.00015766788531845297,
"loss": 0.8565016388893127,
"step": 1106
},
{
"epoch": 0.6384083044982699,
"grad_norm": 0.791018545627594,
"learning_rate": 0.00015762940157783336,
"loss": 1.014164924621582,
"step": 1107
},
{
"epoch": 0.6389850057670127,
"grad_norm": 0.7488639950752258,
"learning_rate": 0.00015759091783721377,
"loss": 0.7353352904319763,
"step": 1108
},
{
"epoch": 0.6395617070357554,
"grad_norm": 0.6376444697380066,
"learning_rate": 0.00015755243409659419,
"loss": 0.8452020287513733,
"step": 1109
},
{
"epoch": 0.6401384083044983,
"grad_norm": 0.7400408387184143,
"learning_rate": 0.0001575139503559746,
"loss": 0.8612061738967896,
"step": 1110
},
{
"epoch": 0.6407151095732411,
"grad_norm": 0.630378007888794,
"learning_rate": 0.00015747546661535502,
"loss": 0.8225241899490356,
"step": 1111
},
{
"epoch": 0.6412918108419838,
"grad_norm": 0.7687711715698242,
"learning_rate": 0.00015743698287473543,
"loss": 1.0129132270812988,
"step": 1112
},
{
"epoch": 0.6418685121107266,
"grad_norm": 0.8225964903831482,
"learning_rate": 0.00015739849913411585,
"loss": 1.0317823886871338,
"step": 1113
},
{
"epoch": 0.6424452133794695,
"grad_norm": 0.8062997460365295,
"learning_rate": 0.00015736001539349626,
"loss": 1.2668901681900024,
"step": 1114
},
{
"epoch": 0.6430219146482122,
"grad_norm": 0.7937533855438232,
"learning_rate": 0.00015732153165287665,
"loss": 0.5984291434288025,
"step": 1115
},
{
"epoch": 0.643598615916955,
"grad_norm": 0.6556064486503601,
"learning_rate": 0.00015728304791225707,
"loss": 0.6811074018478394,
"step": 1116
},
{
"epoch": 0.6441753171856978,
"grad_norm": 0.6815225481987,
"learning_rate": 0.00015724456417163748,
"loss": 0.8315191268920898,
"step": 1117
},
{
"epoch": 0.6447520184544406,
"grad_norm": 0.8624749779701233,
"learning_rate": 0.0001572060804310179,
"loss": 1.024225115776062,
"step": 1118
},
{
"epoch": 0.6453287197231834,
"grad_norm": 0.9867150187492371,
"learning_rate": 0.0001571675966903983,
"loss": 1.1838812828063965,
"step": 1119
},
{
"epoch": 0.6459054209919262,
"grad_norm": 0.9800993204116821,
"learning_rate": 0.00015712911294977873,
"loss": 1.0964932441711426,
"step": 1120
},
{
"epoch": 0.6464821222606689,
"grad_norm": 0.6755380034446716,
"learning_rate": 0.00015709062920915914,
"loss": 0.6732958555221558,
"step": 1121
},
{
"epoch": 0.6470588235294118,
"grad_norm": 0.6237842440605164,
"learning_rate": 0.00015705214546853956,
"loss": 0.769539475440979,
"step": 1122
},
{
"epoch": 0.6476355247981546,
"grad_norm": 0.9327729344367981,
"learning_rate": 0.00015701366172791995,
"loss": 1.2593892812728882,
"step": 1123
},
{
"epoch": 0.6482122260668973,
"grad_norm": 0.7165786623954773,
"learning_rate": 0.00015697517798730036,
"loss": 0.8721244931221008,
"step": 1124
},
{
"epoch": 0.6487889273356401,
"grad_norm": 0.7718213200569153,
"learning_rate": 0.00015693669424668078,
"loss": 0.9298558235168457,
"step": 1125
},
{
"epoch": 0.649365628604383,
"grad_norm": 0.7327983975410461,
"learning_rate": 0.0001568982105060612,
"loss": 0.9947003722190857,
"step": 1126
},
{
"epoch": 0.6499423298731257,
"grad_norm": 0.8242558240890503,
"learning_rate": 0.0001568597267654416,
"loss": 1.3076270818710327,
"step": 1127
},
{
"epoch": 0.6505190311418685,
"grad_norm": 0.5866062641143799,
"learning_rate": 0.00015682124302482202,
"loss": 0.7161552309989929,
"step": 1128
},
{
"epoch": 0.6510957324106112,
"grad_norm": 0.690351665019989,
"learning_rate": 0.00015678275928420244,
"loss": 0.7334930896759033,
"step": 1129
},
{
"epoch": 0.6516724336793541,
"grad_norm": 0.7475882172584534,
"learning_rate": 0.00015674427554358285,
"loss": 0.8960260152816772,
"step": 1130
},
{
"epoch": 0.6522491349480969,
"grad_norm": 0.7973214983940125,
"learning_rate": 0.00015670579180296324,
"loss": 0.9681750535964966,
"step": 1131
},
{
"epoch": 0.6528258362168397,
"grad_norm": 0.7747503519058228,
"learning_rate": 0.00015666730806234366,
"loss": 1.051071047782898,
"step": 1132
},
{
"epoch": 0.6534025374855824,
"grad_norm": 0.6149755120277405,
"learning_rate": 0.00015662882432172407,
"loss": 1.0745124816894531,
"step": 1133
},
{
"epoch": 0.6539792387543253,
"grad_norm": 0.8245506286621094,
"learning_rate": 0.0001565903405811045,
"loss": 1.3383489847183228,
"step": 1134
},
{
"epoch": 0.654555940023068,
"grad_norm": 0.754502534866333,
"learning_rate": 0.0001565518568404849,
"loss": 0.709721028804779,
"step": 1135
},
{
"epoch": 0.6551326412918108,
"grad_norm": 0.5991480946540833,
"learning_rate": 0.00015651337309986532,
"loss": 0.6601396203041077,
"step": 1136
},
{
"epoch": 0.6557093425605537,
"grad_norm": 0.7160611152648926,
"learning_rate": 0.00015647488935924573,
"loss": 1.244566559791565,
"step": 1137
},
{
"epoch": 0.6562860438292965,
"grad_norm": 0.6996898055076599,
"learning_rate": 0.00015643640561862615,
"loss": 0.7976762056350708,
"step": 1138
},
{
"epoch": 0.6568627450980392,
"grad_norm": 1.1391624212265015,
"learning_rate": 0.00015639792187800654,
"loss": 1.1150181293487549,
"step": 1139
},
{
"epoch": 0.657439446366782,
"grad_norm": 0.6305305361747742,
"learning_rate": 0.00015635943813738695,
"loss": 0.9086626768112183,
"step": 1140
},
{
"epoch": 0.6580161476355249,
"grad_norm": 1.1590427160263062,
"learning_rate": 0.00015632095439676737,
"loss": 1.2399204969406128,
"step": 1141
},
{
"epoch": 0.6585928489042676,
"grad_norm": 0.6845443844795227,
"learning_rate": 0.00015628247065614778,
"loss": 0.9434126019477844,
"step": 1142
},
{
"epoch": 0.6591695501730104,
"grad_norm": 0.8011909127235413,
"learning_rate": 0.0001562439869155282,
"loss": 0.9793667197227478,
"step": 1143
},
{
"epoch": 0.6597462514417531,
"grad_norm": 0.7350550293922424,
"learning_rate": 0.0001562055031749086,
"loss": 1.27531099319458,
"step": 1144
},
{
"epoch": 0.660322952710496,
"grad_norm": 0.9062415361404419,
"learning_rate": 0.00015616701943428903,
"loss": 0.9977236986160278,
"step": 1145
},
{
"epoch": 0.6608996539792388,
"grad_norm": 0.8427753448486328,
"learning_rate": 0.00015612853569366944,
"loss": 1.3097494840621948,
"step": 1146
},
{
"epoch": 0.6614763552479815,
"grad_norm": 0.7309291958808899,
"learning_rate": 0.00015609005195304983,
"loss": 1.1841623783111572,
"step": 1147
},
{
"epoch": 0.6620530565167243,
"grad_norm": 0.8518312573432922,
"learning_rate": 0.00015605156821243025,
"loss": 1.0959196090698242,
"step": 1148
},
{
"epoch": 0.6626297577854672,
"grad_norm": 0.7902095317840576,
"learning_rate": 0.00015601308447181066,
"loss": 1.186163067817688,
"step": 1149
},
{
"epoch": 0.6632064590542099,
"grad_norm": 0.8482567071914673,
"learning_rate": 0.00015597460073119108,
"loss": 0.9569811820983887,
"step": 1150
},
{
"epoch": 0.6637831603229527,
"grad_norm": 0.5328805446624756,
"learning_rate": 0.0001559361169905715,
"loss": 0.6388610005378723,
"step": 1151
},
{
"epoch": 0.6643598615916955,
"grad_norm": 0.6060228943824768,
"learning_rate": 0.0001558976332499519,
"loss": 0.7743721008300781,
"step": 1152
},
{
"epoch": 0.6649365628604383,
"grad_norm": 0.615100085735321,
"learning_rate": 0.00015585914950933232,
"loss": 0.8808379769325256,
"step": 1153
},
{
"epoch": 0.6655132641291811,
"grad_norm": 1.1238489151000977,
"learning_rate": 0.00015582066576871274,
"loss": 1.2252037525177002,
"step": 1154
},
{
"epoch": 0.6660899653979239,
"grad_norm": 0.8212980628013611,
"learning_rate": 0.00015578218202809313,
"loss": 1.0264016389846802,
"step": 1155
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.8575494885444641,
"learning_rate": 0.00015574369828747354,
"loss": 0.9453893899917603,
"step": 1156
},
{
"epoch": 0.6672433679354095,
"grad_norm": 0.8559103608131409,
"learning_rate": 0.00015570521454685396,
"loss": 1.01399564743042,
"step": 1157
},
{
"epoch": 0.6678200692041523,
"grad_norm": 0.8769490122795105,
"learning_rate": 0.00015566673080623437,
"loss": 1.1861730813980103,
"step": 1158
},
{
"epoch": 0.668396770472895,
"grad_norm": 0.5112201571464539,
"learning_rate": 0.0001556282470656148,
"loss": 0.6198689341545105,
"step": 1159
},
{
"epoch": 0.6689734717416378,
"grad_norm": 0.6346172094345093,
"learning_rate": 0.0001555897633249952,
"loss": 0.757227897644043,
"step": 1160
},
{
"epoch": 0.6695501730103807,
"grad_norm": 0.7918882966041565,
"learning_rate": 0.00015555127958437562,
"loss": 0.7224777936935425,
"step": 1161
},
{
"epoch": 0.6701268742791234,
"grad_norm": 0.5124825835227966,
"learning_rate": 0.00015551279584375603,
"loss": 0.7446980476379395,
"step": 1162
},
{
"epoch": 0.6707035755478662,
"grad_norm": 0.6950685977935791,
"learning_rate": 0.00015547431210313642,
"loss": 0.8628665804862976,
"step": 1163
},
{
"epoch": 0.671280276816609,
"grad_norm": 0.8380517363548279,
"learning_rate": 0.00015543582836251684,
"loss": 1.0211181640625,
"step": 1164
},
{
"epoch": 0.6718569780853518,
"grad_norm": 0.732266902923584,
"learning_rate": 0.00015539734462189725,
"loss": 0.7137742042541504,
"step": 1165
},
{
"epoch": 0.6724336793540946,
"grad_norm": 0.7325503826141357,
"learning_rate": 0.00015535886088127767,
"loss": 1.0089268684387207,
"step": 1166
},
{
"epoch": 0.6730103806228374,
"grad_norm": 0.8091567158699036,
"learning_rate": 0.00015532037714065808,
"loss": 1.0261311531066895,
"step": 1167
},
{
"epoch": 0.6735870818915801,
"grad_norm": 0.8078528642654419,
"learning_rate": 0.0001552818934000385,
"loss": 1.0196332931518555,
"step": 1168
},
{
"epoch": 0.674163783160323,
"grad_norm": 0.5558749437332153,
"learning_rate": 0.0001552434096594189,
"loss": 0.8882730007171631,
"step": 1169
},
{
"epoch": 0.6747404844290658,
"grad_norm": 0.7303665280342102,
"learning_rate": 0.00015520492591879933,
"loss": 0.9657995700836182,
"step": 1170
},
{
"epoch": 0.6753171856978085,
"grad_norm": 0.7512165904045105,
"learning_rate": 0.00015516644217817972,
"loss": 1.0741921663284302,
"step": 1171
},
{
"epoch": 0.6758938869665513,
"grad_norm": 0.7227686047554016,
"learning_rate": 0.0001549788542868128,
"loss": 1.0935313701629639,
"step": 1172
},
{
"epoch": 0.6764705882352942,
"grad_norm": 0.9613728523254395,
"learning_rate": 0.00015494040753556324,
"loss": 1.0458366870880127,
"step": 1173
},
{
"epoch": 0.6770472895040369,
"grad_norm": 0.7592456936836243,
"learning_rate": 0.00015490196078431375,
"loss": 1.048318862915039,
"step": 1174
},
{
"epoch": 0.6776239907727797,
"grad_norm": 0.6358122229576111,
"learning_rate": 0.0001548635140330642,
"loss": 0.9271713495254517,
"step": 1175
},
{
"epoch": 0.6782006920415224,
"grad_norm": 0.6779629588127136,
"learning_rate": 0.0001548250672818147,
"loss": 0.8732894062995911,
"step": 1176
},
{
"epoch": 0.6787773933102653,
"grad_norm": 0.7252342700958252,
"learning_rate": 0.00015478662053056518,
"loss": 1.016528606414795,
"step": 1177
},
{
"epoch": 0.6793540945790081,
"grad_norm": 0.5252419710159302,
"learning_rate": 0.00015474817377931566,
"loss": 0.6656200885772705,
"step": 1178
},
{
"epoch": 0.6799307958477508,
"grad_norm": 0.7480099201202393,
"learning_rate": 0.00015470972702806614,
"loss": 0.9825901389122009,
"step": 1179
},
{
"epoch": 0.6805074971164936,
"grad_norm": 0.5403528809547424,
"learning_rate": 0.00015467128027681662,
"loss": 0.8263649344444275,
"step": 1180
},
{
"epoch": 0.6810841983852365,
"grad_norm": 0.909685492515564,
"learning_rate": 0.0001546328335255671,
"loss": 1.1039624214172363,
"step": 1181
},
{
"epoch": 0.6816608996539792,
"grad_norm": 0.6782054305076599,
"learning_rate": 0.00015459438677431757,
"loss": 0.8667647242546082,
"step": 1182
},
{
"epoch": 0.682237600922722,
"grad_norm": 0.9437413811683655,
"learning_rate": 0.00015455594002306805,
"loss": 1.0089085102081299,
"step": 1183
},
{
"epoch": 0.6828143021914648,
"grad_norm": 0.631352424621582,
"learning_rate": 0.00015451749327181856,
"loss": 0.8900731801986694,
"step": 1184
},
{
"epoch": 0.6833910034602076,
"grad_norm": 0.9895037412643433,
"learning_rate": 0.000154479046520569,
"loss": 1.4409505128860474,
"step": 1185
},
{
"epoch": 0.6839677047289504,
"grad_norm": 0.655288815498352,
"learning_rate": 0.00015444059976931951,
"loss": 0.8149420022964478,
"step": 1186
},
{
"epoch": 0.6845444059976932,
"grad_norm": 0.906093418598175,
"learning_rate": 0.00015440215301806997,
"loss": 1.440996527671814,
"step": 1187
},
{
"epoch": 0.6851211072664359,
"grad_norm": 0.7067789435386658,
"learning_rate": 0.00015436370626682047,
"loss": 0.6415053009986877,
"step": 1188
},
{
"epoch": 0.6856978085351788,
"grad_norm": 0.7950546741485596,
"learning_rate": 0.00015432525951557095,
"loss": 1.0555880069732666,
"step": 1189
},
{
"epoch": 0.6862745098039216,
"grad_norm": 0.7521815299987793,
"learning_rate": 0.00015428681276432143,
"loss": 1.0289030075073242,
"step": 1190
},
{
"epoch": 0.6868512110726643,
"grad_norm": 0.8053890466690063,
"learning_rate": 0.0001542483660130719,
"loss": 1.0104256868362427,
"step": 1191
},
{
"epoch": 0.6874279123414071,
"grad_norm": 0.8960652351379395,
"learning_rate": 0.00015420991926182238,
"loss": 1.3124630451202393,
"step": 1192
},
{
"epoch": 0.68800461361015,
"grad_norm": 0.6445242762565613,
"learning_rate": 0.00015417147251057286,
"loss": 0.7147958278656006,
"step": 1193
},
{
"epoch": 0.6885813148788927,
"grad_norm": 0.8771377801895142,
"learning_rate": 0.00015413302575932334,
"loss": 1.1068731546401978,
"step": 1194
},
{
"epoch": 0.6891580161476355,
"grad_norm": 0.746562659740448,
"learning_rate": 0.00015409457900807382,
"loss": 0.8577734231948853,
"step": 1195
},
{
"epoch": 0.6897347174163783,
"grad_norm": 0.8225957155227661,
"learning_rate": 0.00015405613225682432,
"loss": 1.137495994567871,
"step": 1196
},
{
"epoch": 0.6903114186851211,
"grad_norm": 1.2180874347686768,
"learning_rate": 0.00015401768550557478,
"loss": 1.3055964708328247,
"step": 1197
},
{
"epoch": 0.6908881199538639,
"grad_norm": 0.8417837619781494,
"learning_rate": 0.00015397923875432528,
"loss": 0.719217836856842,
"step": 1198
},
{
"epoch": 0.6914648212226067,
"grad_norm": 0.5893595218658447,
"learning_rate": 0.00015394079200307573,
"loss": 0.7719886302947998,
"step": 1199
},
{
"epoch": 0.6920415224913494,
"grad_norm": 0.6734403371810913,
"learning_rate": 0.00015390234525182624,
"loss": 0.960877537727356,
"step": 1200
},
{
"epoch": 0.6926182237600923,
"grad_norm": 0.7350678443908691,
"learning_rate": 0.00015386389850057672,
"loss": 1.039952278137207,
"step": 1201
},
{
"epoch": 0.6931949250288351,
"grad_norm": 0.8072929978370667,
"learning_rate": 0.0001538254517493272,
"loss": 0.9792311787605286,
"step": 1202
},
{
"epoch": 0.6937716262975778,
"grad_norm": 0.6742820739746094,
"learning_rate": 0.00015378700499807767,
"loss": 0.8704882860183716,
"step": 1203
},
{
"epoch": 0.6943483275663207,
"grad_norm": 0.6590847969055176,
"learning_rate": 0.00015374855824682815,
"loss": 0.7836930155754089,
"step": 1204
},
{
"epoch": 0.6949250288350635,
"grad_norm": 0.6364882588386536,
"learning_rate": 0.00015371011149557863,
"loss": 0.6790116429328918,
"step": 1205
},
{
"epoch": 0.6955017301038062,
"grad_norm": 0.8620322346687317,
"learning_rate": 0.0001536716647443291,
"loss": 1.1667858362197876,
"step": 1206
},
{
"epoch": 0.696078431372549,
"grad_norm": 0.9262224435806274,
"learning_rate": 0.00015363321799307959,
"loss": 1.2684681415557861,
"step": 1207
},
{
"epoch": 0.6966551326412919,
"grad_norm": 0.7098090052604675,
"learning_rate": 0.0001535947712418301,
"loss": 1.108170986175537,
"step": 1208
},
{
"epoch": 0.6972318339100346,
"grad_norm": 0.8219681978225708,
"learning_rate": 0.00015355632449058054,
"loss": 1.1987258195877075,
"step": 1209
},
{
"epoch": 0.6978085351787774,
"grad_norm": 0.7267138957977295,
"learning_rate": 0.00015351787773933105,
"loss": 0.8790909051895142,
"step": 1210
},
{
"epoch": 0.6983852364475202,
"grad_norm": 0.9880861043930054,
"learning_rate": 0.0001534794309880815,
"loss": 0.7550561428070068,
"step": 1211
},
{
"epoch": 0.698961937716263,
"grad_norm": 1.0179109573364258,
"learning_rate": 0.000153440984236832,
"loss": 1.2887327671051025,
"step": 1212
},
{
"epoch": 0.6995386389850058,
"grad_norm": 1.0065605640411377,
"learning_rate": 0.00015340253748558246,
"loss": 1.3018262386322021,
"step": 1213
},
{
"epoch": 0.7001153402537486,
"grad_norm": 0.7868698835372925,
"learning_rate": 0.00015336409073433296,
"loss": 1.0050418376922607,
"step": 1214
},
{
"epoch": 0.7006920415224913,
"grad_norm": 1.2052333354949951,
"learning_rate": 0.00015332564398308344,
"loss": 1.4229861497879028,
"step": 1215
},
{
"epoch": 0.7012687427912342,
"grad_norm": 0.7077322006225586,
"learning_rate": 0.00015328719723183392,
"loss": 0.6043359041213989,
"step": 1216
},
{
"epoch": 0.701845444059977,
"grad_norm": 0.587632417678833,
"learning_rate": 0.0001532487504805844,
"loss": 0.6483091115951538,
"step": 1217
},
{
"epoch": 0.7024221453287197,
"grad_norm": 0.5759986042976379,
"learning_rate": 0.00015321030372933487,
"loss": 0.8392894864082336,
"step": 1218
},
{
"epoch": 0.7029988465974625,
"grad_norm": 0.6800678372383118,
"learning_rate": 0.00015317185697808535,
"loss": 0.8921798467636108,
"step": 1219
},
{
"epoch": 0.7035755478662054,
"grad_norm": 0.7683438658714294,
"learning_rate": 0.00015313341022683586,
"loss": 0.9112846851348877,
"step": 1220
},
{
"epoch": 0.7041522491349481,
"grad_norm": 1.0117342472076416,
"learning_rate": 0.0001530949634755863,
"loss": 1.4151829481124878,
"step": 1221
},
{
"epoch": 0.7047289504036909,
"grad_norm": 0.889950156211853,
"learning_rate": 0.00015305651672433681,
"loss": 1.190742015838623,
"step": 1222
},
{
"epoch": 0.7053056516724336,
"grad_norm": 0.7858697772026062,
"learning_rate": 0.00015301806997308727,
"loss": 1.0679411888122559,
"step": 1223
},
{
"epoch": 0.7058823529411765,
"grad_norm": 0.894363522529602,
"learning_rate": 0.00015297962322183777,
"loss": 1.1472891569137573,
"step": 1224
},
{
"epoch": 0.7064590542099193,
"grad_norm": 0.7669128775596619,
"learning_rate": 0.00015294117647058822,
"loss": 1.1536177396774292,
"step": 1225
},
{
"epoch": 0.707035755478662,
"grad_norm": 0.6551662683486938,
"learning_rate": 0.00015290272971933873,
"loss": 1.1004867553710938,
"step": 1226
},
{
"epoch": 0.7076124567474048,
"grad_norm": 1.0020555257797241,
"learning_rate": 0.0001528642829680892,
"loss": 1.2485133409500122,
"step": 1227
},
{
"epoch": 0.7081891580161477,
"grad_norm": 0.725662887096405,
"learning_rate": 0.00015282583621683968,
"loss": 0.8090496063232422,
"step": 1228
},
{
"epoch": 0.7087658592848904,
"grad_norm": 0.8500173091888428,
"learning_rate": 0.00015278738946559016,
"loss": 1.1222527027130127,
"step": 1229
},
{
"epoch": 0.7093425605536332,
"grad_norm": 0.7580368518829346,
"learning_rate": 0.00015274894271434064,
"loss": 0.8194168210029602,
"step": 1230
},
{
"epoch": 0.709919261822376,
"grad_norm": 0.936622679233551,
"learning_rate": 0.00015271049596309112,
"loss": 0.9981272220611572,
"step": 1231
},
{
"epoch": 0.7104959630911188,
"grad_norm": 0.8283603191375732,
"learning_rate": 0.00015267204921184162,
"loss": 0.9328891634941101,
"step": 1232
},
{
"epoch": 0.7110726643598616,
"grad_norm": 1.0028311014175415,
"learning_rate": 0.00015263360246059208,
"loss": 0.9482144117355347,
"step": 1233
},
{
"epoch": 0.7116493656286044,
"grad_norm": 1.1841291189193726,
"learning_rate": 0.00015259515570934258,
"loss": 1.4021642208099365,
"step": 1234
},
{
"epoch": 0.7122260668973471,
"grad_norm": 1.0274176597595215,
"learning_rate": 0.00015255670895809303,
"loss": 1.1408722400665283,
"step": 1235
},
{
"epoch": 0.71280276816609,
"grad_norm": 0.8339233994483948,
"learning_rate": 0.00015251826220684354,
"loss": 1.2026294469833374,
"step": 1236
},
{
"epoch": 0.7133794694348328,
"grad_norm": 0.8232172727584839,
"learning_rate": 0.000152479815455594,
"loss": 1.0658057928085327,
"step": 1237
},
{
"epoch": 0.7139561707035755,
"grad_norm": 0.6768394708633423,
"learning_rate": 0.0001524413687043445,
"loss": 0.7539021968841553,
"step": 1238
},
{
"epoch": 0.7145328719723183,
"grad_norm": 1.0153294801712036,
"learning_rate": 0.00015240292195309497,
"loss": 1.1792476177215576,
"step": 1239
},
{
"epoch": 0.7151095732410612,
"grad_norm": 1.2099579572677612,
"learning_rate": 0.00015236447520184545,
"loss": 1.482499599456787,
"step": 1240
},
{
"epoch": 0.7156862745098039,
"grad_norm": 0.5826729536056519,
"learning_rate": 0.00015232602845059593,
"loss": 0.7845430374145508,
"step": 1241
},
{
"epoch": 0.7162629757785467,
"grad_norm": 0.7632762789726257,
"learning_rate": 0.0001522875816993464,
"loss": 0.8908877968788147,
"step": 1242
},
{
"epoch": 0.7168396770472895,
"grad_norm": 0.835464358329773,
"learning_rate": 0.00015224913494809689,
"loss": 1.0795903205871582,
"step": 1243
},
{
"epoch": 0.7174163783160323,
"grad_norm": 0.998972475528717,
"learning_rate": 0.0001522106881968474,
"loss": 0.9715967178344727,
"step": 1244
},
{
"epoch": 0.7179930795847751,
"grad_norm": 0.5176213383674622,
"learning_rate": 0.00015217224144559784,
"loss": 0.7307795286178589,
"step": 1245
},
{
"epoch": 0.7185697808535179,
"grad_norm": 1.0009640455245972,
"learning_rate": 0.00015213379469434835,
"loss": 1.253312587738037,
"step": 1246
},
{
"epoch": 0.7191464821222606,
"grad_norm": 1.1499648094177246,
"learning_rate": 0.0001520953479430988,
"loss": 1.2523915767669678,
"step": 1247
},
{
"epoch": 0.7197231833910035,
"grad_norm": 0.9233465790748596,
"learning_rate": 0.0001520569011918493,
"loss": 1.025418996810913,
"step": 1248
},
{
"epoch": 0.7202998846597463,
"grad_norm": 0.5469316840171814,
"learning_rate": 0.00015201845444059975,
"loss": 0.6671372652053833,
"step": 1249
},
{
"epoch": 0.720876585928489,
"grad_norm": 0.7743379473686218,
"learning_rate": 0.00015198000768935026,
"loss": 1.2212378978729248,
"step": 1250
},
{
"epoch": 0.7214532871972318,
"grad_norm": 0.971682608127594,
"learning_rate": 0.00015194156093810074,
"loss": 1.2435131072998047,
"step": 1251
},
{
"epoch": 0.7220299884659747,
"grad_norm": 0.9899376630783081,
"learning_rate": 0.00015190311418685122,
"loss": 1.2595231533050537,
"step": 1252
},
{
"epoch": 0.7226066897347174,
"grad_norm": 0.8441123962402344,
"learning_rate": 0.0001518646674356017,
"loss": 0.9278808832168579,
"step": 1253
},
{
"epoch": 0.7231833910034602,
"grad_norm": 0.5254001021385193,
"learning_rate": 0.00015182622068435217,
"loss": 0.786496102809906,
"step": 1254
},
{
"epoch": 0.723760092272203,
"grad_norm": 0.9715943932533264,
"learning_rate": 0.00015178777393310265,
"loss": 0.9957152605056763,
"step": 1255
},
{
"epoch": 0.7243367935409458,
"grad_norm": 0.9919838905334473,
"learning_rate": 0.00015174932718185316,
"loss": 1.3595893383026123,
"step": 1256
},
{
"epoch": 0.7249134948096886,
"grad_norm": 0.7739357352256775,
"learning_rate": 0.0001517108804306036,
"loss": 0.7901654839515686,
"step": 1257
},
{
"epoch": 0.7254901960784313,
"grad_norm": 0.996926486492157,
"learning_rate": 0.00015167243367935411,
"loss": 1.0908658504486084,
"step": 1258
},
{
"epoch": 0.7260668973471741,
"grad_norm": 0.6757825016975403,
"learning_rate": 0.00015163398692810456,
"loss": 0.7795881032943726,
"step": 1259
},
{
"epoch": 0.726643598615917,
"grad_norm": 0.9458150863647461,
"learning_rate": 0.00015159554017685507,
"loss": 1.0505211353302002,
"step": 1260
},
{
"epoch": 0.7272202998846597,
"grad_norm": 0.8086127638816833,
"learning_rate": 0.00015155709342560552,
"loss": 0.9041070938110352,
"step": 1261
},
{
"epoch": 0.7277970011534025,
"grad_norm": 0.6491602659225464,
"learning_rate": 0.00015151864667435603,
"loss": 0.9067816734313965,
"step": 1262
},
{
"epoch": 0.7283737024221453,
"grad_norm": 0.5835777521133423,
"learning_rate": 0.0001514801999231065,
"loss": 0.7853602170944214,
"step": 1263
},
{
"epoch": 0.7289504036908881,
"grad_norm": 0.8881536722183228,
"learning_rate": 0.00015144175317185698,
"loss": 1.2767361402511597,
"step": 1264
},
{
"epoch": 0.7295271049596309,
"grad_norm": 0.6160046458244324,
"learning_rate": 0.00015140330642060746,
"loss": 0.7595696449279785,
"step": 1265
},
{
"epoch": 0.7301038062283737,
"grad_norm": 0.7877328991889954,
"learning_rate": 0.00015136485966935794,
"loss": 0.9727606773376465,
"step": 1266
},
{
"epoch": 0.7306805074971164,
"grad_norm": 0.6233464479446411,
"learning_rate": 0.00015132641291810842,
"loss": 0.6097822785377502,
"step": 1267
},
{
"epoch": 0.7312572087658593,
"grad_norm": 0.8846599459648132,
"learning_rate": 0.00015128796616685892,
"loss": 1.314606785774231,
"step": 1268
},
{
"epoch": 0.7318339100346021,
"grad_norm": 0.6752328872680664,
"learning_rate": 0.00015124951941560937,
"loss": 0.9257625341415405,
"step": 1269
},
{
"epoch": 0.7324106113033448,
"grad_norm": 0.6147440075874329,
"learning_rate": 0.00015121107266435988,
"loss": 0.7304266691207886,
"step": 1270
},
{
"epoch": 0.7329873125720877,
"grad_norm": 0.8625065088272095,
"learning_rate": 0.00015117262591311033,
"loss": 1.2385823726654053,
"step": 1271
},
{
"epoch": 0.7335640138408305,
"grad_norm": 0.6224170923233032,
"learning_rate": 0.00015113417916186084,
"loss": 0.7687395215034485,
"step": 1272
},
{
"epoch": 0.7341407151095732,
"grad_norm": 0.839799165725708,
"learning_rate": 0.0001510957324106113,
"loss": 1.0231621265411377,
"step": 1273
},
{
"epoch": 0.734717416378316,
"grad_norm": 0.8609519600868225,
"learning_rate": 0.0001510572856593618,
"loss": 1.1030302047729492,
"step": 1274
},
{
"epoch": 0.7352941176470589,
"grad_norm": 0.8059080243110657,
"learning_rate": 0.00015101883890811227,
"loss": 1.307667851448059,
"step": 1275
},
{
"epoch": 0.7358708189158016,
"grad_norm": 0.7881230115890503,
"learning_rate": 0.00015098039215686275,
"loss": 0.8685023784637451,
"step": 1276
},
{
"epoch": 0.7364475201845444,
"grad_norm": 0.6535466909408569,
"learning_rate": 0.00015094194540561323,
"loss": 0.8849316835403442,
"step": 1277
},
{
"epoch": 0.7370242214532872,
"grad_norm": 0.664448082447052,
"learning_rate": 0.0001509034986543637,
"loss": 0.809040904045105,
"step": 1278
},
{
"epoch": 0.73760092272203,
"grad_norm": 0.9526609182357788,
"learning_rate": 0.00015086505190311418,
"loss": 1.2887682914733887,
"step": 1279
},
{
"epoch": 0.7381776239907728,
"grad_norm": 0.8947210907936096,
"learning_rate": 0.00015082660515186466,
"loss": 1.0613007545471191,
"step": 1280
},
{
"epoch": 0.7387543252595156,
"grad_norm": 0.9127343893051147,
"learning_rate": 0.00015078815840061514,
"loss": 0.9401702284812927,
"step": 1281
},
{
"epoch": 0.7393310265282583,
"grad_norm": 1.0288292169570923,
"learning_rate": 0.00015074971164936565,
"loss": 1.2102299928665161,
"step": 1282
},
{
"epoch": 0.7399077277970012,
"grad_norm": 0.6608892679214478,
"learning_rate": 0.0001507112648981161,
"loss": 0.7817317247390747,
"step": 1283
},
{
"epoch": 0.740484429065744,
"grad_norm": 0.5857222080230713,
"learning_rate": 0.0001506728181468666,
"loss": 0.7468012571334839,
"step": 1284
},
{
"epoch": 0.7410611303344867,
"grad_norm": 0.6499783992767334,
"learning_rate": 0.00015063437139561708,
"loss": 0.7113574147224426,
"step": 1285
},
{
"epoch": 0.7416378316032295,
"grad_norm": 0.718450129032135,
"learning_rate": 0.00015059592464436756,
"loss": 0.9823046326637268,
"step": 1286
},
{
"epoch": 0.7422145328719724,
"grad_norm": 0.7987701296806335,
"learning_rate": 0.00015055747789311804,
"loss": 0.9410796761512756,
"step": 1287
},
{
"epoch": 0.7427912341407151,
"grad_norm": 0.7227610349655151,
"learning_rate": 0.00015051903114186852,
"loss": 0.7366760969161987,
"step": 1288
},
{
"epoch": 0.7433679354094579,
"grad_norm": 0.9411056637763977,
"learning_rate": 0.000150480584390619,
"loss": 0.9475510120391846,
"step": 1289
},
{
"epoch": 0.7439446366782007,
"grad_norm": 0.5987991690635681,
"learning_rate": 0.00015044213763936947,
"loss": 0.8084846138954163,
"step": 1290
},
{
"epoch": 0.7445213379469435,
"grad_norm": 0.6214851140975952,
"learning_rate": 0.00015040369088811995,
"loss": 0.6952444911003113,
"step": 1291
},
{
"epoch": 0.7450980392156863,
"grad_norm": 0.7398913502693176,
"learning_rate": 0.00015036524413687043,
"loss": 0.8432753086090088,
"step": 1292
},
{
"epoch": 0.745674740484429,
"grad_norm": 0.8513553142547607,
"learning_rate": 0.0001503267973856209,
"loss": 0.8751744627952576,
"step": 1293
},
{
"epoch": 0.7462514417531718,
"grad_norm": 0.7704481482505798,
"learning_rate": 0.0001502883506343714,
"loss": 0.9727562665939331,
"step": 1294
},
{
"epoch": 0.7468281430219147,
"grad_norm": 0.6925477385520935,
"learning_rate": 0.0001502499038831219,
"loss": 1.044316291809082,
"step": 1295
},
{
"epoch": 0.7474048442906575,
"grad_norm": 0.8089653253555298,
"learning_rate": 0.00015021145713187237,
"loss": 0.9385859966278076,
"step": 1296
},
{
"epoch": 0.7479815455594002,
"grad_norm": 0.8045443296432495,
"learning_rate": 0.00015017301038062285,
"loss": 1.093725562095642,
"step": 1297
},
{
"epoch": 0.748558246828143,
"grad_norm": 0.8403393626213074,
"learning_rate": 0.00015013456362937333,
"loss": 0.7081382870674133,
"step": 1298
},
{
"epoch": 0.7491349480968859,
"grad_norm": 0.8455471992492676,
"learning_rate": 0.0001500961168781238,
"loss": 1.2357611656188965,
"step": 1299
},
{
"epoch": 0.7497116493656286,
"grad_norm": 0.8819023966789246,
"learning_rate": 0.00015005767012687428,
"loss": 1.2907012701034546,
"step": 1300
},
{
"epoch": 0.7502883506343714,
"grad_norm": 0.6467103362083435,
"learning_rate": 0.00015001922337562476,
"loss": 0.7991781830787659,
"step": 1301
},
{
"epoch": 0.7508650519031141,
"grad_norm": 1.0841728448867798,
"learning_rate": 0.00014998077662437524,
"loss": 1.156419038772583,
"step": 1302
},
{
"epoch": 0.751441753171857,
"grad_norm": 0.4863538146018982,
"learning_rate": 0.00014994232987312572,
"loss": 0.5481974482536316,
"step": 1303
},
{
"epoch": 0.7520184544405998,
"grad_norm": 0.631119966506958,
"learning_rate": 0.0001499038831218762,
"loss": 0.7421573996543884,
"step": 1304
},
{
"epoch": 0.7525951557093425,
"grad_norm": 0.6919093728065491,
"learning_rate": 0.0001498654363706267,
"loss": 0.6554936170578003,
"step": 1305
},
{
"epoch": 0.7531718569780853,
"grad_norm": 0.7746281027793884,
"learning_rate": 0.00014982698961937718,
"loss": 0.9226951599121094,
"step": 1306
},
{
"epoch": 0.7537485582468282,
"grad_norm": 0.821020245552063,
"learning_rate": 0.00014978854286812766,
"loss": 1.2231357097625732,
"step": 1307
},
{
"epoch": 0.754325259515571,
"grad_norm": 0.6167652606964111,
"learning_rate": 0.00014975009611687814,
"loss": 0.9597879648208618,
"step": 1308
},
{
"epoch": 0.7549019607843137,
"grad_norm": 0.6786548495292664,
"learning_rate": 0.00014971164936562861,
"loss": 0.8253003358840942,
"step": 1309
},
{
"epoch": 0.7554786620530565,
"grad_norm": 0.9683876037597656,
"learning_rate": 0.0001496732026143791,
"loss": 1.1294584274291992,
"step": 1310
},
{
"epoch": 0.7560553633217993,
"grad_norm": 0.8556981086730957,
"learning_rate": 0.00014963475586312957,
"loss": 1.009643316268921,
"step": 1311
},
{
"epoch": 0.7566320645905421,
"grad_norm": 0.7639108896255493,
"learning_rate": 0.00014959630911188005,
"loss": 0.8871880769729614,
"step": 1312
},
{
"epoch": 0.7572087658592849,
"grad_norm": 0.9662507176399231,
"learning_rate": 0.00014955786236063053,
"loss": 1.2890512943267822,
"step": 1313
},
{
"epoch": 0.7577854671280276,
"grad_norm": 0.7260032892227173,
"learning_rate": 0.000149519415609381,
"loss": 1.2696185111999512,
"step": 1314
},
{
"epoch": 0.7583621683967705,
"grad_norm": 1.0413408279418945,
"learning_rate": 0.0001494809688581315,
"loss": 1.2239567041397095,
"step": 1315
},
{
"epoch": 0.7589388696655133,
"grad_norm": 0.9003005623817444,
"learning_rate": 0.00014944252210688196,
"loss": 1.248561143875122,
"step": 1316
},
{
"epoch": 0.759515570934256,
"grad_norm": 0.9604087471961975,
"learning_rate": 0.00014940407535563247,
"loss": 1.2369884252548218,
"step": 1317
},
{
"epoch": 0.7600922722029988,
"grad_norm": 0.7198401093482971,
"learning_rate": 0.00014936562860438295,
"loss": 0.743487536907196,
"step": 1318
},
{
"epoch": 0.7606689734717417,
"grad_norm": 0.7526591420173645,
"learning_rate": 0.00014932718185313342,
"loss": 0.7714953422546387,
"step": 1319
},
{
"epoch": 0.7612456747404844,
"grad_norm": 1.1336771249771118,
"learning_rate": 0.0001492887351018839,
"loss": 1.1577683687210083,
"step": 1320
},
{
"epoch": 0.7618223760092272,
"grad_norm": 0.7607272267341614,
"learning_rate": 0.00014925028835063438,
"loss": 0.903020977973938,
"step": 1321
},
{
"epoch": 0.76239907727797,
"grad_norm": 0.7855517268180847,
"learning_rate": 0.00014921184159938486,
"loss": 0.9421197772026062,
"step": 1322
},
{
"epoch": 0.7629757785467128,
"grad_norm": 0.9380967020988464,
"learning_rate": 0.00014917339484813534,
"loss": 1.0594120025634766,
"step": 1323
},
{
"epoch": 0.7635524798154556,
"grad_norm": 0.9255303740501404,
"learning_rate": 0.00014913494809688582,
"loss": 1.1912791728973389,
"step": 1324
},
{
"epoch": 0.7641291810841984,
"grad_norm": 0.7085497379302979,
"learning_rate": 0.00014909650134563632,
"loss": 0.7702199816703796,
"step": 1325
},
{
"epoch": 0.7647058823529411,
"grad_norm": 0.8080468773841858,
"learning_rate": 0.00014905805459438677,
"loss": 0.9640858769416809,
"step": 1326
},
{
"epoch": 0.765282583621684,
"grad_norm": 0.8854598999023438,
"learning_rate": 0.00014901960784313728,
"loss": 1.0912519693374634,
"step": 1327
},
{
"epoch": 0.7658592848904268,
"grad_norm": 1.158070683479309,
"learning_rate": 0.00014898116109188773,
"loss": 1.259207010269165,
"step": 1328
},
{
"epoch": 0.7664359861591695,
"grad_norm": 0.7163742780685425,
"learning_rate": 0.00014894271434063823,
"loss": 0.9091912508010864,
"step": 1329
},
{
"epoch": 0.7670126874279123,
"grad_norm": 0.6578546762466431,
"learning_rate": 0.0001489042675893887,
"loss": 1.13603937625885,
"step": 1330
},
{
"epoch": 0.7675893886966552,
"grad_norm": 0.641118586063385,
"learning_rate": 0.0001488658208381392,
"loss": 0.6926564574241638,
"step": 1331
},
{
"epoch": 0.7681660899653979,
"grad_norm": 1.3342225551605225,
"learning_rate": 0.00014882737408688967,
"loss": 1.1259536743164062,
"step": 1332
},
{
"epoch": 0.7687427912341407,
"grad_norm": 0.6777533292770386,
"learning_rate": 0.00014878892733564015,
"loss": 0.8380722403526306,
"step": 1333
},
{
"epoch": 0.7693194925028836,
"grad_norm": 0.5475529432296753,
"learning_rate": 0.00014875048058439063,
"loss": 0.7194100618362427,
"step": 1334
},
{
"epoch": 0.7698961937716263,
"grad_norm": 0.7109413743019104,
"learning_rate": 0.0001487120338331411,
"loss": 0.7877069711685181,
"step": 1335
},
{
"epoch": 0.7704728950403691,
"grad_norm": 0.5451337099075317,
"learning_rate": 0.00014867358708189158,
"loss": 0.7354110479354858,
"step": 1336
},
{
"epoch": 0.7710495963091119,
"grad_norm": 0.7789444327354431,
"learning_rate": 0.0001486351403306421,
"loss": 0.9675291776657104,
"step": 1337
},
{
"epoch": 0.7716262975778547,
"grad_norm": 0.7246870398521423,
"learning_rate": 0.00014859669357939254,
"loss": 0.9592723846435547,
"step": 1338
},
{
"epoch": 0.7722029988465975,
"grad_norm": 0.7461789846420288,
"learning_rate": 0.00014855824682814304,
"loss": 1.062403678894043,
"step": 1339
},
{
"epoch": 0.7727797001153403,
"grad_norm": 0.6598569750785828,
"learning_rate": 0.0001485198000768935,
"loss": 0.959195077419281,
"step": 1340
},
{
"epoch": 0.773356401384083,
"grad_norm": 0.8688694834709167,
"learning_rate": 0.000148481353325644,
"loss": 1.3393487930297852,
"step": 1341
},
{
"epoch": 0.7739331026528259,
"grad_norm": 0.7083797454833984,
"learning_rate": 0.00014844290657439448,
"loss": 0.9515122175216675,
"step": 1342
},
{
"epoch": 0.7745098039215687,
"grad_norm": 0.7261124849319458,
"learning_rate": 0.00014840445982314496,
"loss": 1.048977017402649,
"step": 1343
},
{
"epoch": 0.7750865051903114,
"grad_norm": 0.9450129270553589,
"learning_rate": 0.00014836601307189544,
"loss": 1.1335430145263672,
"step": 1344
},
{
"epoch": 0.7756632064590542,
"grad_norm": 0.47535234689712524,
"learning_rate": 0.00014832756632064591,
"loss": 0.6887091398239136,
"step": 1345
},
{
"epoch": 0.776239907727797,
"grad_norm": 0.714235782623291,
"learning_rate": 0.0001482891195693964,
"loss": 0.9414650201797485,
"step": 1346
},
{
"epoch": 0.7768166089965398,
"grad_norm": 0.6094812750816345,
"learning_rate": 0.00014825067281814687,
"loss": 0.8214763402938843,
"step": 1347
},
{
"epoch": 0.7773933102652826,
"grad_norm": 0.7122801542282104,
"learning_rate": 0.00014821222606689735,
"loss": 0.9144871830940247,
"step": 1348
},
{
"epoch": 0.7779700115340253,
"grad_norm": 0.8147172927856445,
"learning_rate": 0.00014817377931564785,
"loss": 1.1212399005889893,
"step": 1349
},
{
"epoch": 0.7785467128027682,
"grad_norm": 0.5866456627845764,
"learning_rate": 0.0001481353325643983,
"loss": 0.6841553449630737,
"step": 1350
},
{
"epoch": 0.779123414071511,
"grad_norm": 1.2120155096054077,
"learning_rate": 0.0001480968858131488,
"loss": 1.1782194375991821,
"step": 1351
},
{
"epoch": 0.7797001153402537,
"grad_norm": 0.8661918640136719,
"learning_rate": 0.00014805843906189926,
"loss": 1.1883846521377563,
"step": 1352
},
{
"epoch": 0.7802768166089965,
"grad_norm": 1.2335827350616455,
"learning_rate": 0.00014801999231064977,
"loss": 1.199598789215088,
"step": 1353
},
{
"epoch": 0.7808535178777394,
"grad_norm": 0.8413060307502747,
"learning_rate": 0.00014798154555940025,
"loss": 1.0878143310546875,
"step": 1354
},
{
"epoch": 0.7814302191464821,
"grad_norm": 1.042397379875183,
"learning_rate": 0.00014794309880815072,
"loss": 1.5179508924484253,
"step": 1355
},
{
"epoch": 0.7820069204152249,
"grad_norm": 1.2029002904891968,
"learning_rate": 0.0001479046520569012,
"loss": 1.361120343208313,
"step": 1356
},
{
"epoch": 0.7825836216839677,
"grad_norm": 0.9056934714317322,
"learning_rate": 0.00014786620530565168,
"loss": 1.0812435150146484,
"step": 1357
},
{
"epoch": 0.7831603229527105,
"grad_norm": 0.7730829119682312,
"learning_rate": 0.00014782775855440216,
"loss": 1.0833256244659424,
"step": 1358
},
{
"epoch": 0.7837370242214533,
"grad_norm": 0.8789440393447876,
"learning_rate": 0.00014778931180315264,
"loss": 1.0179883241653442,
"step": 1359
},
{
"epoch": 0.7843137254901961,
"grad_norm": 0.775190532207489,
"learning_rate": 0.00014775086505190312,
"loss": 1.0584783554077148,
"step": 1360
},
{
"epoch": 0.7848904267589388,
"grad_norm": 0.7954389452934265,
"learning_rate": 0.00014771241830065362,
"loss": 1.1697866916656494,
"step": 1361
},
{
"epoch": 0.7854671280276817,
"grad_norm": 0.8194144368171692,
"learning_rate": 0.00014767397154940407,
"loss": 0.9788481593132019,
"step": 1362
},
{
"epoch": 0.7860438292964245,
"grad_norm": 0.7247309684753418,
"learning_rate": 0.00014763552479815458,
"loss": 0.9953986406326294,
"step": 1363
},
{
"epoch": 0.7866205305651672,
"grad_norm": 0.8735687136650085,
"learning_rate": 0.00014759707804690503,
"loss": 1.108184576034546,
"step": 1364
},
{
"epoch": 0.78719723183391,
"grad_norm": 0.8578454256057739,
"learning_rate": 0.00014755863129565553,
"loss": 1.0608623027801514,
"step": 1365
},
{
"epoch": 0.7877739331026529,
"grad_norm": 1.038670301437378,
"learning_rate": 0.000147520184544406,
"loss": 1.2398217916488647,
"step": 1366
},
{
"epoch": 0.7883506343713956,
"grad_norm": 0.832326352596283,
"learning_rate": 0.0001474817377931565,
"loss": 1.5559954643249512,
"step": 1367
},
{
"epoch": 0.7889273356401384,
"grad_norm": 0.5325842499732971,
"learning_rate": 0.00014744329104190697,
"loss": 0.6711868047714233,
"step": 1368
},
{
"epoch": 0.7895040369088812,
"grad_norm": 0.6845494508743286,
"learning_rate": 0.00014740484429065745,
"loss": 0.9054516553878784,
"step": 1369
},
{
"epoch": 0.790080738177624,
"grad_norm": 0.8053160309791565,
"learning_rate": 0.00014736639753940793,
"loss": 1.1551737785339355,
"step": 1370
},
{
"epoch": 0.7906574394463668,
"grad_norm": 0.9268645644187927,
"learning_rate": 0.0001473279507881584,
"loss": 0.9230217933654785,
"step": 1371
},
{
"epoch": 0.7912341407151096,
"grad_norm": 1.0553678274154663,
"learning_rate": 0.00014728950403690888,
"loss": 1.2223023176193237,
"step": 1372
},
{
"epoch": 0.7918108419838523,
"grad_norm": 0.6177469491958618,
"learning_rate": 0.0001472510572856594,
"loss": 0.8992686867713928,
"step": 1373
},
{
"epoch": 0.7923875432525952,
"grad_norm": 1.138965368270874,
"learning_rate": 0.00014721261053440984,
"loss": 0.8630029559135437,
"step": 1374
},
{
"epoch": 0.792964244521338,
"grad_norm": 0.5512900948524475,
"learning_rate": 0.00014717416378316034,
"loss": 0.8302984237670898,
"step": 1375
},
{
"epoch": 0.7935409457900807,
"grad_norm": 0.6091440916061401,
"learning_rate": 0.0001471357170319108,
"loss": 0.7380212545394897,
"step": 1376
},
{
"epoch": 0.7941176470588235,
"grad_norm": 0.909902811050415,
"learning_rate": 0.0001470972702806613,
"loss": 1.0644478797912598,
"step": 1377
},
{
"epoch": 0.7946943483275664,
"grad_norm": 0.9841009378433228,
"learning_rate": 0.00014705882352941178,
"loss": 1.5122861862182617,
"step": 1378
},
{
"epoch": 0.7952710495963091,
"grad_norm": 0.7682785391807556,
"learning_rate": 0.00014702037677816226,
"loss": 0.8122522830963135,
"step": 1379
},
{
"epoch": 0.7958477508650519,
"grad_norm": 0.8022129535675049,
"learning_rate": 0.00014698193002691274,
"loss": 0.7516300678253174,
"step": 1380
},
{
"epoch": 0.7964244521337946,
"grad_norm": 0.8423136472702026,
"learning_rate": 0.00014694348327566321,
"loss": 0.9571545124053955,
"step": 1381
},
{
"epoch": 0.7970011534025375,
"grad_norm": 0.61954665184021,
"learning_rate": 0.0001469050365244137,
"loss": 0.8543866872787476,
"step": 1382
},
{
"epoch": 0.7975778546712803,
"grad_norm": 0.5888648629188538,
"learning_rate": 0.00014686658977316417,
"loss": 0.6958523988723755,
"step": 1383
},
{
"epoch": 0.798154555940023,
"grad_norm": 0.9419842958450317,
"learning_rate": 0.00014682814302191465,
"loss": 1.3051813840866089,
"step": 1384
},
{
"epoch": 0.7987312572087658,
"grad_norm": 1.1472746133804321,
"learning_rate": 0.00014678969627066515,
"loss": 1.284635305404663,
"step": 1385
},
{
"epoch": 0.7993079584775087,
"grad_norm": 0.5858578681945801,
"learning_rate": 0.0001467512495194156,
"loss": 0.7809937596321106,
"step": 1386
},
{
"epoch": 0.7998846597462514,
"grad_norm": 0.7086213231086731,
"learning_rate": 0.0001467128027681661,
"loss": 0.6571354269981384,
"step": 1387
},
{
"epoch": 0.8004613610149942,
"grad_norm": 0.8438594341278076,
"learning_rate": 0.00014667435601691656,
"loss": 0.9461796283721924,
"step": 1388
},
{
"epoch": 0.801038062283737,
"grad_norm": 0.6701700687408447,
"learning_rate": 0.00014663590926566707,
"loss": 0.7518469095230103,
"step": 1389
},
{
"epoch": 0.8016147635524798,
"grad_norm": 0.7239779233932495,
"learning_rate": 0.00014659746251441755,
"loss": 0.98681640625,
"step": 1390
},
{
"epoch": 0.8021914648212226,
"grad_norm": 0.9055145382881165,
"learning_rate": 0.00014655901576316802,
"loss": 1.038681983947754,
"step": 1391
},
{
"epoch": 0.8027681660899654,
"grad_norm": 0.674439013004303,
"learning_rate": 0.0001465205690119185,
"loss": 0.7289140820503235,
"step": 1392
},
{
"epoch": 0.8033448673587081,
"grad_norm": 0.6101412773132324,
"learning_rate": 0.00014648212226066898,
"loss": 0.8470169901847839,
"step": 1393
},
{
"epoch": 0.803921568627451,
"grad_norm": 1.0043631792068481,
"learning_rate": 0.00014644367550941946,
"loss": 0.9277285933494568,
"step": 1394
},
{
"epoch": 0.8044982698961938,
"grad_norm": 0.8795577883720398,
"learning_rate": 0.00014640522875816994,
"loss": 1.2433722019195557,
"step": 1395
},
{
"epoch": 0.8050749711649365,
"grad_norm": 0.469595730304718,
"learning_rate": 0.00014636678200692042,
"loss": 0.5572987794876099,
"step": 1396
},
{
"epoch": 0.8056516724336793,
"grad_norm": 0.8809022903442383,
"learning_rate": 0.00014632833525567092,
"loss": 1.1597031354904175,
"step": 1397
},
{
"epoch": 0.8062283737024222,
"grad_norm": 0.9675459861755371,
"learning_rate": 0.00014628988850442137,
"loss": 1.0070991516113281,
"step": 1398
},
{
"epoch": 0.8068050749711649,
"grad_norm": 0.8547102212905884,
"learning_rate": 0.00014625144175317188,
"loss": 0.9210143089294434,
"step": 1399
},
{
"epoch": 0.8073817762399077,
"grad_norm": 0.5635284185409546,
"learning_rate": 0.00014621299500192233,
"loss": 0.5849195122718811,
"step": 1400
},
{
"epoch": 0.8079584775086506,
"grad_norm": 0.8755897283554077,
"learning_rate": 0.00014617454825067283,
"loss": 1.014789342880249,
"step": 1401
},
{
"epoch": 0.8085351787773933,
"grad_norm": 0.6002927422523499,
"learning_rate": 0.00014613610149942328,
"loss": 0.8705483675003052,
"step": 1402
},
{
"epoch": 0.8091118800461361,
"grad_norm": 0.9547945857048035,
"learning_rate": 0.0001460976547481738,
"loss": 1.0433237552642822,
"step": 1403
},
{
"epoch": 0.8096885813148789,
"grad_norm": 0.8594508767127991,
"learning_rate": 0.00014605920799692427,
"loss": 0.857754111289978,
"step": 1404
},
{
"epoch": 0.8102652825836217,
"grad_norm": 0.632087230682373,
"learning_rate": 0.00014602076124567475,
"loss": 1.0932989120483398,
"step": 1405
},
{
"epoch": 0.8108419838523645,
"grad_norm": 0.6727497577667236,
"learning_rate": 0.00014598231449442523,
"loss": 1.1335169076919556,
"step": 1406
},
{
"epoch": 0.8114186851211073,
"grad_norm": 1.050377368927002,
"learning_rate": 0.0001459438677431757,
"loss": 1.1787501573562622,
"step": 1407
},
{
"epoch": 0.81199538638985,
"grad_norm": 0.624580442905426,
"learning_rate": 0.00014590542099192618,
"loss": 0.8040243983268738,
"step": 1408
},
{
"epoch": 0.8125720876585929,
"grad_norm": 0.644497275352478,
"learning_rate": 0.0001458669742406767,
"loss": 0.9769735336303711,
"step": 1409
},
{
"epoch": 0.8131487889273357,
"grad_norm": 0.8106479048728943,
"learning_rate": 0.00014582852748942714,
"loss": 1.2847563028335571,
"step": 1410
},
{
"epoch": 0.8137254901960784,
"grad_norm": 0.6234838962554932,
"learning_rate": 0.00014579008073817764,
"loss": 0.7418760061264038,
"step": 1411
},
{
"epoch": 0.8143021914648212,
"grad_norm": 0.7591360807418823,
"learning_rate": 0.0001457516339869281,
"loss": 1.0062642097473145,
"step": 1412
},
{
"epoch": 0.8148788927335641,
"grad_norm": 0.7684062123298645,
"learning_rate": 0.0001457131872356786,
"loss": 0.9963294267654419,
"step": 1413
},
{
"epoch": 0.8154555940023068,
"grad_norm": 0.8234810829162598,
"learning_rate": 0.00014567474048442905,
"loss": 0.9132286310195923,
"step": 1414
},
{
"epoch": 0.8160322952710496,
"grad_norm": 1.3752492666244507,
"learning_rate": 0.00014563629373317956,
"loss": 1.3458770513534546,
"step": 1415
},
{
"epoch": 0.8166089965397924,
"grad_norm": 0.8771060109138489,
"learning_rate": 0.00014559784698193004,
"loss": 0.9146612882614136,
"step": 1416
},
{
"epoch": 0.8171856978085352,
"grad_norm": 0.5799472332000732,
"learning_rate": 0.0001455594002306805,
"loss": 0.8132292032241821,
"step": 1417
},
{
"epoch": 0.817762399077278,
"grad_norm": 1.0692527294158936,
"learning_rate": 0.000145520953479431,
"loss": 1.0524235963821411,
"step": 1418
},
{
"epoch": 0.8183391003460208,
"grad_norm": 0.6880149245262146,
"learning_rate": 0.00014548250672818147,
"loss": 0.8549849987030029,
"step": 1419
},
{
"epoch": 0.8189158016147635,
"grad_norm": 0.9311429858207703,
"learning_rate": 0.00014544405997693195,
"loss": 1.2363505363464355,
"step": 1420
},
{
"epoch": 0.8194925028835064,
"grad_norm": 0.6105409860610962,
"learning_rate": 0.00014540561322568245,
"loss": 0.8256676197052002,
"step": 1421
},
{
"epoch": 0.8200692041522492,
"grad_norm": 0.9718572497367859,
"learning_rate": 0.0001453671664744329,
"loss": 1.349236249923706,
"step": 1422
},
{
"epoch": 0.8206459054209919,
"grad_norm": 0.9589305520057678,
"learning_rate": 0.0001453287197231834,
"loss": 0.8896529674530029,
"step": 1423
},
{
"epoch": 0.8212226066897347,
"grad_norm": 1.1475483179092407,
"learning_rate": 0.00014529027297193386,
"loss": 1.392863154411316,
"step": 1424
},
{
"epoch": 0.8217993079584776,
"grad_norm": 0.9420047402381897,
"learning_rate": 0.00014525182622068437,
"loss": 1.1920685768127441,
"step": 1425
},
{
"epoch": 0.8223760092272203,
"grad_norm": 0.584073007106781,
"learning_rate": 0.00014521337946943482,
"loss": 0.5488528609275818,
"step": 1426
},
{
"epoch": 0.8229527104959631,
"grad_norm": 0.6110360622406006,
"learning_rate": 0.00014517493271818532,
"loss": 0.7226777672767639,
"step": 1427
},
{
"epoch": 0.8235294117647058,
"grad_norm": 0.5320557355880737,
"learning_rate": 0.0001451364859669358,
"loss": 0.5602037906646729,
"step": 1428
},
{
"epoch": 0.8241061130334487,
"grad_norm": 0.5847785472869873,
"learning_rate": 0.00014509803921568628,
"loss": 0.632820725440979,
"step": 1429
},
{
"epoch": 0.8246828143021915,
"grad_norm": 1.1915888786315918,
"learning_rate": 0.00014505959246443676,
"loss": 1.2395484447479248,
"step": 1430
},
{
"epoch": 0.8252595155709342,
"grad_norm": 0.7745262980461121,
"learning_rate": 0.00014502114571318724,
"loss": 0.9293632507324219,
"step": 1431
},
{
"epoch": 0.825836216839677,
"grad_norm": 0.9716136455535889,
"learning_rate": 0.00014498269896193771,
"loss": 1.2587440013885498,
"step": 1432
},
{
"epoch": 0.8264129181084199,
"grad_norm": 0.6674740314483643,
"learning_rate": 0.00014494425221068822,
"loss": 0.9000645875930786,
"step": 1433
},
{
"epoch": 0.8269896193771626,
"grad_norm": 0.9345766305923462,
"learning_rate": 0.00014490580545943867,
"loss": 0.9881076812744141,
"step": 1434
},
{
"epoch": 0.8275663206459054,
"grad_norm": 0.8641346096992493,
"learning_rate": 0.00014486735870818918,
"loss": 1.0706219673156738,
"step": 1435
},
{
"epoch": 0.8281430219146482,
"grad_norm": 0.8997068405151367,
"learning_rate": 0.00014482891195693963,
"loss": 0.932431697845459,
"step": 1436
},
{
"epoch": 0.828719723183391,
"grad_norm": 0.7539141774177551,
"learning_rate": 0.00014479046520569013,
"loss": 0.8891205191612244,
"step": 1437
},
{
"epoch": 0.8292964244521338,
"grad_norm": 0.8675488233566284,
"learning_rate": 0.00014475201845444058,
"loss": 0.9973325729370117,
"step": 1438
},
{
"epoch": 0.8298731257208766,
"grad_norm": 0.7566542029380798,
"learning_rate": 0.0001447135717031911,
"loss": 1.1265358924865723,
"step": 1439
},
{
"epoch": 0.8304498269896193,
"grad_norm": 0.902654230594635,
"learning_rate": 0.00014467512495194157,
"loss": 1.0915746688842773,
"step": 1440
},
{
"epoch": 0.8310265282583622,
"grad_norm": 0.618813693523407,
"learning_rate": 0.00014463667820069205,
"loss": 0.6798044443130493,
"step": 1441
},
{
"epoch": 0.831603229527105,
"grad_norm": 0.6372320055961609,
"learning_rate": 0.00014459823144944252,
"loss": 0.8383584022521973,
"step": 1442
},
{
"epoch": 0.8321799307958477,
"grad_norm": 0.742468535900116,
"learning_rate": 0.000144559784698193,
"loss": 1.0003979206085205,
"step": 1443
},
{
"epoch": 0.8327566320645905,
"grad_norm": 0.9815142750740051,
"learning_rate": 0.00014452133794694348,
"loss": 1.2571461200714111,
"step": 1444
},
{
"epoch": 0.8333333333333334,
"grad_norm": 0.7362657785415649,
"learning_rate": 0.000144482891195694,
"loss": 0.9890142679214478,
"step": 1445
},
{
"epoch": 0.8339100346020761,
"grad_norm": 1.047896385192871,
"learning_rate": 0.00014444444444444444,
"loss": 0.7491689920425415,
"step": 1446
},
{
"epoch": 0.8344867358708189,
"grad_norm": 1.0869019031524658,
"learning_rate": 0.00014440599769319494,
"loss": 1.0598435401916504,
"step": 1447
},
{
"epoch": 0.8350634371395617,
"grad_norm": 0.8003841042518616,
"learning_rate": 0.0001443675509419454,
"loss": 0.7503578662872314,
"step": 1448
},
{
"epoch": 0.8356401384083045,
"grad_norm": 1.3352385759353638,
"learning_rate": 0.0001443291041906959,
"loss": 1.7147669792175293,
"step": 1449
},
{
"epoch": 0.8362168396770473,
"grad_norm": 0.7203720808029175,
"learning_rate": 0.00014429065743944635,
"loss": 0.7103738188743591,
"step": 1450
},
{
"epoch": 0.8367935409457901,
"grad_norm": 0.7292425036430359,
"learning_rate": 0.00014425221068819686,
"loss": 0.9089938402175903,
"step": 1451
},
{
"epoch": 0.8373702422145328,
"grad_norm": 1.5864981412887573,
"learning_rate": 0.00014421376393694733,
"loss": 1.2735176086425781,
"step": 1452
},
{
"epoch": 0.8379469434832757,
"grad_norm": 0.5966582298278809,
"learning_rate": 0.0001441753171856978,
"loss": 0.8211960196495056,
"step": 1453
},
{
"epoch": 0.8385236447520185,
"grad_norm": 0.6568999886512756,
"learning_rate": 0.0001441368704344483,
"loss": 0.9273509979248047,
"step": 1454
},
{
"epoch": 0.8391003460207612,
"grad_norm": 0.6672592163085938,
"learning_rate": 0.00014409842368319877,
"loss": 0.7854159474372864,
"step": 1455
},
{
"epoch": 0.839677047289504,
"grad_norm": 1.1119751930236816,
"learning_rate": 0.00014405997693194925,
"loss": 1.2850849628448486,
"step": 1456
},
{
"epoch": 0.8402537485582469,
"grad_norm": 0.8437113165855408,
"learning_rate": 0.00014402153018069975,
"loss": 0.9052360653877258,
"step": 1457
},
{
"epoch": 0.8408304498269896,
"grad_norm": 1.1120409965515137,
"learning_rate": 0.0001439830834294502,
"loss": 1.4261767864227295,
"step": 1458
},
{
"epoch": 0.8414071510957324,
"grad_norm": 0.6494320631027222,
"learning_rate": 0.0001439446366782007,
"loss": 0.8434788584709167,
"step": 1459
},
{
"epoch": 0.8419838523644751,
"grad_norm": 0.5622795820236206,
"learning_rate": 0.00014390618992695116,
"loss": 0.646868109703064,
"step": 1460
},
{
"epoch": 0.842560553633218,
"grad_norm": 0.8375677466392517,
"learning_rate": 0.00014386774317570167,
"loss": 1.0123827457427979,
"step": 1461
},
{
"epoch": 0.8431372549019608,
"grad_norm": 0.6013731956481934,
"learning_rate": 0.00014382929642445214,
"loss": 0.7129334211349487,
"step": 1462
},
{
"epoch": 0.8437139561707035,
"grad_norm": 0.7148757576942444,
"learning_rate": 0.00014379084967320262,
"loss": 0.7350738048553467,
"step": 1463
},
{
"epoch": 0.8442906574394463,
"grad_norm": 0.7380696535110474,
"learning_rate": 0.0001437524029219531,
"loss": 0.7962418794631958,
"step": 1464
},
{
"epoch": 0.8448673587081892,
"grad_norm": 0.6836022734642029,
"learning_rate": 0.00014371395617070358,
"loss": 1.0249385833740234,
"step": 1465
},
{
"epoch": 0.845444059976932,
"grad_norm": 0.8065418004989624,
"learning_rate": 0.00014367550941945406,
"loss": 1.0036308765411377,
"step": 1466
},
{
"epoch": 0.8460207612456747,
"grad_norm": 0.8336586356163025,
"learning_rate": 0.00014363706266820454,
"loss": 0.9442139863967896,
"step": 1467
},
{
"epoch": 0.8465974625144176,
"grad_norm": 0.9105651378631592,
"learning_rate": 0.00014359861591695501,
"loss": 1.198281168937683,
"step": 1468
},
{
"epoch": 0.8471741637831603,
"grad_norm": 0.6932002902030945,
"learning_rate": 0.0001435601691657055,
"loss": 0.76617431640625,
"step": 1469
},
{
"epoch": 0.8477508650519031,
"grad_norm": 0.6474612951278687,
"learning_rate": 0.00014352172241445597,
"loss": 0.9350631237030029,
"step": 1470
},
{
"epoch": 0.8483275663206459,
"grad_norm": 1.0232489109039307,
"learning_rate": 0.00014348327566320648,
"loss": 1.2790873050689697,
"step": 1471
},
{
"epoch": 0.8489042675893888,
"grad_norm": 0.5638800263404846,
"learning_rate": 0.00014344482891195695,
"loss": 0.6640872359275818,
"step": 1472
},
{
"epoch": 0.8494809688581315,
"grad_norm": 0.7060153484344482,
"learning_rate": 0.00014340638216070743,
"loss": 0.549694299697876,
"step": 1473
},
{
"epoch": 0.8500576701268743,
"grad_norm": 0.7553113698959351,
"learning_rate": 0.0001433679354094579,
"loss": 0.6748926639556885,
"step": 1474
},
{
"epoch": 0.850634371395617,
"grad_norm": 1.0750683546066284,
"learning_rate": 0.0001433294886582084,
"loss": 1.2567592859268188,
"step": 1475
},
{
"epoch": 0.8512110726643599,
"grad_norm": 0.8767377138137817,
"learning_rate": 0.00014329104190695887,
"loss": 0.8606712818145752,
"step": 1476
},
{
"epoch": 0.8517877739331027,
"grad_norm": 0.8583175539970398,
"learning_rate": 0.00014325259515570935,
"loss": 1.0961095094680786,
"step": 1477
},
{
"epoch": 0.8523644752018454,
"grad_norm": 0.8185640573501587,
"learning_rate": 0.00014321414840445982,
"loss": 0.9456279277801514,
"step": 1478
},
{
"epoch": 0.8529411764705882,
"grad_norm": 0.7922638058662415,
"learning_rate": 0.0001431757016532103,
"loss": 0.8527402281761169,
"step": 1479
},
{
"epoch": 0.8535178777393311,
"grad_norm": 0.8317216634750366,
"learning_rate": 0.00014313725490196078,
"loss": 1.0812233686447144,
"step": 1480
},
{
"epoch": 0.8540945790080738,
"grad_norm": 0.5592607855796814,
"learning_rate": 0.00014309880815071126,
"loss": 0.6856215000152588,
"step": 1481
},
{
"epoch": 0.8546712802768166,
"grad_norm": 0.6144684553146362,
"learning_rate": 0.00014306036139946174,
"loss": 0.8217105269432068,
"step": 1482
},
{
"epoch": 0.8552479815455594,
"grad_norm": 0.8721742630004883,
"learning_rate": 0.00014302191464821224,
"loss": 1.1268048286437988,
"step": 1483
},
{
"epoch": 0.8558246828143022,
"grad_norm": 0.7512510418891907,
"learning_rate": 0.00014298346789696272,
"loss": 0.7509297132492065,
"step": 1484
},
{
"epoch": 0.856401384083045,
"grad_norm": 0.7145662307739258,
"learning_rate": 0.0001429450211457132,
"loss": 0.787600040435791,
"step": 1485
},
{
"epoch": 0.8569780853517878,
"grad_norm": 0.5714643597602844,
"learning_rate": 0.00014290657439446368,
"loss": 0.5843244791030884,
"step": 1486
},
{
"epoch": 0.8575547866205305,
"grad_norm": 0.567432701587677,
"learning_rate": 0.00014286812764321416,
"loss": 0.5819793939590454,
"step": 1487
},
{
"epoch": 0.8581314878892734,
"grad_norm": 0.7957308888435364,
"learning_rate": 0.00014282968089196463,
"loss": 1.127239465713501,
"step": 1488
},
{
"epoch": 0.8587081891580162,
"grad_norm": 0.6828871369361877,
"learning_rate": 0.0001427912341407151,
"loss": 0.8339288234710693,
"step": 1489
},
{
"epoch": 0.8592848904267589,
"grad_norm": 0.6947774887084961,
"learning_rate": 0.0001427527873894656,
"loss": 0.8848856687545776,
"step": 1490
},
{
"epoch": 0.8598615916955017,
"grad_norm": 0.7703558802604675,
"learning_rate": 0.00014271434063821607,
"loss": 1.1964079141616821,
"step": 1491
},
{
"epoch": 0.8604382929642446,
"grad_norm": 0.9820204973220825,
"learning_rate": 0.00014267589388696655,
"loss": 1.3156203031539917,
"step": 1492
},
{
"epoch": 0.8610149942329873,
"grad_norm": 0.663357138633728,
"learning_rate": 0.00014263744713571703,
"loss": 1.1208245754241943,
"step": 1493
},
{
"epoch": 0.8615916955017301,
"grad_norm": 0.6204859018325806,
"learning_rate": 0.00014259900038446753,
"loss": 0.8412761688232422,
"step": 1494
},
{
"epoch": 0.8621683967704729,
"grad_norm": 0.8673816323280334,
"learning_rate": 0.000142560553633218,
"loss": 0.9236775040626526,
"step": 1495
},
{
"epoch": 0.8627450980392157,
"grad_norm": 0.6511439681053162,
"learning_rate": 0.0001425221068819685,
"loss": 0.8711351156234741,
"step": 1496
},
{
"epoch": 0.8633217993079585,
"grad_norm": 0.5167029500007629,
"learning_rate": 0.00014248366013071897,
"loss": 0.6116561889648438,
"step": 1497
},
{
"epoch": 0.8638985005767013,
"grad_norm": 0.6007522940635681,
"learning_rate": 0.00014244521337946944,
"loss": 0.7663001418113708,
"step": 1498
},
{
"epoch": 0.864475201845444,
"grad_norm": 0.5924880504608154,
"learning_rate": 0.00014240676662821992,
"loss": 0.6707437038421631,
"step": 1499
},
{
"epoch": 0.8650519031141869,
"grad_norm": 0.859641969203949,
"learning_rate": 0.0001423683198769704,
"loss": 1.0436668395996094,
"step": 1500
}
],
"logging_steps": 1,
"max_steps": 5202,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 9.361771665599693e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}