haniem-codet5 / checkpoint-15276 /trainer_state.json
root
Upload trained Java code repair model
39534cc
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 15276,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.009819324430479183,
"grad_norm": 0.24714912474155426,
"learning_rate": 4.983961770096884e-05,
"loss": 0.3447,
"step": 50
},
{
"epoch": 0.019638648860958365,
"grad_norm": 0.12986360490322113,
"learning_rate": 4.967596229379419e-05,
"loss": 0.0057,
"step": 100
},
{
"epoch": 0.02945797329143755,
"grad_norm": 0.13787369430065155,
"learning_rate": 4.9512306886619534e-05,
"loss": 0.0041,
"step": 150
},
{
"epoch": 0.03927729772191673,
"grad_norm": 0.032512035220861435,
"learning_rate": 4.9348651479444883e-05,
"loss": 0.0031,
"step": 200
},
{
"epoch": 0.049096622152395915,
"grad_norm": 0.04660605266690254,
"learning_rate": 4.9184996072270227e-05,
"loss": 0.0017,
"step": 250
},
{
"epoch": 0.0589159465828751,
"grad_norm": 0.12520131468772888,
"learning_rate": 4.9021340665095576e-05,
"loss": 0.0023,
"step": 300
},
{
"epoch": 0.06873527101335428,
"grad_norm": 0.03921537473797798,
"learning_rate": 4.8857685257920926e-05,
"loss": 0.0016,
"step": 350
},
{
"epoch": 0.07855459544383346,
"grad_norm": 0.019508639350533485,
"learning_rate": 4.869402985074627e-05,
"loss": 0.0015,
"step": 400
},
{
"epoch": 0.08837391987431265,
"grad_norm": 0.03215921297669411,
"learning_rate": 4.853037444357162e-05,
"loss": 0.0018,
"step": 450
},
{
"epoch": 0.09819324430479183,
"grad_norm": 0.01678670570254326,
"learning_rate": 4.836671903639696e-05,
"loss": 0.0012,
"step": 500
},
{
"epoch": 0.10801256873527101,
"grad_norm": 0.04076138511300087,
"learning_rate": 4.820306362922231e-05,
"loss": 0.0013,
"step": 550
},
{
"epoch": 0.1178318931657502,
"grad_norm": 0.028516946360468864,
"learning_rate": 4.803940822204766e-05,
"loss": 0.001,
"step": 600
},
{
"epoch": 0.12765121759622938,
"grad_norm": 0.0024934338871389627,
"learning_rate": 4.7875752814873006e-05,
"loss": 0.0013,
"step": 650
},
{
"epoch": 0.13747054202670855,
"grad_norm": 0.009505635127425194,
"learning_rate": 4.7712097407698356e-05,
"loss": 0.0006,
"step": 700
},
{
"epoch": 0.14728986645718775,
"grad_norm": 0.004126217681914568,
"learning_rate": 4.75484420005237e-05,
"loss": 0.0009,
"step": 750
},
{
"epoch": 0.15710919088766692,
"grad_norm": 0.001864357735030353,
"learning_rate": 4.738478659334905e-05,
"loss": 0.0006,
"step": 800
},
{
"epoch": 0.16692851531814612,
"grad_norm": 0.05715826526284218,
"learning_rate": 4.722113118617439e-05,
"loss": 0.0009,
"step": 850
},
{
"epoch": 0.1767478397486253,
"grad_norm": 0.01866191253066063,
"learning_rate": 4.7057475778999735e-05,
"loss": 0.0008,
"step": 900
},
{
"epoch": 0.1865671641791045,
"grad_norm": 0.02309831976890564,
"learning_rate": 4.6893820371825085e-05,
"loss": 0.0009,
"step": 950
},
{
"epoch": 0.19638648860958366,
"grad_norm": 0.008644412271678448,
"learning_rate": 4.6730164964650435e-05,
"loss": 0.0008,
"step": 1000
},
{
"epoch": 0.20620581304006286,
"grad_norm": 0.0034850463271141052,
"learning_rate": 4.656650955747578e-05,
"loss": 0.0008,
"step": 1050
},
{
"epoch": 0.21602513747054203,
"grad_norm": 0.015649326145648956,
"learning_rate": 4.640285415030113e-05,
"loss": 0.0005,
"step": 1100
},
{
"epoch": 0.2258444619010212,
"grad_norm": 0.017577216029167175,
"learning_rate": 4.623919874312648e-05,
"loss": 0.0005,
"step": 1150
},
{
"epoch": 0.2356637863315004,
"grad_norm": 0.001943176961503923,
"learning_rate": 4.607554333595182e-05,
"loss": 0.0006,
"step": 1200
},
{
"epoch": 0.24548311076197957,
"grad_norm": 0.013552217744290829,
"learning_rate": 4.591188792877717e-05,
"loss": 0.0007,
"step": 1250
},
{
"epoch": 0.25530243519245877,
"grad_norm": 0.0009095704299397767,
"learning_rate": 4.574823252160252e-05,
"loss": 0.0006,
"step": 1300
},
{
"epoch": 0.26512175962293794,
"grad_norm": 0.12323999404907227,
"learning_rate": 4.5584577114427864e-05,
"loss": 0.001,
"step": 1350
},
{
"epoch": 0.2749410840534171,
"grad_norm": 0.0046529932878911495,
"learning_rate": 4.5420921707253214e-05,
"loss": 0.0006,
"step": 1400
},
{
"epoch": 0.28476040848389633,
"grad_norm": 0.0006078369333408773,
"learning_rate": 4.525726630007856e-05,
"loss": 0.0005,
"step": 1450
},
{
"epoch": 0.2945797329143755,
"grad_norm": 0.0011770805576816201,
"learning_rate": 4.509361089290391e-05,
"loss": 0.0003,
"step": 1500
},
{
"epoch": 0.3043990573448547,
"grad_norm": 0.004274972248822451,
"learning_rate": 4.492995548572925e-05,
"loss": 0.0005,
"step": 1550
},
{
"epoch": 0.31421838177533384,
"grad_norm": 0.0007647090242244303,
"learning_rate": 4.476630007855459e-05,
"loss": 0.0007,
"step": 1600
},
{
"epoch": 0.324037706205813,
"grad_norm": 0.013916688971221447,
"learning_rate": 4.460264467137994e-05,
"loss": 0.0005,
"step": 1650
},
{
"epoch": 0.33385703063629224,
"grad_norm": 0.001363221788778901,
"learning_rate": 4.443898926420529e-05,
"loss": 0.0004,
"step": 1700
},
{
"epoch": 0.3436763550667714,
"grad_norm": 0.00033090286888182163,
"learning_rate": 4.4275333857030636e-05,
"loss": 0.0004,
"step": 1750
},
{
"epoch": 0.3534956794972506,
"grad_norm": 0.000668744498398155,
"learning_rate": 4.4111678449855986e-05,
"loss": 0.0006,
"step": 1800
},
{
"epoch": 0.36331500392772975,
"grad_norm": 0.0539139024913311,
"learning_rate": 4.394802304268133e-05,
"loss": 0.0005,
"step": 1850
},
{
"epoch": 0.373134328358209,
"grad_norm": 0.0008487588493153453,
"learning_rate": 4.378436763550668e-05,
"loss": 0.0004,
"step": 1900
},
{
"epoch": 0.38295365278868815,
"grad_norm": 0.0008443322731181979,
"learning_rate": 4.362071222833203e-05,
"loss": 0.0006,
"step": 1950
},
{
"epoch": 0.3927729772191673,
"grad_norm": 0.00504659116268158,
"learning_rate": 4.345705682115737e-05,
"loss": 0.0006,
"step": 2000
},
{
"epoch": 0.4025923016496465,
"grad_norm": 0.19237877428531647,
"learning_rate": 4.329340141398272e-05,
"loss": 0.0003,
"step": 2050
},
{
"epoch": 0.4124116260801257,
"grad_norm": 0.052810050547122955,
"learning_rate": 4.312974600680807e-05,
"loss": 0.0007,
"step": 2100
},
{
"epoch": 0.4222309505106049,
"grad_norm": 0.0010182256810367107,
"learning_rate": 4.2966090599633415e-05,
"loss": 0.0006,
"step": 2150
},
{
"epoch": 0.43205027494108406,
"grad_norm": 0.008011899888515472,
"learning_rate": 4.2802435192458765e-05,
"loss": 0.0004,
"step": 2200
},
{
"epoch": 0.4418695993715632,
"grad_norm": 0.004001118242740631,
"learning_rate": 4.263877978528411e-05,
"loss": 0.0003,
"step": 2250
},
{
"epoch": 0.4516889238020424,
"grad_norm": 0.0005824828986078501,
"learning_rate": 4.247512437810945e-05,
"loss": 0.0004,
"step": 2300
},
{
"epoch": 0.4615082482325216,
"grad_norm": 0.00035399169428274035,
"learning_rate": 4.23114689709348e-05,
"loss": 0.0006,
"step": 2350
},
{
"epoch": 0.4713275726630008,
"grad_norm": 0.0008132366347126663,
"learning_rate": 4.2147813563760144e-05,
"loss": 0.0004,
"step": 2400
},
{
"epoch": 0.48114689709347996,
"grad_norm": 0.00029086056747473776,
"learning_rate": 4.1984158156585494e-05,
"loss": 0.0004,
"step": 2450
},
{
"epoch": 0.49096622152395913,
"grad_norm": 0.009674192406237125,
"learning_rate": 4.1820502749410844e-05,
"loss": 0.0003,
"step": 2500
},
{
"epoch": 0.5007855459544384,
"grad_norm": 0.0002215866988990456,
"learning_rate": 4.165684734223619e-05,
"loss": 0.0003,
"step": 2550
},
{
"epoch": 0.5106048703849175,
"grad_norm": 0.0003756559453904629,
"learning_rate": 4.149319193506154e-05,
"loss": 0.0003,
"step": 2600
},
{
"epoch": 0.5204241948153967,
"grad_norm": 3.785878652706742e-05,
"learning_rate": 4.132953652788688e-05,
"loss": 0.0002,
"step": 2650
},
{
"epoch": 0.5302435192458759,
"grad_norm": 0.007607210893183947,
"learning_rate": 4.116588112071223e-05,
"loss": 0.0003,
"step": 2700
},
{
"epoch": 0.540062843676355,
"grad_norm": 0.02198871783912182,
"learning_rate": 4.100222571353758e-05,
"loss": 0.0002,
"step": 2750
},
{
"epoch": 0.5498821681068342,
"grad_norm": 0.00041103907278738916,
"learning_rate": 4.0838570306362923e-05,
"loss": 0.0002,
"step": 2800
},
{
"epoch": 0.5597014925373134,
"grad_norm": 0.01565481722354889,
"learning_rate": 4.067491489918827e-05,
"loss": 0.0003,
"step": 2850
},
{
"epoch": 0.5695208169677927,
"grad_norm": 0.0117128761485219,
"learning_rate": 4.051125949201362e-05,
"loss": 0.0004,
"step": 2900
},
{
"epoch": 0.5793401413982718,
"grad_norm": 0.004789320752024651,
"learning_rate": 4.0347604084838966e-05,
"loss": 0.0004,
"step": 2950
},
{
"epoch": 0.589159465828751,
"grad_norm": 0.0011219610460102558,
"learning_rate": 4.018394867766431e-05,
"loss": 0.0004,
"step": 3000
},
{
"epoch": 0.5989787902592302,
"grad_norm": 0.0518529899418354,
"learning_rate": 4.002029327048966e-05,
"loss": 0.0004,
"step": 3050
},
{
"epoch": 0.6087981146897093,
"grad_norm": 0.01609092392027378,
"learning_rate": 3.9856637863315e-05,
"loss": 0.0003,
"step": 3100
},
{
"epoch": 0.6186174391201885,
"grad_norm": 0.002343968488276005,
"learning_rate": 3.969298245614035e-05,
"loss": 0.0003,
"step": 3150
},
{
"epoch": 0.6284367635506677,
"grad_norm": 0.010258428752422333,
"learning_rate": 3.9529327048965696e-05,
"loss": 0.0003,
"step": 3200
},
{
"epoch": 0.6382560879811469,
"grad_norm": 0.0007253732765093446,
"learning_rate": 3.9365671641791046e-05,
"loss": 0.0004,
"step": 3250
},
{
"epoch": 0.648075412411626,
"grad_norm": 0.01221112348139286,
"learning_rate": 3.9202016234616395e-05,
"loss": 0.0003,
"step": 3300
},
{
"epoch": 0.6578947368421053,
"grad_norm": 0.007547037675976753,
"learning_rate": 3.903836082744174e-05,
"loss": 0.0004,
"step": 3350
},
{
"epoch": 0.6677140612725845,
"grad_norm": 0.0037480357568711042,
"learning_rate": 3.887470542026709e-05,
"loss": 0.0003,
"step": 3400
},
{
"epoch": 0.6775333857030637,
"grad_norm": 9.382007556268945e-05,
"learning_rate": 3.871105001309244e-05,
"loss": 0.0005,
"step": 3450
},
{
"epoch": 0.6873527101335428,
"grad_norm": 0.0019377778517082334,
"learning_rate": 3.854739460591778e-05,
"loss": 0.0003,
"step": 3500
},
{
"epoch": 0.697172034564022,
"grad_norm": 0.00031467623193748295,
"learning_rate": 3.838373919874313e-05,
"loss": 0.0003,
"step": 3550
},
{
"epoch": 0.7069913589945012,
"grad_norm": 0.012330332770943642,
"learning_rate": 3.8220083791568475e-05,
"loss": 0.0004,
"step": 3600
},
{
"epoch": 0.7168106834249803,
"grad_norm": 0.43893539905548096,
"learning_rate": 3.8056428384393825e-05,
"loss": 0.002,
"step": 3650
},
{
"epoch": 0.7266300078554595,
"grad_norm": 0.010738670825958252,
"learning_rate": 3.789277297721917e-05,
"loss": 0.0008,
"step": 3700
},
{
"epoch": 0.7364493322859387,
"grad_norm": 0.0009258920326828957,
"learning_rate": 3.772911757004452e-05,
"loss": 0.0005,
"step": 3750
},
{
"epoch": 0.746268656716418,
"grad_norm": 0.002003765432164073,
"learning_rate": 3.756546216286986e-05,
"loss": 0.0004,
"step": 3800
},
{
"epoch": 0.7560879811468971,
"grad_norm": 0.0003309960593469441,
"learning_rate": 3.740180675569521e-05,
"loss": 0.0004,
"step": 3850
},
{
"epoch": 0.7659073055773763,
"grad_norm": 0.014759145677089691,
"learning_rate": 3.7238151348520554e-05,
"loss": 0.0004,
"step": 3900
},
{
"epoch": 0.7757266300078555,
"grad_norm": 0.00703453179448843,
"learning_rate": 3.7074495941345904e-05,
"loss": 0.0005,
"step": 3950
},
{
"epoch": 0.7855459544383346,
"grad_norm": 0.0093814292922616,
"learning_rate": 3.691084053417125e-05,
"loss": 0.0004,
"step": 4000
},
{
"epoch": 0.7953652788688138,
"grad_norm": 0.00033789846929721534,
"learning_rate": 3.67471851269966e-05,
"loss": 0.0003,
"step": 4050
},
{
"epoch": 0.805184603299293,
"grad_norm": 0.004072342533618212,
"learning_rate": 3.658352971982195e-05,
"loss": 0.0004,
"step": 4100
},
{
"epoch": 0.8150039277297721,
"grad_norm": 0.005905472207814455,
"learning_rate": 3.641987431264729e-05,
"loss": 0.0004,
"step": 4150
},
{
"epoch": 0.8248232521602514,
"grad_norm": 0.008759435266256332,
"learning_rate": 3.625621890547264e-05,
"loss": 0.0004,
"step": 4200
},
{
"epoch": 0.8346425765907306,
"grad_norm": 0.01638154312968254,
"learning_rate": 3.609256349829799e-05,
"loss": 0.0003,
"step": 4250
},
{
"epoch": 0.8444619010212098,
"grad_norm": 0.003001204691827297,
"learning_rate": 3.592890809112333e-05,
"loss": 0.0004,
"step": 4300
},
{
"epoch": 0.8542812254516889,
"grad_norm": 0.0006073117256164551,
"learning_rate": 3.576525268394868e-05,
"loss": 0.0003,
"step": 4350
},
{
"epoch": 0.8641005498821681,
"grad_norm": 0.003654340049251914,
"learning_rate": 3.5601597276774026e-05,
"loss": 0.0005,
"step": 4400
},
{
"epoch": 0.8739198743126473,
"grad_norm": 0.2316829115152359,
"learning_rate": 3.5437941869599376e-05,
"loss": 0.0005,
"step": 4450
},
{
"epoch": 0.8837391987431265,
"grad_norm": 0.027139848098158836,
"learning_rate": 3.527428646242472e-05,
"loss": 0.0005,
"step": 4500
},
{
"epoch": 0.8935585231736056,
"grad_norm": 0.004498090595006943,
"learning_rate": 3.511063105525006e-05,
"loss": 0.0004,
"step": 4550
},
{
"epoch": 0.9033778476040848,
"grad_norm": 0.010397707112133503,
"learning_rate": 3.494697564807541e-05,
"loss": 0.0003,
"step": 4600
},
{
"epoch": 0.9131971720345641,
"grad_norm": 0.00014255594578571618,
"learning_rate": 3.478332024090076e-05,
"loss": 0.0003,
"step": 4650
},
{
"epoch": 0.9230164964650432,
"grad_norm": 0.0028459234163165092,
"learning_rate": 3.4619664833726105e-05,
"loss": 0.0003,
"step": 4700
},
{
"epoch": 0.9328358208955224,
"grad_norm": 0.0030736555345356464,
"learning_rate": 3.4456009426551455e-05,
"loss": 0.0002,
"step": 4750
},
{
"epoch": 0.9426551453260016,
"grad_norm": 0.0026709907688200474,
"learning_rate": 3.4292354019376805e-05,
"loss": 0.0003,
"step": 4800
},
{
"epoch": 0.9524744697564808,
"grad_norm": 0.00022516479657497257,
"learning_rate": 3.412869861220215e-05,
"loss": 0.0003,
"step": 4850
},
{
"epoch": 0.9622937941869599,
"grad_norm": 2.6079122108058073e-05,
"learning_rate": 3.39650432050275e-05,
"loss": 0.0002,
"step": 4900
},
{
"epoch": 0.9721131186174391,
"grad_norm": 0.0010724954772740602,
"learning_rate": 3.380138779785284e-05,
"loss": 0.0003,
"step": 4950
},
{
"epoch": 0.9819324430479183,
"grad_norm": 0.00028140624635852873,
"learning_rate": 3.363773239067819e-05,
"loss": 0.0004,
"step": 5000
},
{
"epoch": 0.9917517674783974,
"grad_norm": 0.002564377384260297,
"learning_rate": 3.347407698350354e-05,
"loss": 0.0003,
"step": 5050
},
{
"epoch": 1.0015710919088767,
"grad_norm": 0.0027151070535182953,
"learning_rate": 3.3310421576328884e-05,
"loss": 0.0002,
"step": 5100
},
{
"epoch": 1.011390416339356,
"grad_norm": 0.0002080993290292099,
"learning_rate": 3.3146766169154234e-05,
"loss": 0.0003,
"step": 5150
},
{
"epoch": 1.021209740769835,
"grad_norm": 3.149580516037531e-05,
"learning_rate": 3.298311076197958e-05,
"loss": 0.0003,
"step": 5200
},
{
"epoch": 1.0310290652003142,
"grad_norm": 0.005241520702838898,
"learning_rate": 3.281945535480492e-05,
"loss": 0.0003,
"step": 5250
},
{
"epoch": 1.0408483896307934,
"grad_norm": 0.005873743910342455,
"learning_rate": 3.265579994763027e-05,
"loss": 0.0002,
"step": 5300
},
{
"epoch": 1.0506677140612726,
"grad_norm": 0.006296052597463131,
"learning_rate": 3.2492144540455613e-05,
"loss": 0.0003,
"step": 5350
},
{
"epoch": 1.0604870384917517,
"grad_norm": 0.00039794211625121534,
"learning_rate": 3.232848913328096e-05,
"loss": 0.0004,
"step": 5400
},
{
"epoch": 1.070306362922231,
"grad_norm": 0.0039017247036099434,
"learning_rate": 3.216483372610631e-05,
"loss": 0.0003,
"step": 5450
},
{
"epoch": 1.08012568735271,
"grad_norm": 0.002064199186861515,
"learning_rate": 3.2001178318931656e-05,
"loss": 0.0003,
"step": 5500
},
{
"epoch": 1.0899450117831893,
"grad_norm": 0.0002229887613793835,
"learning_rate": 3.1837522911757006e-05,
"loss": 0.0003,
"step": 5550
},
{
"epoch": 1.0997643362136684,
"grad_norm": 3.4759577829390764e-05,
"learning_rate": 3.1673867504582356e-05,
"loss": 0.0004,
"step": 5600
},
{
"epoch": 1.1095836606441476,
"grad_norm": 2.991587643919047e-05,
"learning_rate": 3.15102120974077e-05,
"loss": 0.0002,
"step": 5650
},
{
"epoch": 1.1194029850746268,
"grad_norm": 0.0017788242548704147,
"learning_rate": 3.134655669023305e-05,
"loss": 0.0003,
"step": 5700
},
{
"epoch": 1.129222309505106,
"grad_norm": 0.008592754602432251,
"learning_rate": 3.118290128305839e-05,
"loss": 0.0003,
"step": 5750
},
{
"epoch": 1.139041633935585,
"grad_norm": 0.0017567313043400645,
"learning_rate": 3.101924587588374e-05,
"loss": 0.0003,
"step": 5800
},
{
"epoch": 1.1488609583660645,
"grad_norm": 8.339332271134481e-05,
"learning_rate": 3.085559046870909e-05,
"loss": 0.0002,
"step": 5850
},
{
"epoch": 1.1586802827965437,
"grad_norm": 0.00681919464841485,
"learning_rate": 3.0691935061534435e-05,
"loss": 0.0003,
"step": 5900
},
{
"epoch": 1.1684996072270228,
"grad_norm": 1.7358417608193122e-05,
"learning_rate": 3.052827965435978e-05,
"loss": 0.0002,
"step": 5950
},
{
"epoch": 1.178318931657502,
"grad_norm": 0.005276073236018419,
"learning_rate": 3.0364624247185132e-05,
"loss": 0.0003,
"step": 6000
},
{
"epoch": 1.1881382560879812,
"grad_norm": 1.6649060853524134e-05,
"learning_rate": 3.0200968840010475e-05,
"loss": 0.0003,
"step": 6050
},
{
"epoch": 1.1979575805184604,
"grad_norm": 1.6310365026583895e-05,
"learning_rate": 3.003731343283582e-05,
"loss": 0.0001,
"step": 6100
},
{
"epoch": 1.2077769049489395,
"grad_norm": 8.225607598433271e-05,
"learning_rate": 2.987365802566117e-05,
"loss": 0.0003,
"step": 6150
},
{
"epoch": 1.2175962293794187,
"grad_norm": 0.06942308694124222,
"learning_rate": 2.9710002618486515e-05,
"loss": 0.0006,
"step": 6200
},
{
"epoch": 1.2274155538098979,
"grad_norm": 0.0003915784473065287,
"learning_rate": 2.9546347211311865e-05,
"loss": 0.0008,
"step": 6250
},
{
"epoch": 1.237234878240377,
"grad_norm": 5.1170645747333765e-05,
"learning_rate": 2.9382691804137208e-05,
"loss": 0.0004,
"step": 6300
},
{
"epoch": 1.2470542026708562,
"grad_norm": 0.005075642839074135,
"learning_rate": 2.9219036396962558e-05,
"loss": 0.0006,
"step": 6350
},
{
"epoch": 1.2568735271013354,
"grad_norm": 0.013923396356403828,
"learning_rate": 2.9055380989787908e-05,
"loss": 0.0003,
"step": 6400
},
{
"epoch": 1.2666928515318145,
"grad_norm": 0.010688439942896366,
"learning_rate": 2.889172558261325e-05,
"loss": 0.0004,
"step": 6450
},
{
"epoch": 1.2765121759622937,
"grad_norm": 0.005004653707146645,
"learning_rate": 2.8728070175438597e-05,
"loss": 0.0002,
"step": 6500
},
{
"epoch": 1.286331500392773,
"grad_norm": 0.015412525273859501,
"learning_rate": 2.8564414768263947e-05,
"loss": 0.0003,
"step": 6550
},
{
"epoch": 1.2961508248232523,
"grad_norm": 0.002696032403036952,
"learning_rate": 2.840075936108929e-05,
"loss": 0.0002,
"step": 6600
},
{
"epoch": 1.3059701492537314,
"grad_norm": 0.0005274811992421746,
"learning_rate": 2.823710395391464e-05,
"loss": 0.0003,
"step": 6650
},
{
"epoch": 1.3157894736842106,
"grad_norm": 2.0785410015378147e-05,
"learning_rate": 2.8073448546739983e-05,
"loss": 0.0003,
"step": 6700
},
{
"epoch": 1.3256087981146898,
"grad_norm": 0.0011626353953033686,
"learning_rate": 2.7909793139565333e-05,
"loss": 0.0001,
"step": 6750
},
{
"epoch": 1.335428122545169,
"grad_norm": 0.0003097439184784889,
"learning_rate": 2.774613773239068e-05,
"loss": 0.0002,
"step": 6800
},
{
"epoch": 1.3452474469756481,
"grad_norm": 5.218560909270309e-05,
"learning_rate": 2.7582482325216026e-05,
"loss": 0.0002,
"step": 6850
},
{
"epoch": 1.3550667714061273,
"grad_norm": 4.878486288362183e-05,
"learning_rate": 2.7418826918041373e-05,
"loss": 0.0002,
"step": 6900
},
{
"epoch": 1.3648860958366065,
"grad_norm": 1.3592688446806278e-05,
"learning_rate": 2.7255171510866723e-05,
"loss": 0.0003,
"step": 6950
},
{
"epoch": 1.3747054202670856,
"grad_norm": 0.0010299599962309003,
"learning_rate": 2.7091516103692066e-05,
"loss": 0.0003,
"step": 7000
},
{
"epoch": 1.3845247446975648,
"grad_norm": 0.005047979764640331,
"learning_rate": 2.6927860696517416e-05,
"loss": 0.0003,
"step": 7050
},
{
"epoch": 1.394344069128044,
"grad_norm": 0.00903361290693283,
"learning_rate": 2.676420528934276e-05,
"loss": 0.0005,
"step": 7100
},
{
"epoch": 1.4041633935585232,
"grad_norm": 0.00216244556941092,
"learning_rate": 2.660054988216811e-05,
"loss": 0.0004,
"step": 7150
},
{
"epoch": 1.4139827179890023,
"grad_norm": 0.008215115405619144,
"learning_rate": 2.6436894474993455e-05,
"loss": 0.0003,
"step": 7200
},
{
"epoch": 1.4238020424194815,
"grad_norm": 0.00024291670706588775,
"learning_rate": 2.62732390678188e-05,
"loss": 0.0003,
"step": 7250
},
{
"epoch": 1.4336213668499607,
"grad_norm": 0.004875461105257273,
"learning_rate": 2.610958366064415e-05,
"loss": 0.0003,
"step": 7300
},
{
"epoch": 1.4434406912804398,
"grad_norm": 0.000312354473862797,
"learning_rate": 2.59459282534695e-05,
"loss": 0.0003,
"step": 7350
},
{
"epoch": 1.453260015710919,
"grad_norm": 8.923052519094199e-05,
"learning_rate": 2.578227284629484e-05,
"loss": 0.0004,
"step": 7400
},
{
"epoch": 1.4630793401413982,
"grad_norm": 0.008472305722534657,
"learning_rate": 2.561861743912019e-05,
"loss": 0.0003,
"step": 7450
},
{
"epoch": 1.4728986645718773,
"grad_norm": 4.937009362038225e-05,
"learning_rate": 2.5454962031945535e-05,
"loss": 0.0002,
"step": 7500
},
{
"epoch": 1.4827179890023565,
"grad_norm": 0.0038520190864801407,
"learning_rate": 2.5291306624770885e-05,
"loss": 0.0003,
"step": 7550
},
{
"epoch": 1.4925373134328357,
"grad_norm": 0.002998525742441416,
"learning_rate": 2.512765121759623e-05,
"loss": 0.0003,
"step": 7600
},
{
"epoch": 1.5023566378633149,
"grad_norm": 0.0005952705978415906,
"learning_rate": 2.4963995810421578e-05,
"loss": 0.0003,
"step": 7650
},
{
"epoch": 1.512175962293794,
"grad_norm": 2.007077455346007e-05,
"learning_rate": 2.4800340403246924e-05,
"loss": 0.0002,
"step": 7700
},
{
"epoch": 1.5219952867242734,
"grad_norm": 0.002962449798360467,
"learning_rate": 2.463668499607227e-05,
"loss": 0.0003,
"step": 7750
},
{
"epoch": 1.5318146111547526,
"grad_norm": 3.4512224374338984e-05,
"learning_rate": 2.447302958889762e-05,
"loss": 0.0002,
"step": 7800
},
{
"epoch": 1.5416339355852318,
"grad_norm": 5.3643165301764384e-05,
"learning_rate": 2.4309374181722967e-05,
"loss": 0.0002,
"step": 7850
},
{
"epoch": 1.551453260015711,
"grad_norm": 4.536865890258923e-05,
"learning_rate": 2.4145718774548314e-05,
"loss": 0.0003,
"step": 7900
},
{
"epoch": 1.56127258444619,
"grad_norm": 0.0017064092680811882,
"learning_rate": 2.3982063367373657e-05,
"loss": 0.0003,
"step": 7950
},
{
"epoch": 1.5710919088766693,
"grad_norm": 0.0028170356526970863,
"learning_rate": 2.3818407960199007e-05,
"loss": 0.0003,
"step": 8000
},
{
"epoch": 1.5809112333071484,
"grad_norm": 6.762581324437633e-05,
"learning_rate": 2.3654752553024353e-05,
"loss": 0.0003,
"step": 8050
},
{
"epoch": 1.5907305577376276,
"grad_norm": 5.572327063418925e-05,
"learning_rate": 2.34910971458497e-05,
"loss": 0.0001,
"step": 8100
},
{
"epoch": 1.6005498821681068,
"grad_norm": 4.8497397074243054e-05,
"learning_rate": 2.3327441738675046e-05,
"loss": 0.0003,
"step": 8150
},
{
"epoch": 1.6103692065985862,
"grad_norm": 0.000499077548738569,
"learning_rate": 2.3163786331500396e-05,
"loss": 0.0003,
"step": 8200
},
{
"epoch": 1.6201885310290653,
"grad_norm": 0.0007215180085040629,
"learning_rate": 2.3000130924325743e-05,
"loss": 0.0003,
"step": 8250
},
{
"epoch": 1.6300078554595445,
"grad_norm": 0.00015634812007192522,
"learning_rate": 2.2836475517151086e-05,
"loss": 0.0002,
"step": 8300
},
{
"epoch": 1.6398271798900237,
"grad_norm": 0.010850101709365845,
"learning_rate": 2.2672820109976432e-05,
"loss": 0.0002,
"step": 8350
},
{
"epoch": 1.6496465043205029,
"grad_norm": 0.0009706264827400446,
"learning_rate": 2.2509164702801782e-05,
"loss": 0.0004,
"step": 8400
},
{
"epoch": 1.659465828750982,
"grad_norm": 3.204784661647864e-05,
"learning_rate": 2.234550929562713e-05,
"loss": 0.0003,
"step": 8450
},
{
"epoch": 1.6692851531814612,
"grad_norm": 0.009143730625510216,
"learning_rate": 2.2181853888452475e-05,
"loss": 0.0003,
"step": 8500
},
{
"epoch": 1.6791044776119404,
"grad_norm": 0.005089063663035631,
"learning_rate": 2.2018198481277822e-05,
"loss": 0.0004,
"step": 8550
},
{
"epoch": 1.6889238020424195,
"grad_norm": 0.0249613169580698,
"learning_rate": 2.1854543074103172e-05,
"loss": 0.0003,
"step": 8600
},
{
"epoch": 1.6987431264728987,
"grad_norm": 0.000527512573171407,
"learning_rate": 2.1690887666928515e-05,
"loss": 0.0003,
"step": 8650
},
{
"epoch": 1.7085624509033779,
"grad_norm": 0.0011643558973446488,
"learning_rate": 2.152723225975386e-05,
"loss": 0.0001,
"step": 8700
},
{
"epoch": 1.718381775333857,
"grad_norm": 0.0030941637232899666,
"learning_rate": 2.1363576852579208e-05,
"loss": 0.0002,
"step": 8750
},
{
"epoch": 1.7282010997643362,
"grad_norm": 2.4613122150185518e-05,
"learning_rate": 2.1199921445404558e-05,
"loss": 0.0002,
"step": 8800
},
{
"epoch": 1.7380204241948154,
"grad_norm": 0.009969648905098438,
"learning_rate": 2.1036266038229905e-05,
"loss": 0.0003,
"step": 8850
},
{
"epoch": 1.7478397486252946,
"grad_norm": 0.0028629146981984377,
"learning_rate": 2.087261063105525e-05,
"loss": 0.0003,
"step": 8900
},
{
"epoch": 1.7576590730557737,
"grad_norm": 0.005532170180231333,
"learning_rate": 2.0708955223880598e-05,
"loss": 0.0002,
"step": 8950
},
{
"epoch": 1.767478397486253,
"grad_norm": 0.0001023332224576734,
"learning_rate": 2.0545299816705944e-05,
"loss": 0.0004,
"step": 9000
},
{
"epoch": 1.777297721916732,
"grad_norm": 0.0016099640633910894,
"learning_rate": 2.038164440953129e-05,
"loss": 0.0002,
"step": 9050
},
{
"epoch": 1.7871170463472112,
"grad_norm": 0.00755878584459424,
"learning_rate": 2.0217989002356637e-05,
"loss": 0.0003,
"step": 9100
},
{
"epoch": 1.7969363707776904,
"grad_norm": 0.00131377880461514,
"learning_rate": 2.0054333595181984e-05,
"loss": 0.0003,
"step": 9150
},
{
"epoch": 1.8067556952081696,
"grad_norm": 0.0066925594583153725,
"learning_rate": 1.9890678188007334e-05,
"loss": 0.0003,
"step": 9200
},
{
"epoch": 1.8165750196386488,
"grad_norm": 0.0016435191500931978,
"learning_rate": 1.972702278083268e-05,
"loss": 0.0004,
"step": 9250
},
{
"epoch": 1.826394344069128,
"grad_norm": 0.0049284519627690315,
"learning_rate": 1.9563367373658027e-05,
"loss": 0.0003,
"step": 9300
},
{
"epoch": 1.836213668499607,
"grad_norm": 0.007083178497850895,
"learning_rate": 1.9399711966483373e-05,
"loss": 0.0002,
"step": 9350
},
{
"epoch": 1.8460329929300863,
"grad_norm": 0.003799445927143097,
"learning_rate": 1.923605655930872e-05,
"loss": 0.0002,
"step": 9400
},
{
"epoch": 1.8558523173605654,
"grad_norm": 8.943451575760264e-06,
"learning_rate": 1.9072401152134066e-05,
"loss": 0.0002,
"step": 9450
},
{
"epoch": 1.8656716417910446,
"grad_norm": 4.004701168014435e-06,
"learning_rate": 1.8908745744959413e-05,
"loss": 0.0001,
"step": 9500
},
{
"epoch": 1.875490966221524,
"grad_norm": 0.0016747256740927696,
"learning_rate": 1.8745090337784763e-05,
"loss": 0.0003,
"step": 9550
},
{
"epoch": 1.8853102906520032,
"grad_norm": 0.00582944555208087,
"learning_rate": 1.858143493061011e-05,
"loss": 0.0003,
"step": 9600
},
{
"epoch": 1.8951296150824823,
"grad_norm": 5.8452515077078715e-06,
"learning_rate": 1.8417779523435456e-05,
"loss": 0.0002,
"step": 9650
},
{
"epoch": 1.9049489395129615,
"grad_norm": 2.22074459088617e-06,
"learning_rate": 1.8254124116260802e-05,
"loss": 0.0003,
"step": 9700
},
{
"epoch": 1.9147682639434407,
"grad_norm": 0.0001783396874088794,
"learning_rate": 1.809046870908615e-05,
"loss": 0.0002,
"step": 9750
},
{
"epoch": 1.9245875883739199,
"grad_norm": 9.459259308641776e-05,
"learning_rate": 1.7926813301911495e-05,
"loss": 0.0003,
"step": 9800
},
{
"epoch": 1.934406912804399,
"grad_norm": 0.001501628546975553,
"learning_rate": 1.7763157894736842e-05,
"loss": 0.0002,
"step": 9850
},
{
"epoch": 1.9442262372348782,
"grad_norm": 0.008209704421460629,
"learning_rate": 1.759950248756219e-05,
"loss": 0.0002,
"step": 9900
},
{
"epoch": 1.9540455616653576,
"grad_norm": 2.8594949981197715e-05,
"learning_rate": 1.743584708038754e-05,
"loss": 0.0003,
"step": 9950
},
{
"epoch": 1.9638648860958368,
"grad_norm": 0.0010227253660559654,
"learning_rate": 1.7272191673212885e-05,
"loss": 0.0002,
"step": 10000
},
{
"epoch": 1.973684210526316,
"grad_norm": 1.06588067865232e-05,
"learning_rate": 1.710853626603823e-05,
"loss": 0.0003,
"step": 10050
},
{
"epoch": 1.983503534956795,
"grad_norm": 0.0035111424513161182,
"learning_rate": 1.6944880858863578e-05,
"loss": 0.0003,
"step": 10100
},
{
"epoch": 1.9933228593872743,
"grad_norm": 7.919372001197189e-05,
"learning_rate": 1.6781225451688925e-05,
"loss": 0.0002,
"step": 10150
},
{
"epoch": 2.0031421838177534,
"grad_norm": 0.00608315784484148,
"learning_rate": 1.661757004451427e-05,
"loss": 0.0002,
"step": 10200
},
{
"epoch": 2.0129615082482326,
"grad_norm": 6.566229330928763e-06,
"learning_rate": 1.6453914637339618e-05,
"loss": 0.0002,
"step": 10250
},
{
"epoch": 2.022780832678712,
"grad_norm": 2.852589432222885e-06,
"learning_rate": 1.6290259230164964e-05,
"loss": 0.0001,
"step": 10300
},
{
"epoch": 2.032600157109191,
"grad_norm": 2.3466156562790275e-05,
"learning_rate": 1.6126603822990314e-05,
"loss": 0.0002,
"step": 10350
},
{
"epoch": 2.04241948153967,
"grad_norm": 0.006443875841796398,
"learning_rate": 1.596294841581566e-05,
"loss": 0.0004,
"step": 10400
},
{
"epoch": 2.0522388059701493,
"grad_norm": 0.0006049483199603856,
"learning_rate": 1.5799293008641007e-05,
"loss": 0.0002,
"step": 10450
},
{
"epoch": 2.0620581304006285,
"grad_norm": 0.0022505486849695444,
"learning_rate": 1.563563760146635e-05,
"loss": 0.0002,
"step": 10500
},
{
"epoch": 2.0718774548311076,
"grad_norm": 4.880329652223736e-05,
"learning_rate": 1.54719821942917e-05,
"loss": 0.0002,
"step": 10550
},
{
"epoch": 2.081696779261587,
"grad_norm": 0.006252319552004337,
"learning_rate": 1.5308326787117047e-05,
"loss": 0.0003,
"step": 10600
},
{
"epoch": 2.091516103692066,
"grad_norm": 0.0014218160649761558,
"learning_rate": 1.5144671379942393e-05,
"loss": 0.0004,
"step": 10650
},
{
"epoch": 2.101335428122545,
"grad_norm": 1.5717498172307387e-05,
"learning_rate": 1.498101597276774e-05,
"loss": 0.0003,
"step": 10700
},
{
"epoch": 2.1111547525530243,
"grad_norm": 2.7057717488787603e-06,
"learning_rate": 1.4817360565593088e-05,
"loss": 0.0003,
"step": 10750
},
{
"epoch": 2.1209740769835035,
"grad_norm": 0.003399658016860485,
"learning_rate": 1.4653705158418435e-05,
"loss": 0.0002,
"step": 10800
},
{
"epoch": 2.1307934014139827,
"grad_norm": 0.0001567010476719588,
"learning_rate": 1.4490049751243781e-05,
"loss": 0.0002,
"step": 10850
},
{
"epoch": 2.140612725844462,
"grad_norm": 1.0206712431681808e-05,
"learning_rate": 1.432639434406913e-05,
"loss": 0.0002,
"step": 10900
},
{
"epoch": 2.150432050274941,
"grad_norm": 0.0023415617179125547,
"learning_rate": 1.4162738936894476e-05,
"loss": 0.0003,
"step": 10950
},
{
"epoch": 2.16025137470542,
"grad_norm": 9.877283446257934e-05,
"learning_rate": 1.3999083529719822e-05,
"loss": 0.0002,
"step": 11000
},
{
"epoch": 2.1700706991358993,
"grad_norm": 0.0009852441726252437,
"learning_rate": 1.3835428122545169e-05,
"loss": 0.0001,
"step": 11050
},
{
"epoch": 2.1798900235663785,
"grad_norm": 2.5751623979886062e-05,
"learning_rate": 1.3671772715370517e-05,
"loss": 0.0002,
"step": 11100
},
{
"epoch": 2.1897093479968577,
"grad_norm": 0.00046911800745874643,
"learning_rate": 1.3508117308195864e-05,
"loss": 0.0003,
"step": 11150
},
{
"epoch": 2.199528672427337,
"grad_norm": 2.548624252085574e-05,
"learning_rate": 1.334446190102121e-05,
"loss": 0.0003,
"step": 11200
},
{
"epoch": 2.209347996857816,
"grad_norm": 0.00012927035277243704,
"learning_rate": 1.3180806493846557e-05,
"loss": 0.0002,
"step": 11250
},
{
"epoch": 2.219167321288295,
"grad_norm": 5.643380973197054e-06,
"learning_rate": 1.3017151086671905e-05,
"loss": 0.0002,
"step": 11300
},
{
"epoch": 2.2289866457187744,
"grad_norm": 0.00517466152086854,
"learning_rate": 1.2853495679497251e-05,
"loss": 0.0003,
"step": 11350
},
{
"epoch": 2.2388059701492535,
"grad_norm": 0.0007839313475415111,
"learning_rate": 1.2689840272322598e-05,
"loss": 0.0002,
"step": 11400
},
{
"epoch": 2.2486252945797327,
"grad_norm": 0.003385524032637477,
"learning_rate": 1.2526184865147945e-05,
"loss": 0.0002,
"step": 11450
},
{
"epoch": 2.258444619010212,
"grad_norm": 0.006902114022523165,
"learning_rate": 1.2362529457973291e-05,
"loss": 0.0002,
"step": 11500
},
{
"epoch": 2.268263943440691,
"grad_norm": 0.002237598644569516,
"learning_rate": 1.219887405079864e-05,
"loss": 0.0002,
"step": 11550
},
{
"epoch": 2.27808326787117,
"grad_norm": 2.9005691430938896e-06,
"learning_rate": 1.2035218643623986e-05,
"loss": 0.0002,
"step": 11600
},
{
"epoch": 2.28790259230165,
"grad_norm": 0.0008452658075839281,
"learning_rate": 1.1871563236449332e-05,
"loss": 0.0002,
"step": 11650
},
{
"epoch": 2.297721916732129,
"grad_norm": 0.0014033624902367592,
"learning_rate": 1.1707907829274679e-05,
"loss": 0.0002,
"step": 11700
},
{
"epoch": 2.307541241162608,
"grad_norm": 0.001146289287135005,
"learning_rate": 1.1544252422100027e-05,
"loss": 0.0002,
"step": 11750
},
{
"epoch": 2.3173605655930873,
"grad_norm": 6.133544957265258e-06,
"learning_rate": 1.1380597014925374e-05,
"loss": 0.0001,
"step": 11800
},
{
"epoch": 2.3271798900235665,
"grad_norm": 0.0015589894028380513,
"learning_rate": 1.121694160775072e-05,
"loss": 0.0004,
"step": 11850
},
{
"epoch": 2.3369992144540457,
"grad_norm": 0.00035342929186299443,
"learning_rate": 1.1053286200576067e-05,
"loss": 0.0002,
"step": 11900
},
{
"epoch": 2.346818538884525,
"grad_norm": 1.368164703308139e-05,
"learning_rate": 1.0889630793401415e-05,
"loss": 0.0003,
"step": 11950
},
{
"epoch": 2.356637863315004,
"grad_norm": 0.0021006593015044928,
"learning_rate": 1.0725975386226761e-05,
"loss": 0.0002,
"step": 12000
},
{
"epoch": 2.366457187745483,
"grad_norm": 7.3638998401293065e-06,
"learning_rate": 1.0562319979052108e-05,
"loss": 0.0002,
"step": 12050
},
{
"epoch": 2.3762765121759624,
"grad_norm": 0.006797213107347488,
"learning_rate": 1.0398664571877455e-05,
"loss": 0.0002,
"step": 12100
},
{
"epoch": 2.3860958366064415,
"grad_norm": 0.002575602615252137,
"learning_rate": 1.0235009164702803e-05,
"loss": 0.0004,
"step": 12150
},
{
"epoch": 2.3959151610369207,
"grad_norm": 0.0026493787299841642,
"learning_rate": 1.007135375752815e-05,
"loss": 0.0003,
"step": 12200
},
{
"epoch": 2.4057344854674,
"grad_norm": 5.164716185390716e-06,
"learning_rate": 9.907698350353496e-06,
"loss": 0.0003,
"step": 12250
},
{
"epoch": 2.415553809897879,
"grad_norm": 0.0033771705348044634,
"learning_rate": 9.744042943178842e-06,
"loss": 0.0003,
"step": 12300
},
{
"epoch": 2.425373134328358,
"grad_norm": 3.062731593672652e-06,
"learning_rate": 9.58038753600419e-06,
"loss": 0.0003,
"step": 12350
},
{
"epoch": 2.4351924587588374,
"grad_norm": 0.002900635125115514,
"learning_rate": 9.416732128829537e-06,
"loss": 0.0002,
"step": 12400
},
{
"epoch": 2.4450117831893166,
"grad_norm": 0.005432957783341408,
"learning_rate": 9.253076721654884e-06,
"loss": 0.0003,
"step": 12450
},
{
"epoch": 2.4548311076197957,
"grad_norm": 0.0060834819450974464,
"learning_rate": 9.089421314480232e-06,
"loss": 0.0002,
"step": 12500
},
{
"epoch": 2.464650432050275,
"grad_norm": 0.0003888920182362199,
"learning_rate": 8.925765907305578e-06,
"loss": 0.0002,
"step": 12550
},
{
"epoch": 2.474469756480754,
"grad_norm": 0.0008291418780572712,
"learning_rate": 8.762110500130925e-06,
"loss": 0.0002,
"step": 12600
},
{
"epoch": 2.4842890809112332,
"grad_norm": 4.13081716033048e-06,
"learning_rate": 8.598455092956271e-06,
"loss": 0.0002,
"step": 12650
},
{
"epoch": 2.4941084053417124,
"grad_norm": 0.0027352613396942616,
"learning_rate": 8.43479968578162e-06,
"loss": 0.0002,
"step": 12700
},
{
"epoch": 2.5039277297721916,
"grad_norm": 0.004523648414760828,
"learning_rate": 8.271144278606966e-06,
"loss": 0.0003,
"step": 12750
},
{
"epoch": 2.5137470542026707,
"grad_norm": 0.0014511903282254934,
"learning_rate": 8.107488871432313e-06,
"loss": 0.0002,
"step": 12800
},
{
"epoch": 2.52356637863315,
"grad_norm": 0.001870101667009294,
"learning_rate": 7.94383346425766e-06,
"loss": 0.0003,
"step": 12850
},
{
"epoch": 2.533385703063629,
"grad_norm": 1.8049751133730751e-06,
"learning_rate": 7.780178057083008e-06,
"loss": 0.0002,
"step": 12900
},
{
"epoch": 2.5432050274941083,
"grad_norm": 2.639750891830772e-05,
"learning_rate": 7.616522649908353e-06,
"loss": 0.0002,
"step": 12950
},
{
"epoch": 2.5530243519245874,
"grad_norm": 0.004915285389870405,
"learning_rate": 7.452867242733701e-06,
"loss": 0.0003,
"step": 13000
},
{
"epoch": 2.562843676355067,
"grad_norm": 0.0013427536468952894,
"learning_rate": 7.289211835559047e-06,
"loss": 0.0002,
"step": 13050
},
{
"epoch": 2.572663000785546,
"grad_norm": 0.006909032352268696,
"learning_rate": 7.1255564283843945e-06,
"loss": 0.0002,
"step": 13100
},
{
"epoch": 2.5824823252160254,
"grad_norm": 0.004452712833881378,
"learning_rate": 6.961901021209741e-06,
"loss": 0.0002,
"step": 13150
},
{
"epoch": 2.5923016496465046,
"grad_norm": 1.5004067108748131e-06,
"learning_rate": 6.798245614035088e-06,
"loss": 0.0002,
"step": 13200
},
{
"epoch": 2.6021209740769837,
"grad_norm": 9.272382158087566e-06,
"learning_rate": 6.634590206860435e-06,
"loss": 0.0002,
"step": 13250
},
{
"epoch": 2.611940298507463,
"grad_norm": 0.004868045449256897,
"learning_rate": 6.470934799685782e-06,
"loss": 0.0002,
"step": 13300
},
{
"epoch": 2.621759622937942,
"grad_norm": 6.646020665357355e-06,
"learning_rate": 6.307279392511129e-06,
"loss": 0.0002,
"step": 13350
},
{
"epoch": 2.6315789473684212,
"grad_norm": 0.0015008870977908373,
"learning_rate": 6.143623985336476e-06,
"loss": 0.0003,
"step": 13400
},
{
"epoch": 2.6413982717989004,
"grad_norm": 0.0031040972098708153,
"learning_rate": 5.979968578161823e-06,
"loss": 0.0003,
"step": 13450
},
{
"epoch": 2.6512175962293796,
"grad_norm": 0.0015553946141153574,
"learning_rate": 5.81631317098717e-06,
"loss": 0.0003,
"step": 13500
},
{
"epoch": 2.6610369206598588,
"grad_norm": 0.0015114744892343879,
"learning_rate": 5.652657763812517e-06,
"loss": 0.0002,
"step": 13550
},
{
"epoch": 2.670856245090338,
"grad_norm": 0.006511743646115065,
"learning_rate": 5.489002356637864e-06,
"loss": 0.0002,
"step": 13600
},
{
"epoch": 2.680675569520817,
"grad_norm": 3.6607066249416675e-06,
"learning_rate": 5.3253469494632106e-06,
"loss": 0.0002,
"step": 13650
},
{
"epoch": 2.6904948939512963,
"grad_norm": 0.0008585217874497175,
"learning_rate": 5.161691542288558e-06,
"loss": 0.0003,
"step": 13700
},
{
"epoch": 2.7003142183817754,
"grad_norm": 0.0005489959730766714,
"learning_rate": 4.9980361351139045e-06,
"loss": 0.0003,
"step": 13750
},
{
"epoch": 2.7101335428122546,
"grad_norm": 3.3158432870550314e-06,
"learning_rate": 4.834380727939251e-06,
"loss": 0.0002,
"step": 13800
},
{
"epoch": 2.7199528672427338,
"grad_norm": 0.0030354801565408707,
"learning_rate": 4.670725320764598e-06,
"loss": 0.0003,
"step": 13850
},
{
"epoch": 2.729772191673213,
"grad_norm": 3.2985217330860905e-06,
"learning_rate": 4.507069913589945e-06,
"loss": 0.0002,
"step": 13900
},
{
"epoch": 2.739591516103692,
"grad_norm": 7.288464257726446e-05,
"learning_rate": 4.343414506415292e-06,
"loss": 0.0002,
"step": 13950
},
{
"epoch": 2.7494108405341713,
"grad_norm": 0.0043890466913580894,
"learning_rate": 4.179759099240639e-06,
"loss": 0.0003,
"step": 14000
},
{
"epoch": 2.7592301649646505,
"grad_norm": 0.0019504046067595482,
"learning_rate": 4.016103692065986e-06,
"loss": 0.0002,
"step": 14050
},
{
"epoch": 2.7690494893951296,
"grad_norm": 0.0015608868561685085,
"learning_rate": 3.852448284891333e-06,
"loss": 0.0003,
"step": 14100
},
{
"epoch": 2.778868813825609,
"grad_norm": 0.002886673668399453,
"learning_rate": 3.6887928777166797e-06,
"loss": 0.0002,
"step": 14150
},
{
"epoch": 2.788688138256088,
"grad_norm": 0.0009562448249198496,
"learning_rate": 3.5251374705420267e-06,
"loss": 0.0003,
"step": 14200
},
{
"epoch": 2.798507462686567,
"grad_norm": 2.2277029074757593e-06,
"learning_rate": 3.3614820633673736e-06,
"loss": 0.0003,
"step": 14250
},
{
"epoch": 2.8083267871170463,
"grad_norm": 0.0023437589406967163,
"learning_rate": 3.1978266561927206e-06,
"loss": 0.0002,
"step": 14300
},
{
"epoch": 2.8181461115475255,
"grad_norm": 1.8503330920793815e-06,
"learning_rate": 3.034171249018068e-06,
"loss": 0.0002,
"step": 14350
},
{
"epoch": 2.8279654359780046,
"grad_norm": 4.966601863998221e-06,
"learning_rate": 2.870515841843415e-06,
"loss": 0.0002,
"step": 14400
},
{
"epoch": 2.837784760408484,
"grad_norm": 1.3409814982878743e-06,
"learning_rate": 2.706860434668762e-06,
"loss": 0.0002,
"step": 14450
},
{
"epoch": 2.847604084838963,
"grad_norm": 6.203641532920301e-05,
"learning_rate": 2.543205027494109e-06,
"loss": 0.0002,
"step": 14500
},
{
"epoch": 2.857423409269442,
"grad_norm": 0.0002616413403302431,
"learning_rate": 2.3795496203194553e-06,
"loss": 0.0003,
"step": 14550
},
{
"epoch": 2.8672427336999213,
"grad_norm": 6.303464488155441e-07,
"learning_rate": 2.2158942131448023e-06,
"loss": 0.0002,
"step": 14600
},
{
"epoch": 2.8770620581304005,
"grad_norm": 0.0017262320034205914,
"learning_rate": 2.0522388059701493e-06,
"loss": 0.0003,
"step": 14650
},
{
"epoch": 2.8868813825608797,
"grad_norm": 0.0033360267989337444,
"learning_rate": 1.8885833987954962e-06,
"loss": 0.0003,
"step": 14700
},
{
"epoch": 2.896700706991359,
"grad_norm": 2.7147841592523037e-06,
"learning_rate": 1.7249279916208432e-06,
"loss": 0.0002,
"step": 14750
},
{
"epoch": 2.906520031421838,
"grad_norm": 0.0019037205493077636,
"learning_rate": 1.5612725844461901e-06,
"loss": 0.0002,
"step": 14800
},
{
"epoch": 2.916339355852317,
"grad_norm": 0.00883456040173769,
"learning_rate": 1.397617177271537e-06,
"loss": 0.0003,
"step": 14850
},
{
"epoch": 2.9261586802827964,
"grad_norm": 0.000769551086705178,
"learning_rate": 1.233961770096884e-06,
"loss": 0.0003,
"step": 14900
},
{
"epoch": 2.9359780047132755,
"grad_norm": 1.074038755177753e-05,
"learning_rate": 1.070306362922231e-06,
"loss": 0.0002,
"step": 14950
},
{
"epoch": 2.9457973291437547,
"grad_norm": 0.0046028513461351395,
"learning_rate": 9.066509557475779e-07,
"loss": 0.0003,
"step": 15000
},
{
"epoch": 2.955616653574234,
"grad_norm": 0.0024120802991092205,
"learning_rate": 7.429955485729249e-07,
"loss": 0.0002,
"step": 15050
},
{
"epoch": 2.965435978004713,
"grad_norm": 0.0019912375137209892,
"learning_rate": 5.793401413982719e-07,
"loss": 0.0003,
"step": 15100
},
{
"epoch": 2.975255302435192,
"grad_norm": 0.0013518768828362226,
"learning_rate": 4.1568473422361876e-07,
"loss": 0.0003,
"step": 15150
},
{
"epoch": 2.9850746268656714,
"grad_norm": 5.804280954180285e-05,
"learning_rate": 2.520293270489657e-07,
"loss": 0.0003,
"step": 15200
},
{
"epoch": 2.994893951296151,
"grad_norm": 0.0008266063523478806,
"learning_rate": 8.837391987431265e-08,
"loss": 0.0002,
"step": 15250
}
],
"logging_steps": 50,
"max_steps": 15276,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.72042823237632e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}