{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 778,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012853470437017995,
"grad_norm": 1.0161515474319458,
"learning_rate": 1.2244897959183673e-06,
"loss": 1.2722,
"step": 5
},
{
"epoch": 0.02570694087403599,
"grad_norm": 0.878677487373352,
"learning_rate": 2.7551020408163266e-06,
"loss": 1.3141,
"step": 10
},
{
"epoch": 0.038560411311053984,
"grad_norm": 0.7686222195625305,
"learning_rate": 4.2857142857142855e-06,
"loss": 1.2799,
"step": 15
},
{
"epoch": 0.05141388174807198,
"grad_norm": 0.545415997505188,
"learning_rate": 5.816326530612245e-06,
"loss": 1.2905,
"step": 20
},
{
"epoch": 0.06426735218508997,
"grad_norm": 0.5554194450378418,
"learning_rate": 7.346938775510204e-06,
"loss": 1.232,
"step": 25
},
{
"epoch": 0.07712082262210797,
"grad_norm": 0.5022542476654053,
"learning_rate": 8.877551020408163e-06,
"loss": 1.1671,
"step": 30
},
{
"epoch": 0.08997429305912596,
"grad_norm": 0.4759712815284729,
"learning_rate": 1.0408163265306123e-05,
"loss": 1.1927,
"step": 35
},
{
"epoch": 0.10282776349614396,
"grad_norm": 0.5466979146003723,
"learning_rate": 1.1938775510204082e-05,
"loss": 1.1454,
"step": 40
},
{
"epoch": 0.11568123393316196,
"grad_norm": 0.42459335923194885,
"learning_rate": 1.3469387755102042e-05,
"loss": 1.1662,
"step": 45
},
{
"epoch": 0.12853470437017994,
"grad_norm": 0.45232370495796204,
"learning_rate": 1.5e-05,
"loss": 1.1538,
"step": 50
},
{
"epoch": 0.14138817480719795,
"grad_norm": 0.4375338852405548,
"learning_rate": 1.6530612244897957e-05,
"loss": 1.1508,
"step": 55
},
{
"epoch": 0.15424164524421594,
"grad_norm": 0.40976741909980774,
"learning_rate": 1.806122448979592e-05,
"loss": 1.1557,
"step": 60
},
{
"epoch": 0.16709511568123395,
"grad_norm": 0.4517289996147156,
"learning_rate": 1.9591836734693877e-05,
"loss": 1.1779,
"step": 65
},
{
"epoch": 0.17994858611825193,
"grad_norm": 0.43108654022216797,
"learning_rate": 2.1122448979591836e-05,
"loss": 1.1197,
"step": 70
},
{
"epoch": 0.1928020565552699,
"grad_norm": 0.43453311920166016,
"learning_rate": 2.2653061224489794e-05,
"loss": 1.1141,
"step": 75
},
{
"epoch": 0.20565552699228792,
"grad_norm": 0.5921583771705627,
"learning_rate": 2.4183673469387756e-05,
"loss": 1.1139,
"step": 80
},
{
"epoch": 0.2185089974293059,
"grad_norm": 0.8495065569877625,
"learning_rate": 2.5714285714285714e-05,
"loss": 1.1039,
"step": 85
},
{
"epoch": 0.23136246786632392,
"grad_norm": 0.5331577658653259,
"learning_rate": 2.7244897959183673e-05,
"loss": 1.0815,
"step": 90
},
{
"epoch": 0.2442159383033419,
"grad_norm": 0.5275247097015381,
"learning_rate": 2.877551020408163e-05,
"loss": 1.0436,
"step": 95
},
{
"epoch": 0.2570694087403599,
"grad_norm": 0.498080849647522,
"learning_rate": 2.9999978301629866e-05,
"loss": 1.0805,
"step": 100
},
{
"epoch": 0.2699228791773779,
"grad_norm": 0.5699777603149414,
"learning_rate": 2.999921886526661e-05,
"loss": 1.0579,
"step": 105
},
{
"epoch": 0.2827763496143959,
"grad_norm": 0.5747466087341309,
"learning_rate": 2.999737457317172e-05,
"loss": 1.1137,
"step": 110
},
{
"epoch": 0.29562982005141386,
"grad_norm": 0.4869830906391144,
"learning_rate": 2.9994445558738194e-05,
"loss": 1.0894,
"step": 115
},
{
"epoch": 0.30848329048843187,
"grad_norm": 0.5086082816123962,
"learning_rate": 2.999043203381427e-05,
"loss": 1.0055,
"step": 120
},
{
"epoch": 0.3213367609254499,
"grad_norm": 0.5111905932426453,
"learning_rate": 2.9985334288688106e-05,
"loss": 1.0172,
"step": 125
},
{
"epoch": 0.3341902313624679,
"grad_norm": 0.5968515276908875,
"learning_rate": 2.997915269206677e-05,
"loss": 1.0152,
"step": 130
},
{
"epoch": 0.34704370179948585,
"grad_norm": 0.6744562387466431,
"learning_rate": 2.9971887691049578e-05,
"loss": 0.9944,
"step": 135
},
{
"epoch": 0.35989717223650386,
"grad_norm": 0.5791626572608948,
"learning_rate": 2.9963539811095754e-05,
"loss": 0.9451,
"step": 140
},
{
"epoch": 0.37275064267352187,
"grad_norm": 0.6787604689598083,
"learning_rate": 2.9954109655986444e-05,
"loss": 0.9964,
"step": 145
},
{
"epoch": 0.3856041131105398,
"grad_norm": 0.5651743412017822,
"learning_rate": 2.9943597907781013e-05,
"loss": 0.9361,
"step": 150
},
{
"epoch": 0.39845758354755784,
"grad_norm": 0.7102354764938354,
"learning_rate": 2.9932005326767748e-05,
"loss": 0.9206,
"step": 155
},
{
"epoch": 0.41131105398457585,
"grad_norm": 0.6241850256919861,
"learning_rate": 2.9919332751408837e-05,
"loss": 0.9521,
"step": 160
},
{
"epoch": 0.4241645244215938,
"grad_norm": 0.6255318522453308,
"learning_rate": 2.9905581098279747e-05,
"loss": 0.946,
"step": 165
},
{
"epoch": 0.4370179948586118,
"grad_norm": 0.6290681958198547,
"learning_rate": 2.9890751362002923e-05,
"loss": 0.9135,
"step": 170
},
{
"epoch": 0.4498714652956298,
"grad_norm": 0.7111775875091553,
"learning_rate": 2.9874844615175846e-05,
"loss": 0.9554,
"step": 175
},
{
"epoch": 0.46272493573264784,
"grad_norm": 0.6591962575912476,
"learning_rate": 2.985786200829346e-05,
"loss": 0.9218,
"step": 180
},
{
"epoch": 0.4755784061696658,
"grad_norm": 0.7715244889259338,
"learning_rate": 2.9839804769664957e-05,
"loss": 0.8853,
"step": 185
},
{
"epoch": 0.4884318766066838,
"grad_norm": 0.6468695402145386,
"learning_rate": 2.982067420532494e-05,
"loss": 0.8898,
"step": 190
},
{
"epoch": 0.5012853470437018,
"grad_norm": 0.7891976833343506,
"learning_rate": 2.980047169893895e-05,
"loss": 0.8674,
"step": 195
},
{
"epoch": 0.5141388174807198,
"grad_norm": 0.7604276537895203,
"learning_rate": 2.9779198711703414e-05,
"loss": 0.9642,
"step": 200
},
{
"epoch": 0.5269922879177378,
"grad_norm": 0.8270877003669739,
"learning_rate": 2.9756856782239924e-05,
"loss": 0.8366,
"step": 205
},
{
"epoch": 0.5398457583547558,
"grad_norm": 0.7906156778335571,
"learning_rate": 2.973344752648398e-05,
"loss": 0.8761,
"step": 210
},
{
"epoch": 0.5526992287917738,
"grad_norm": 0.7102853655815125,
"learning_rate": 2.9708972637568106e-05,
"loss": 0.8519,
"step": 215
},
{
"epoch": 0.5655526992287918,
"grad_norm": 0.7394425272941589,
"learning_rate": 2.9683433885699393e-05,
"loss": 0.8693,
"step": 220
},
{
"epoch": 0.5784061696658098,
"grad_norm": 0.7369321584701538,
"learning_rate": 2.965683311803144e-05,
"loss": 0.8805,
"step": 225
},
{
"epoch": 0.5912596401028277,
"grad_norm": 0.7407816648483276,
"learning_rate": 2.962917225853081e-05,
"loss": 0.8342,
"step": 230
},
{
"epoch": 0.6041131105398457,
"grad_norm": 0.7909078001976013,
"learning_rate": 2.960045330783781e-05,
"loss": 0.8429,
"step": 235
},
{
"epoch": 0.6169665809768637,
"grad_norm": 0.7501896619796753,
"learning_rate": 2.957067834312183e-05,
"loss": 0.812,
"step": 240
},
{
"epoch": 0.6298200514138818,
"grad_norm": 0.9143732786178589,
"learning_rate": 2.9539849517931084e-05,
"loss": 0.8153,
"step": 245
},
{
"epoch": 0.6426735218508998,
"grad_norm": 0.8319126963615417,
"learning_rate": 2.9507969062036884e-05,
"loss": 0.831,
"step": 250
},
{
"epoch": 0.6555269922879178,
"grad_norm": 0.9196388125419617,
"learning_rate": 2.9475039281272315e-05,
"loss": 0.8021,
"step": 255
},
{
"epoch": 0.6683804627249358,
"grad_norm": 0.785527765750885,
"learning_rate": 2.9441062557365505e-05,
"loss": 0.7962,
"step": 260
},
{
"epoch": 0.6812339331619537,
"grad_norm": 0.7972485423088074,
"learning_rate": 2.9406041347767342e-05,
"loss": 0.8106,
"step": 265
},
{
"epoch": 0.6940874035989717,
"grad_norm": 0.8886427879333496,
"learning_rate": 2.9369978185473732e-05,
"loss": 0.7575,
"step": 270
},
{
"epoch": 0.7069408740359897,
"grad_norm": 0.8090516924858093,
"learning_rate": 2.9332875678842385e-05,
"loss": 0.8337,
"step": 275
},
{
"epoch": 0.7197943444730077,
"grad_norm": 0.8744608759880066,
"learning_rate": 2.929473651140419e-05,
"loss": 0.8028,
"step": 280
},
{
"epoch": 0.7326478149100257,
"grad_norm": 0.9550356268882751,
"learning_rate": 2.9255563441669085e-05,
"loss": 0.7823,
"step": 285
},
{
"epoch": 0.7455012853470437,
"grad_norm": 0.9044423699378967,
"learning_rate": 2.9215359302926564e-05,
"loss": 0.7508,
"step": 290
},
{
"epoch": 0.7583547557840618,
"grad_norm": 0.874662458896637,
"learning_rate": 2.917412700304075e-05,
"loss": 0.7513,
"step": 295
},
{
"epoch": 0.7712082262210797,
"grad_norm": 0.9646016955375671,
"learning_rate": 2.913186952424007e-05,
"loss": 0.7954,
"step": 300
},
{
"epoch": 0.7840616966580977,
"grad_norm": 0.9356961846351624,
"learning_rate": 2.9088589922901544e-05,
"loss": 0.7316,
"step": 305
},
{
"epoch": 0.7969151670951157,
"grad_norm": 1.0034047365188599,
"learning_rate": 2.9044291329329772e-05,
"loss": 0.7385,
"step": 310
},
{
"epoch": 0.8097686375321337,
"grad_norm": 1.038320779800415,
"learning_rate": 2.8998976947530478e-05,
"loss": 0.7038,
"step": 315
},
{
"epoch": 0.8226221079691517,
"grad_norm": 0.9056432843208313,
"learning_rate": 2.8952650054978792e-05,
"loss": 0.7287,
"step": 320
},
{
"epoch": 0.8354755784061697,
"grad_norm": 0.8862286806106567,
"learning_rate": 2.8905314002382196e-05,
"loss": 0.7359,
"step": 325
},
{
"epoch": 0.8483290488431876,
"grad_norm": 0.924501895904541,
"learning_rate": 2.8856972213438183e-05,
"loss": 0.6987,
"step": 330
},
{
"epoch": 0.8611825192802056,
"grad_norm": 0.9231320023536682,
"learning_rate": 2.8807628184586618e-05,
"loss": 0.7152,
"step": 335
},
{
"epoch": 0.8740359897172236,
"grad_norm": 0.9292797446250916,
"learning_rate": 2.8757285484756853e-05,
"loss": 0.6684,
"step": 340
},
{
"epoch": 0.8868894601542416,
"grad_norm": 0.8607897758483887,
"learning_rate": 2.870594775510961e-05,
"loss": 0.6443,
"step": 345
},
{
"epoch": 0.8997429305912596,
"grad_norm": 0.918314516544342,
"learning_rate": 2.8653618708773598e-05,
"loss": 0.6427,
"step": 350
},
{
"epoch": 0.9125964010282777,
"grad_norm": 0.9614541530609131,
"learning_rate": 2.8600302130576966e-05,
"loss": 0.6409,
"step": 355
},
{
"epoch": 0.9254498714652957,
"grad_norm": 0.9149335622787476,
"learning_rate": 2.854600187677357e-05,
"loss": 0.6544,
"step": 360
},
{
"epoch": 0.9383033419023136,
"grad_norm": 1.0157501697540283,
"learning_rate": 2.849072187476403e-05,
"loss": 0.6663,
"step": 365
},
{
"epoch": 0.9511568123393316,
"grad_norm": 0.8862146139144897,
"learning_rate": 2.8434466122811694e-05,
"loss": 0.6654,
"step": 370
},
{
"epoch": 0.9640102827763496,
"grad_norm": 0.9366165399551392,
"learning_rate": 2.8377238689753448e-05,
"loss": 0.6497,
"step": 375
},
{
"epoch": 0.9768637532133676,
"grad_norm": 0.9608516097068787,
"learning_rate": 2.831904371470542e-05,
"loss": 0.6262,
"step": 380
},
{
"epoch": 0.9897172236503856,
"grad_norm": 0.9198379516601562,
"learning_rate": 2.825988540676362e-05,
"loss": 0.6893,
"step": 385
},
{
"epoch": 1.0025706940874035,
"grad_norm": 1.1038013696670532,
"learning_rate": 2.81997680446995e-05,
"loss": 0.5883,
"step": 390
},
{
"epoch": 1.0154241645244215,
"grad_norm": 1.0748287439346313,
"learning_rate": 2.8138695976650474e-05,
"loss": 0.5292,
"step": 395
},
{
"epoch": 1.0282776349614395,
"grad_norm": 1.0702522993087769,
"learning_rate": 2.807667361980544e-05,
"loss": 0.5584,
"step": 400
},
{
"epoch": 1.0411311053984575,
"grad_norm": 1.0195493698120117,
"learning_rate": 2.8013705460085298e-05,
"loss": 0.5249,
"step": 405
},
{
"epoch": 1.0539845758354756,
"grad_norm": 1.0030614137649536,
"learning_rate": 2.7949796051818478e-05,
"loss": 0.5383,
"step": 410
},
{
"epoch": 1.0668380462724936,
"grad_norm": 1.0707740783691406,
"learning_rate": 2.7884950017411556e-05,
"loss": 0.578,
"step": 415
},
{
"epoch": 1.0796915167095116,
"grad_norm": 1.021653652191162,
"learning_rate": 2.7819172047014916e-05,
"loss": 0.5773,
"step": 420
},
{
"epoch": 1.0925449871465296,
"grad_norm": 1.0462572574615479,
"learning_rate": 2.7752466898183518e-05,
"loss": 0.5325,
"step": 425
},
{
"epoch": 1.1053984575835476,
"grad_norm": 0.8722683191299438,
"learning_rate": 2.7684839395532815e-05,
"loss": 0.5503,
"step": 430
},
{
"epoch": 1.1182519280205656,
"grad_norm": 1.03123939037323,
"learning_rate": 2.761629443038978e-05,
"loss": 0.5297,
"step": 435
},
{
"epoch": 1.1311053984575836,
"grad_norm": 1.130732774734497,
"learning_rate": 2.7546836960439146e-05,
"loss": 0.5413,
"step": 440
},
{
"epoch": 1.1439588688946016,
"grad_norm": 0.9612518548965454,
"learning_rate": 2.7476472009364814e-05,
"loss": 0.5987,
"step": 445
},
{
"epoch": 1.1568123393316196,
"grad_norm": 1.2290369272232056,
"learning_rate": 2.7405204666486513e-05,
"loss": 0.5066,
"step": 450
},
{
"epoch": 1.1696658097686377,
"grad_norm": 1.1223726272583008,
"learning_rate": 2.7333040086391692e-05,
"loss": 0.4859,
"step": 455
},
{
"epoch": 1.1825192802056554,
"grad_norm": 1.047003984451294,
"learning_rate": 2.7259983488562726e-05,
"loss": 0.5298,
"step": 460
},
{
"epoch": 1.1953727506426735,
"grad_norm": 1.047174096107483,
"learning_rate": 2.718604015699937e-05,
"loss": 0.4896,
"step": 465
},
{
"epoch": 1.2082262210796915,
"grad_norm": 1.0913561582565308,
"learning_rate": 2.7111215439836596e-05,
"loss": 0.5099,
"step": 470
},
{
"epoch": 1.2210796915167095,
"grad_norm": 0.9646836519241333,
"learning_rate": 2.7035514748957798e-05,
"loss": 0.5123,
"step": 475
},
{
"epoch": 1.2339331619537275,
"grad_norm": 0.9636846780776978,
"learning_rate": 2.6958943559603316e-05,
"loss": 0.535,
"step": 480
},
{
"epoch": 1.2467866323907455,
"grad_norm": 1.0172802209854126,
"learning_rate": 2.6881507409974473e-05,
"loss": 0.4792,
"step": 485
},
{
"epoch": 1.2596401028277635,
"grad_norm": 1.0088897943496704,
"learning_rate": 2.6803211900832975e-05,
"loss": 0.4895,
"step": 490
},
{
"epoch": 1.2724935732647815,
"grad_norm": 1.17917001247406,
"learning_rate": 2.6724062695095853e-05,
"loss": 0.4796,
"step": 495
},
{
"epoch": 1.2853470437017995,
"grad_norm": 1.1440588235855103,
"learning_rate": 2.6644065517425857e-05,
"loss": 0.5509,
"step": 500
},
{
"epoch": 1.2982005141388175,
"grad_norm": 1.267622470855713,
"learning_rate": 2.65632261538174e-05,
"loss": 0.5004,
"step": 505
},
{
"epoch": 1.3110539845758356,
"grad_norm": 0.9338383078575134,
"learning_rate": 2.64815504511781e-05,
"loss": 0.4886,
"step": 510
},
{
"epoch": 1.3239074550128533,
"grad_norm": 1.0977288484573364,
"learning_rate": 2.639904431690587e-05,
"loss": 0.4851,
"step": 515
},
{
"epoch": 1.3367609254498714,
"grad_norm": 1.1947989463806152,
"learning_rate": 2.631571371846164e-05,
"loss": 0.4573,
"step": 520
},
{
"epoch": 1.3496143958868894,
"grad_norm": 1.1320568323135376,
"learning_rate": 2.6231564682937762e-05,
"loss": 0.4805,
"step": 525
},
{
"epoch": 1.3624678663239074,
"grad_norm": 1.0743120908737183,
"learning_rate": 2.614660329662209e-05,
"loss": 0.4867,
"step": 530
},
{
"epoch": 1.3753213367609254,
"grad_norm": 1.1182608604431152,
"learning_rate": 2.606083570455776e-05,
"loss": 0.4444,
"step": 535
},
{
"epoch": 1.3881748071979434,
"grad_norm": 1.0361340045928955,
"learning_rate": 2.5974268110098727e-05,
"loss": 0.4507,
"step": 540
},
{
"epoch": 1.4010282776349614,
"grad_norm": 1.0230952501296997,
"learning_rate": 2.588690677446113e-05,
"loss": 0.4262,
"step": 545
},
{
"epoch": 1.4138817480719794,
"grad_norm": 1.0415043830871582,
"learning_rate": 2.5798758016270384e-05,
"loss": 0.4946,
"step": 550
},
{
"epoch": 1.4267352185089974,
"grad_norm": 1.0534740686416626,
"learning_rate": 2.570982821110421e-05,
"loss": 0.4764,
"step": 555
},
{
"epoch": 1.4395886889460154,
"grad_norm": 1.078011155128479,
"learning_rate": 2.5620123791031488e-05,
"loss": 0.4319,
"step": 560
},
{
"epoch": 1.4524421593830334,
"grad_norm": 0.988162100315094,
"learning_rate": 2.5529651244147035e-05,
"loss": 0.4761,
"step": 565
},
{
"epoch": 1.4652956298200515,
"grad_norm": 1.1337324380874634,
"learning_rate": 2.5438417114102358e-05,
"loss": 0.4563,
"step": 570
},
{
"epoch": 1.4781491002570695,
"grad_norm": 1.0532126426696777,
"learning_rate": 2.5346427999632342e-05,
"loss": 0.4486,
"step": 575
},
{
"epoch": 1.4910025706940875,
"grad_norm": 1.0289413928985596,
"learning_rate": 2.5253690554078018e-05,
"loss": 0.4767,
"step": 580
},
{
"epoch": 1.5038560411311055,
"grad_norm": 1.106880784034729,
"learning_rate": 2.5160211484905285e-05,
"loss": 0.4757,
"step": 585
},
{
"epoch": 1.5167095115681235,
"grad_norm": 0.9928240180015564,
"learning_rate": 2.5065997553219846e-05,
"loss": 0.46,
"step": 590
},
{
"epoch": 1.5295629820051415,
"grad_norm": 1.1204873323440552,
"learning_rate": 2.4971055573278135e-05,
"loss": 0.3968,
"step": 595
},
{
"epoch": 1.5424164524421595,
"grad_norm": 0.9707500338554382,
"learning_rate": 2.48753924119945e-05,
"loss": 0.4138,
"step": 600
},
{
"epoch": 1.5552699228791775,
"grad_norm": 1.307215690612793,
"learning_rate": 2.47790149884445e-05,
"loss": 0.4653,
"step": 605
},
{
"epoch": 1.5681233933161953,
"grad_norm": 1.1242326498031616,
"learning_rate": 2.468193027336451e-05,
"loss": 0.4385,
"step": 610
},
{
"epoch": 1.5809768637532133,
"grad_norm": 1.0686546564102173,
"learning_rate": 2.4584145288647497e-05,
"loss": 0.4359,
"step": 615
},
{
"epoch": 1.5938303341902313,
"grad_norm": 0.9722070693969727,
"learning_rate": 2.448566710683518e-05,
"loss": 0.4189,
"step": 620
},
{
"epoch": 1.6066838046272494,
"grad_norm": 1.043075680732727,
"learning_rate": 2.4386502850606477e-05,
"loss": 0.4478,
"step": 625
},
{
"epoch": 1.6195372750642674,
"grad_norm": 1.1963540315628052,
"learning_rate": 2.4286659692262342e-05,
"loss": 0.4276,
"step": 630
},
{
"epoch": 1.6323907455012854,
"grad_norm": 0.9894089102745056,
"learning_rate": 2.4186144853206997e-05,
"loss": 0.3736,
"step": 635
},
{
"epoch": 1.6452442159383034,
"grad_norm": 1.1344518661499023,
"learning_rate": 2.4084965603425663e-05,
"loss": 0.3955,
"step": 640
},
{
"epoch": 1.6580976863753212,
"grad_norm": 1.0184675455093384,
"learning_rate": 2.398312926095869e-05,
"loss": 0.3938,
"step": 645
},
{
"epoch": 1.6709511568123392,
"grad_norm": 0.9903520941734314,
"learning_rate": 2.3880643191372306e-05,
"loss": 0.4075,
"step": 650
},
{
"epoch": 1.6838046272493572,
"grad_norm": 1.0676472187042236,
"learning_rate": 2.3777514807225857e-05,
"loss": 0.404,
"step": 655
},
{
"epoch": 1.6966580976863752,
"grad_norm": 1.0501329898834229,
"learning_rate": 2.3673751567535683e-05,
"loss": 0.4091,
"step": 660
},
{
"epoch": 1.7095115681233932,
"grad_norm": 0.9608376622200012,
"learning_rate": 2.3569360977235625e-05,
"loss": 0.4083,
"step": 665
},
{
"epoch": 1.7223650385604112,
"grad_norm": 1.2020519971847534,
"learning_rate": 2.346435058663423e-05,
"loss": 0.3767,
"step": 670
},
{
"epoch": 1.7352185089974292,
"grad_norm": 1.165675401687622,
"learning_rate": 2.335872799086862e-05,
"loss": 0.4,
"step": 675
},
{
"epoch": 1.7480719794344473,
"grad_norm": 1.3436106443405151,
"learning_rate": 2.325250082935518e-05,
"loss": 0.3921,
"step": 680
},
{
"epoch": 1.7609254498714653,
"grad_norm": 1.2254986763000488,
"learning_rate": 2.314567678523703e-05,
"loss": 0.363,
"step": 685
},
{
"epoch": 1.7737789203084833,
"grad_norm": 1.0373125076293945,
"learning_rate": 2.3038263584828272e-05,
"loss": 0.3791,
"step": 690
},
{
"epoch": 1.7866323907455013,
"grad_norm": 1.060325026512146,
"learning_rate": 2.2930268997055234e-05,
"loss": 0.3559,
"step": 695
},
{
"epoch": 1.7994858611825193,
"grad_norm": 1.0755804777145386,
"learning_rate": 2.282170083289451e-05,
"loss": 0.367,
"step": 700
},
{
"epoch": 1.8123393316195373,
"grad_norm": 0.9357439875602722,
"learning_rate": 2.271256694480803e-05,
"loss": 0.3539,
"step": 705
},
{
"epoch": 1.8251928020565553,
"grad_norm": 1.1248786449432373,
"learning_rate": 2.2602875226175115e-05,
"loss": 0.3601,
"step": 710
},
{
"epoch": 1.8380462724935733,
"grad_norm": 1.1184437274932861,
"learning_rate": 2.2492633610721562e-05,
"loss": 0.3506,
"step": 715
},
{
"epoch": 1.8508997429305913,
"grad_norm": 1.078311562538147,
"learning_rate": 2.2381850071945826e-05,
"loss": 0.355,
"step": 720
},
{
"epoch": 1.8637532133676094,
"grad_norm": 1.202223539352417,
"learning_rate": 2.2270532622542308e-05,
"loss": 0.3526,
"step": 725
},
{
"epoch": 1.8766066838046274,
"grad_norm": 1.1870999336242676,
"learning_rate": 2.2158689313821812e-05,
"loss": 0.3556,
"step": 730
},
{
"epoch": 1.8894601542416454,
"grad_norm": 1.0799825191497803,
"learning_rate": 2.2046328235129237e-05,
"loss": 0.3354,
"step": 735
},
{
"epoch": 1.9023136246786634,
"grad_norm": 0.9738938212394714,
"learning_rate": 2.193345751325847e-05,
"loss": 0.3546,
"step": 740
},
{
"epoch": 1.9151670951156814,
"grad_norm": 0.9771959781646729,
"learning_rate": 2.1820085311864616e-05,
"loss": 0.3732,
"step": 745
},
{
"epoch": 1.9280205655526992,
"grad_norm": 1.138285517692566,
"learning_rate": 2.170621983087351e-05,
"loss": 0.332,
"step": 750
},
{
"epoch": 1.9408740359897172,
"grad_norm": 1.048834204673767,
"learning_rate": 2.1591869305888694e-05,
"loss": 0.3499,
"step": 755
},
{
"epoch": 1.9537275064267352,
"grad_norm": 1.0362058877944946,
"learning_rate": 2.1477042007595676e-05,
"loss": 0.3614,
"step": 760
},
{
"epoch": 1.9665809768637532,
"grad_norm": 1.0137721300125122,
"learning_rate": 2.1361746241163807e-05,
"loss": 0.3326,
"step": 765
},
{
"epoch": 1.9794344473007712,
"grad_norm": 1.0517054796218872,
"learning_rate": 2.1245990345645562e-05,
"loss": 0.3399,
"step": 770
},
{
"epoch": 1.9922879177377892,
"grad_norm": 1.1292747259140015,
"learning_rate": 2.1129782693373374e-05,
"loss": 0.3438,
"step": 775
}
],
"logging_steps": 5,
"max_steps": 1945,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.2389199453544776e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}