| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1638, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01833180568285976, |
| "grad_norm": 0.6833510994911194, |
| "learning_rate": 0.00018, |
| "loss": 1.8247451782226562, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03666361136571952, |
| "grad_norm": 0.9012069702148438, |
| "learning_rate": 0.0001988943488943489, |
| "loss": 1.4739965438842773, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.054995417048579284, |
| "grad_norm": 0.8267062306404114, |
| "learning_rate": 0.00019766584766584767, |
| "loss": 1.3358205795288085, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07332722273143905, |
| "grad_norm": 0.7565646767616272, |
| "learning_rate": 0.00019643734643734644, |
| "loss": 1.2644735336303712, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09165902841429881, |
| "grad_norm": 0.8591431975364685, |
| "learning_rate": 0.0001952088452088452, |
| "loss": 1.244968318939209, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.10999083409715857, |
| "grad_norm": 0.8081830143928528, |
| "learning_rate": 0.000193980343980344, |
| "loss": 1.209956455230713, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.12832263978001834, |
| "grad_norm": 0.8808525800704956, |
| "learning_rate": 0.00019275184275184277, |
| "loss": 1.1500988960266114, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1466544454628781, |
| "grad_norm": 0.82117760181427, |
| "learning_rate": 0.00019152334152334154, |
| "loss": 1.1469905853271485, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.16498625114573787, |
| "grad_norm": 0.8583332896232605, |
| "learning_rate": 0.0001902948402948403, |
| "loss": 1.1948189735412598, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.18331805682859761, |
| "grad_norm": 0.6712405681610107, |
| "learning_rate": 0.00018906633906633907, |
| "loss": 1.1391284942626954, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2016498625114574, |
| "grad_norm": 0.8613548874855042, |
| "learning_rate": 0.00018783783783783784, |
| "loss": 1.1040291786193848, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.21998166819431714, |
| "grad_norm": 0.8608964085578918, |
| "learning_rate": 0.0001866093366093366, |
| "loss": 1.1578070640563964, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2383134738771769, |
| "grad_norm": 0.8638626337051392, |
| "learning_rate": 0.0001853808353808354, |
| "loss": 1.1500712394714356, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2566452795600367, |
| "grad_norm": 0.8131710886955261, |
| "learning_rate": 0.00018415233415233417, |
| "loss": 1.0960933685302734, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.27497708524289644, |
| "grad_norm": 1.023303508758545, |
| "learning_rate": 0.00018292383292383292, |
| "loss": 1.106197452545166, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2933088909257562, |
| "grad_norm": 0.7958722114562988, |
| "learning_rate": 0.0001816953316953317, |
| "loss": 1.1257465362548829, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.31164069660861593, |
| "grad_norm": 0.8529394865036011, |
| "learning_rate": 0.00018046683046683048, |
| "loss": 1.062838077545166, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.32997250229147573, |
| "grad_norm": 0.8443934917449951, |
| "learning_rate": 0.00017923832923832925, |
| "loss": 1.023012638092041, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.3483043079743355, |
| "grad_norm": 0.8035740852355957, |
| "learning_rate": 0.00017800982800982802, |
| "loss": 1.0435395240783691, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.36663611365719523, |
| "grad_norm": 0.8863442540168762, |
| "learning_rate": 0.00017678132678132678, |
| "loss": 1.0918319702148438, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.384967919340055, |
| "grad_norm": 0.8198781609535217, |
| "learning_rate": 0.00017555282555282555, |
| "loss": 1.0226572036743165, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4032997250229148, |
| "grad_norm": 0.9801501035690308, |
| "learning_rate": 0.00017432432432432432, |
| "loss": 1.0847922325134278, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.4216315307057745, |
| "grad_norm": 0.6737959980964661, |
| "learning_rate": 0.00017309582309582312, |
| "loss": 1.0508249282836915, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.4399633363886343, |
| "grad_norm": 0.7932195067405701, |
| "learning_rate": 0.00017186732186732188, |
| "loss": 1.042081069946289, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.458295142071494, |
| "grad_norm": 0.864284873008728, |
| "learning_rate": 0.00017063882063882065, |
| "loss": 1.0485817909240722, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4766269477543538, |
| "grad_norm": 0.8705862164497375, |
| "learning_rate": 0.00016941031941031942, |
| "loss": 1.0782199859619142, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.49495875343721357, |
| "grad_norm": 0.8180854916572571, |
| "learning_rate": 0.0001681818181818182, |
| "loss": 1.075644302368164, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5132905591200734, |
| "grad_norm": 0.8689812421798706, |
| "learning_rate": 0.00016695331695331696, |
| "loss": 1.0431486129760743, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5316223648029331, |
| "grad_norm": 0.7797616720199585, |
| "learning_rate": 0.00016572481572481573, |
| "loss": 1.019082736968994, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5499541704857929, |
| "grad_norm": 0.9403369426727295, |
| "learning_rate": 0.0001644963144963145, |
| "loss": 1.0104355812072754, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5682859761686526, |
| "grad_norm": 0.8061522245407104, |
| "learning_rate": 0.0001632678132678133, |
| "loss": 1.015712833404541, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5866177818515124, |
| "grad_norm": 1.0389378070831299, |
| "learning_rate": 0.00016203931203931203, |
| "loss": 0.983332347869873, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6049495875343721, |
| "grad_norm": 0.8335319757461548, |
| "learning_rate": 0.00016081081081081083, |
| "loss": 1.0035177230834962, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.6232813932172319, |
| "grad_norm": 0.8039399981498718, |
| "learning_rate": 0.0001595823095823096, |
| "loss": 1.0274381637573242, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6416131989000916, |
| "grad_norm": 0.777919352054596, |
| "learning_rate": 0.00015835380835380836, |
| "loss": 1.0467321395874023, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6599450045829515, |
| "grad_norm": 0.8876609802246094, |
| "learning_rate": 0.00015712530712530713, |
| "loss": 0.9848588943481446, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6782768102658112, |
| "grad_norm": 0.8413349986076355, |
| "learning_rate": 0.0001558968058968059, |
| "loss": 1.0367840766906737, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.696608615948671, |
| "grad_norm": 0.9921192526817322, |
| "learning_rate": 0.0001546683046683047, |
| "loss": 1.0018555641174316, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.7149404216315307, |
| "grad_norm": 0.8272864818572998, |
| "learning_rate": 0.00015343980343980344, |
| "loss": 1.0093633651733398, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7332722273143905, |
| "grad_norm": 0.7949515581130981, |
| "learning_rate": 0.00015221130221130223, |
| "loss": 1.0220769882202148, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7516040329972502, |
| "grad_norm": 0.8337849378585815, |
| "learning_rate": 0.000150982800982801, |
| "loss": 0.9723053932189941, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.76993583868011, |
| "grad_norm": 0.9521737694740295, |
| "learning_rate": 0.00014975429975429974, |
| "loss": 1.0630863189697266, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.7882676443629697, |
| "grad_norm": 0.8320823311805725, |
| "learning_rate": 0.00014852579852579854, |
| "loss": 0.9916687965393066, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.8065994500458296, |
| "grad_norm": 0.903413712978363, |
| "learning_rate": 0.0001472972972972973, |
| "loss": 0.9519875526428223, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.8249312557286893, |
| "grad_norm": 0.8783673048019409, |
| "learning_rate": 0.00014606879606879607, |
| "loss": 1.0056891441345215, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.843263061411549, |
| "grad_norm": 0.8581491708755493, |
| "learning_rate": 0.00014484029484029484, |
| "loss": 1.0211298942565918, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.8615948670944088, |
| "grad_norm": 0.7977339029312134, |
| "learning_rate": 0.0001436117936117936, |
| "loss": 0.9528703689575195, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.8799266727772685, |
| "grad_norm": 0.8142527937889099, |
| "learning_rate": 0.0001423832923832924, |
| "loss": 1.0111416816711425, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.8982584784601283, |
| "grad_norm": 0.865929126739502, |
| "learning_rate": 0.00014115479115479115, |
| "loss": 0.978369140625, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.916590284142988, |
| "grad_norm": 0.7955005764961243, |
| "learning_rate": 0.00013992628992628994, |
| "loss": 0.998965835571289, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.9349220898258478, |
| "grad_norm": 0.7812423706054688, |
| "learning_rate": 0.0001386977886977887, |
| "loss": 0.9544276237487793, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.9532538955087076, |
| "grad_norm": 0.817484974861145, |
| "learning_rate": 0.00013746928746928748, |
| "loss": 0.9699355125427246, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.9715857011915674, |
| "grad_norm": 0.875234842300415, |
| "learning_rate": 0.00013624078624078625, |
| "loss": 0.9715826034545898, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.9899175068744271, |
| "grad_norm": 0.7700145244598389, |
| "learning_rate": 0.00013501228501228501, |
| "loss": 0.9811925888061523, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.0073327222731439, |
| "grad_norm": 0.7260869145393372, |
| "learning_rate": 0.0001337837837837838, |
| "loss": 0.882848072052002, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.0256645279560037, |
| "grad_norm": 0.7263541221618652, |
| "learning_rate": 0.00013255528255528255, |
| "loss": 0.8015734672546386, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.0439963336388633, |
| "grad_norm": 0.8721809387207031, |
| "learning_rate": 0.00013132678132678135, |
| "loss": 0.778080940246582, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.0623281393217232, |
| "grad_norm": 0.8094732165336609, |
| "learning_rate": 0.00013009828009828011, |
| "loss": 0.7774394989013672, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.0806599450045828, |
| "grad_norm": 0.8383634686470032, |
| "learning_rate": 0.00012886977886977886, |
| "loss": 0.7659544944763184, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.0989917506874427, |
| "grad_norm": 0.9429551959037781, |
| "learning_rate": 0.00012764127764127765, |
| "loss": 0.7822256565093995, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.1173235563703026, |
| "grad_norm": 0.8432884812355042, |
| "learning_rate": 0.00012641277641277642, |
| "loss": 0.7877971649169921, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.1356553620531622, |
| "grad_norm": 0.9461238384246826, |
| "learning_rate": 0.0001251842751842752, |
| "loss": 0.8412753105163574, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.153987167736022, |
| "grad_norm": 0.9495576620101929, |
| "learning_rate": 0.00012395577395577396, |
| "loss": 0.7389075279235839, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.1723189734188817, |
| "grad_norm": 0.8137982487678528, |
| "learning_rate": 0.00012272727272727272, |
| "loss": 0.7511651992797852, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.1906507791017416, |
| "grad_norm": 0.9718158841133118, |
| "learning_rate": 0.00012149877149877152, |
| "loss": 0.7982583999633789, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.2089825847846012, |
| "grad_norm": 1.0837777853012085, |
| "learning_rate": 0.00012027027027027027, |
| "loss": 0.762714433670044, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.227314390467461, |
| "grad_norm": 0.9882314801216125, |
| "learning_rate": 0.00011904176904176904, |
| "loss": 0.7749518871307373, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.2456461961503207, |
| "grad_norm": 0.9463419914245605, |
| "learning_rate": 0.00011781326781326782, |
| "loss": 0.7641645908355713, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.2639780018331805, |
| "grad_norm": 0.9794511198997498, |
| "learning_rate": 0.00011658476658476658, |
| "loss": 0.8050010681152344, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.2823098075160404, |
| "grad_norm": 1.1002216339111328, |
| "learning_rate": 0.00011535626535626536, |
| "loss": 0.793759822845459, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.3006416131989, |
| "grad_norm": 0.9648078083992004, |
| "learning_rate": 0.00011412776412776414, |
| "loss": 0.7668623447418212, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.31897341888176, |
| "grad_norm": 1.0727074146270752, |
| "learning_rate": 0.00011289926289926291, |
| "loss": 0.8029165267944336, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.3373052245646195, |
| "grad_norm": 0.9617047905921936, |
| "learning_rate": 0.00011167076167076167, |
| "loss": 0.7798116683959961, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.3556370302474794, |
| "grad_norm": 0.8710028529167175, |
| "learning_rate": 0.00011044226044226045, |
| "loss": 0.7962553977966309, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.3739688359303392, |
| "grad_norm": 0.8409777283668518, |
| "learning_rate": 0.00010921375921375923, |
| "loss": 0.7403414726257325, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.3923006416131989, |
| "grad_norm": 1.029362440109253, |
| "learning_rate": 0.00010798525798525798, |
| "loss": 0.7815125465393067, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.4106324472960587, |
| "grad_norm": 0.9566736221313477, |
| "learning_rate": 0.00010675675675675677, |
| "loss": 0.7839052200317382, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.4289642529789184, |
| "grad_norm": 0.975339949131012, |
| "learning_rate": 0.00010552825552825553, |
| "loss": 0.749812650680542, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.4472960586617782, |
| "grad_norm": 1.1521857976913452, |
| "learning_rate": 0.00010429975429975432, |
| "loss": 0.7797944068908691, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.4656278643446379, |
| "grad_norm": 0.8301038146018982, |
| "learning_rate": 0.00010307125307125307, |
| "loss": 0.7280281543731689, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.4839596700274977, |
| "grad_norm": 0.9730615615844727, |
| "learning_rate": 0.00010184275184275185, |
| "loss": 0.7437058448791504, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.5022914757103574, |
| "grad_norm": 1.0270700454711914, |
| "learning_rate": 0.00010061425061425062, |
| "loss": 0.7848101615905761, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.5206232813932172, |
| "grad_norm": 1.2335196733474731, |
| "learning_rate": 9.938574938574939e-05, |
| "loss": 0.7896716117858886, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.538955087076077, |
| "grad_norm": 0.968611478805542, |
| "learning_rate": 9.815724815724816e-05, |
| "loss": 0.7918240070343018, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.5572868927589367, |
| "grad_norm": 0.9463298320770264, |
| "learning_rate": 9.692874692874694e-05, |
| "loss": 0.7453035354614258, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.5756186984417964, |
| "grad_norm": 1.004184603691101, |
| "learning_rate": 9.570024570024571e-05, |
| "loss": 0.8125950813293457, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.5939505041246562, |
| "grad_norm": 1.1150691509246826, |
| "learning_rate": 9.447174447174448e-05, |
| "loss": 0.7995445251464843, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.612282309807516, |
| "grad_norm": 1.060056447982788, |
| "learning_rate": 9.324324324324324e-05, |
| "loss": 0.7746751785278321, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.630614115490376, |
| "grad_norm": 1.0525883436203003, |
| "learning_rate": 9.201474201474201e-05, |
| "loss": 0.7861367225646972, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.6489459211732356, |
| "grad_norm": 0.9495214223861694, |
| "learning_rate": 9.07862407862408e-05, |
| "loss": 0.8055791854858398, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.6672777268560952, |
| "grad_norm": 0.8876036405563354, |
| "learning_rate": 8.955773955773956e-05, |
| "loss": 0.736152982711792, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.685609532538955, |
| "grad_norm": 1.0228347778320312, |
| "learning_rate": 8.832923832923833e-05, |
| "loss": 0.7859257698059082, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.703941338221815, |
| "grad_norm": 1.2196885347366333, |
| "learning_rate": 8.710073710073711e-05, |
| "loss": 0.7442365169525147, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.7222731439046746, |
| "grad_norm": 1.1201367378234863, |
| "learning_rate": 8.587223587223587e-05, |
| "loss": 0.7933924674987793, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.7406049495875344, |
| "grad_norm": 1.0457044839859009, |
| "learning_rate": 8.464373464373465e-05, |
| "loss": 0.7594408512115478, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.758936755270394, |
| "grad_norm": 1.2219468355178833, |
| "learning_rate": 8.341523341523342e-05, |
| "loss": 0.725389051437378, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.777268560953254, |
| "grad_norm": 1.0098403692245483, |
| "learning_rate": 8.21867321867322e-05, |
| "loss": 0.7753002166748046, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.7956003666361138, |
| "grad_norm": 1.020544409751892, |
| "learning_rate": 8.095823095823097e-05, |
| "loss": 0.7580110549926757, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.8139321723189734, |
| "grad_norm": 0.9121679067611694, |
| "learning_rate": 7.972972972972974e-05, |
| "loss": 0.7329069614410401, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.832263978001833, |
| "grad_norm": 1.1305643320083618, |
| "learning_rate": 7.85012285012285e-05, |
| "loss": 0.7436663150787354, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.850595783684693, |
| "grad_norm": 0.9970649480819702, |
| "learning_rate": 7.727272727272727e-05, |
| "loss": 0.7684538841247559, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.8689275893675528, |
| "grad_norm": 1.0161981582641602, |
| "learning_rate": 7.604422604422605e-05, |
| "loss": 0.7405171394348145, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.8872593950504126, |
| "grad_norm": 1.3399347066879272, |
| "learning_rate": 7.481572481572482e-05, |
| "loss": 0.780091667175293, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.9055912007332723, |
| "grad_norm": 1.2579443454742432, |
| "learning_rate": 7.358722358722359e-05, |
| "loss": 0.6868968486785889, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.923923006416132, |
| "grad_norm": 1.0092531442642212, |
| "learning_rate": 7.235872235872236e-05, |
| "loss": 0.742798137664795, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.9422548120989918, |
| "grad_norm": 1.121690273284912, |
| "learning_rate": 7.113022113022113e-05, |
| "loss": 0.8035343170166016, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.9605866177818516, |
| "grad_norm": 1.0780940055847168, |
| "learning_rate": 6.990171990171991e-05, |
| "loss": 0.7256640911102294, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.9789184234647113, |
| "grad_norm": 1.0335768461227417, |
| "learning_rate": 6.867321867321868e-05, |
| "loss": 0.751814889907837, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.9972502291475709, |
| "grad_norm": 1.0326813459396362, |
| "learning_rate": 6.744471744471746e-05, |
| "loss": 0.781493854522705, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.0146654445462877, |
| "grad_norm": 0.9365840554237366, |
| "learning_rate": 6.621621621621621e-05, |
| "loss": 0.6172435760498047, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.0329972502291476, |
| "grad_norm": 1.0775729417800903, |
| "learning_rate": 6.498771498771498e-05, |
| "loss": 0.5551021575927735, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.0513290559120074, |
| "grad_norm": 1.1233711242675781, |
| "learning_rate": 6.375921375921376e-05, |
| "loss": 0.530482006072998, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.0696608615948673, |
| "grad_norm": 0.9685810208320618, |
| "learning_rate": 6.253071253071253e-05, |
| "loss": 0.5284864902496338, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.0879926672777267, |
| "grad_norm": 1.3673559427261353, |
| "learning_rate": 6.130221130221131e-05, |
| "loss": 0.5300433158874511, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.1063244729605866, |
| "grad_norm": 1.291156530380249, |
| "learning_rate": 6.0073710073710075e-05, |
| "loss": 0.5388914585113526, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.1246562786434464, |
| "grad_norm": 1.0860686302185059, |
| "learning_rate": 5.8845208845208844e-05, |
| "loss": 0.5599504947662354, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.1429880843263063, |
| "grad_norm": 1.2297484874725342, |
| "learning_rate": 5.761670761670762e-05, |
| "loss": 0.5693662643432618, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.1613198900091657, |
| "grad_norm": 1.3128403425216675, |
| "learning_rate": 5.638820638820639e-05, |
| "loss": 0.5087790012359619, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.1796516956920255, |
| "grad_norm": 1.47864830493927, |
| "learning_rate": 5.515970515970517e-05, |
| "loss": 0.5520669460296631, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.1979835013748854, |
| "grad_norm": 1.3533881902694702, |
| "learning_rate": 5.393120393120393e-05, |
| "loss": 0.5194924354553223, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.2163153070577453, |
| "grad_norm": 1.0729988813400269, |
| "learning_rate": 5.27027027027027e-05, |
| "loss": 0.5386343955993652, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.234647112740605, |
| "grad_norm": 1.1851814985275269, |
| "learning_rate": 5.147420147420148e-05, |
| "loss": 0.5398934364318848, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.2529789184234645, |
| "grad_norm": 1.306754469871521, |
| "learning_rate": 5.024570024570024e-05, |
| "loss": 0.5412076473236084, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.2713107241063244, |
| "grad_norm": 1.2561992406845093, |
| "learning_rate": 4.901719901719902e-05, |
| "loss": 0.555613899230957, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.2896425297891843, |
| "grad_norm": 1.4935739040374756, |
| "learning_rate": 4.778869778869779e-05, |
| "loss": 0.560833215713501, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.307974335472044, |
| "grad_norm": 1.2064818143844604, |
| "learning_rate": 4.656019656019656e-05, |
| "loss": 0.5313505172729492, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.3263061411549035, |
| "grad_norm": 1.2250595092773438, |
| "learning_rate": 4.5331695331695335e-05, |
| "loss": 0.5530914306640625, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.3446379468377634, |
| "grad_norm": 1.249531865119934, |
| "learning_rate": 4.4103194103194104e-05, |
| "loss": 0.5281160831451416, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.3629697525206232, |
| "grad_norm": 1.1765642166137695, |
| "learning_rate": 4.287469287469288e-05, |
| "loss": 0.5416937351226807, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.381301558203483, |
| "grad_norm": 1.2973071336746216, |
| "learning_rate": 4.164619164619165e-05, |
| "loss": 0.5341888427734375, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.399633363886343, |
| "grad_norm": 1.3533828258514404, |
| "learning_rate": 4.0417690417690415e-05, |
| "loss": 0.5386404514312744, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.4179651695692024, |
| "grad_norm": 1.1323643922805786, |
| "learning_rate": 3.918918918918919e-05, |
| "loss": 0.5549521446228027, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.4362969752520622, |
| "grad_norm": 1.0967226028442383, |
| "learning_rate": 3.7960687960687965e-05, |
| "loss": 0.5668260097503662, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.454628780934922, |
| "grad_norm": 1.3874995708465576, |
| "learning_rate": 3.6732186732186734e-05, |
| "loss": 0.5530946254730225, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.472960586617782, |
| "grad_norm": 1.3139115571975708, |
| "learning_rate": 3.550368550368551e-05, |
| "loss": 0.544743013381958, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.4912923923006414, |
| "grad_norm": 1.3629847764968872, |
| "learning_rate": 3.427518427518428e-05, |
| "loss": 0.5550421714782715, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.5096241979835012, |
| "grad_norm": 1.3279292583465576, |
| "learning_rate": 3.3046683046683045e-05, |
| "loss": 0.5361227035522461, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.527956003666361, |
| "grad_norm": 1.3736717700958252, |
| "learning_rate": 3.181818181818182e-05, |
| "loss": 0.5256178855895997, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.546287809349221, |
| "grad_norm": 1.2405906915664673, |
| "learning_rate": 3.058968058968059e-05, |
| "loss": 0.5551014900207519, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.564619615032081, |
| "grad_norm": 1.2711869478225708, |
| "learning_rate": 2.9361179361179364e-05, |
| "loss": 0.549025011062622, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.5829514207149407, |
| "grad_norm": 1.1510083675384521, |
| "learning_rate": 2.8132678132678135e-05, |
| "loss": 0.5108777046203613, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.6012832263978, |
| "grad_norm": 1.2585129737854004, |
| "learning_rate": 2.6904176904176904e-05, |
| "loss": 0.545832633972168, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.61961503208066, |
| "grad_norm": 1.2926920652389526, |
| "learning_rate": 2.5675675675675675e-05, |
| "loss": 0.531532621383667, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.63794683776352, |
| "grad_norm": 1.1816222667694092, |
| "learning_rate": 2.4447174447174447e-05, |
| "loss": 0.4899789333343506, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.656278643446379, |
| "grad_norm": 1.3441561460494995, |
| "learning_rate": 2.3218673218673222e-05, |
| "loss": 0.5534125804901123, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.674610449129239, |
| "grad_norm": 1.5367056131362915, |
| "learning_rate": 2.199017199017199e-05, |
| "loss": 0.5469470500946045, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.692942254812099, |
| "grad_norm": 1.292490005493164, |
| "learning_rate": 2.0761670761670762e-05, |
| "loss": 0.5418555259704589, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.711274060494959, |
| "grad_norm": 1.2457395792007446, |
| "learning_rate": 1.9533169533169534e-05, |
| "loss": 0.5272214889526368, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.7296058661778186, |
| "grad_norm": 1.3204960823059082, |
| "learning_rate": 1.8304668304668305e-05, |
| "loss": 0.5287877559661865, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.7479376718606785, |
| "grad_norm": 1.0838243961334229, |
| "learning_rate": 1.7076167076167077e-05, |
| "loss": 0.49266462326049804, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.766269477543538, |
| "grad_norm": 1.3524028062820435, |
| "learning_rate": 1.584766584766585e-05, |
| "loss": 0.534552526473999, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.7846012832263978, |
| "grad_norm": 1.2434245347976685, |
| "learning_rate": 1.4619164619164619e-05, |
| "loss": 0.5303339004516602, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.8029330889092576, |
| "grad_norm": 1.4432783126831055, |
| "learning_rate": 1.339066339066339e-05, |
| "loss": 0.5432450771331787, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.8212648945921175, |
| "grad_norm": 1.372916340827942, |
| "learning_rate": 1.2162162162162164e-05, |
| "loss": 0.5449412345886231, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.839596700274977, |
| "grad_norm": 1.3015090227127075, |
| "learning_rate": 1.0933660933660935e-05, |
| "loss": 0.5642722129821778, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.8579285059578368, |
| "grad_norm": 1.434592366218567, |
| "learning_rate": 9.705159705159705e-06, |
| "loss": 0.5404855728149414, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.8762603116406966, |
| "grad_norm": 2.344008445739746, |
| "learning_rate": 8.476658476658477e-06, |
| "loss": 0.5066645622253418, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.8945921173235565, |
| "grad_norm": 1.0202550888061523, |
| "learning_rate": 7.2481572481572485e-06, |
| "loss": 0.5348256587982178, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.9129239230064163, |
| "grad_norm": 1.2149248123168945, |
| "learning_rate": 6.019656019656019e-06, |
| "loss": 0.4903108596801758, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.9312557286892758, |
| "grad_norm": 1.5240399837493896, |
| "learning_rate": 4.791154791154792e-06, |
| "loss": 0.5296618461608886, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.9495875343721356, |
| "grad_norm": 1.4258157014846802, |
| "learning_rate": 3.562653562653563e-06, |
| "loss": 0.49115524291992185, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.9679193400549955, |
| "grad_norm": 1.1255574226379395, |
| "learning_rate": 2.3341523341523343e-06, |
| "loss": 0.5161442279815673, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.9862511457378553, |
| "grad_norm": 1.365963101387024, |
| "learning_rate": 1.1056511056511056e-06, |
| "loss": 0.5332399368286133, |
| "step": 1630 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1638, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.469692570628813e+16, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|