[ { "loss": 2.6827, "grad_norm": 5.949710369110107, "learning_rate": 1.267605633802817e-05, "epoch": 0.007067137809187279, "step": 10 }, { "loss": 1.9991, "grad_norm": 1.8221319913864136, "learning_rate": 2.676056338028169e-05, "epoch": 0.014134275618374558, "step": 20 }, { "loss": 1.7179, "grad_norm": 0.8600999712944031, "learning_rate": 4.0845070422535214e-05, "epoch": 0.02120141342756184, "step": 30 }, { "loss": 1.6038, "grad_norm": 0.7585324048995972, "learning_rate": 5.492957746478874e-05, "epoch": 0.028268551236749116, "step": 40 }, { "loss": 1.4924, "grad_norm": 0.7620086669921875, "learning_rate": 6.901408450704226e-05, "epoch": 0.0353356890459364, "step": 50 }, { "loss": 1.4182, "grad_norm": 0.6350599527359009, "learning_rate": 8.309859154929578e-05, "epoch": 0.04240282685512368, "step": 60 }, { "loss": 1.386, "grad_norm": 0.7683842778205872, "learning_rate": 9.718309859154931e-05, "epoch": 0.04946996466431095, "step": 70 }, { "loss": 1.3652, "grad_norm": 0.7968618273735046, "learning_rate": 0.00011126760563380282, "epoch": 0.05653710247349823, "step": 80 }, { "loss": 1.348, "grad_norm": 0.7063258290290833, "learning_rate": 0.00012535211267605635, "epoch": 0.0636042402826855, "step": 90 }, { "loss": 1.322, "grad_norm": 0.8496018052101135, "learning_rate": 0.00013943661971830987, "epoch": 0.0706713780918728, "step": 100 }, { "loss": 1.3119, "grad_norm": 0.8192782998085022, "learning_rate": 0.00015352112676056339, "epoch": 0.07773851590106007, "step": 110 }, { "loss": 1.2765, "grad_norm": 0.8431249260902405, "learning_rate": 0.0001676056338028169, "epoch": 0.08480565371024736, "step": 120 }, { "loss": 1.2576, "grad_norm": 0.6553122997283936, "learning_rate": 0.00018169014084507045, "epoch": 0.09187279151943463, "step": 130 }, { "loss": 1.2482, "grad_norm": 0.6763297915458679, "learning_rate": 0.00019577464788732396, "epoch": 0.0989399293286219, "step": 140 }, { "loss": 1.228, "grad_norm": 0.6940188407897949, "learning_rate": 0.00019999665339174013, "epoch": 0.10600706713780919, "step": 150 }, { "loss": 1.2421, "grad_norm": 0.688686728477478, "learning_rate": 0.00019998026238030888, "epoch": 0.11307420494699646, "step": 160 }, { "loss": 1.2332, "grad_norm": 0.7205774784088135, "learning_rate": 0.00019995021451869546, "epoch": 0.12014134275618374, "step": 170 }, { "loss": 1.2164, "grad_norm": 0.6336075067520142, "learning_rate": 0.00019990651391130147, "epoch": 0.127208480565371, "step": 180 }, { "loss": 1.2046, "grad_norm": 0.5959163308143616, "learning_rate": 0.00019984916652743156, "epoch": 0.13427561837455831, "step": 190 }, { "loss": 1.224, "grad_norm": 0.6479778289794922, "learning_rate": 0.00019977818020047817, "epoch": 0.1413427561837456, "step": 200 }, { "loss": 1.1952, "grad_norm": 0.6542138457298279, "learning_rate": 0.00019969356462685146, "epoch": 0.14840989399293286, "step": 210 }, { "loss": 1.1933, "grad_norm": 0.605870246887207, "learning_rate": 0.0001995953313646548, "epoch": 0.15547703180212014, "step": 220 }, { "loss": 1.1723, "grad_norm": 0.6159653067588806, "learning_rate": 0.0001994834938321061, "epoch": 0.1625441696113074, "step": 230 }, { "loss": 1.1641, "grad_norm": 0.614716649055481, "learning_rate": 0.00019935806730570488, "epoch": 0.1696113074204947, "step": 240 }, { "loss": 1.1679, "grad_norm": 0.6723307967185974, "learning_rate": 0.00019921906891814551, "epoch": 0.17667844522968199, "step": 250 }, { "loss": 1.1699, "grad_norm": 0.5286707878112793, "learning_rate": 0.000199066517655977, "epoch": 0.18374558303886926, "step": 260 }, { "loss": 1.1488, "grad_norm": 0.6386291980743408, "learning_rate": 0.00019890043435700954, "epoch": 0.19081272084805653, "step": 270 }, { "loss": 1.155, "grad_norm": 0.5279228091239929, "learning_rate": 0.00019872084170746829, "epoch": 0.1978798586572438, "step": 280 }, { "eval_loss": 1.1403530836105347, "eval_runtime": 57.7627, "eval_samples_per_second": 41.238, "eval_steps_per_second": 10.318, "epoch": 0.19929328621908127, "step": 282 }, { "loss": 1.1519, "grad_norm": 0.5783646106719971, "learning_rate": 0.0001985277642388941, "epoch": 0.2049469964664311, "step": 290 }, { "loss": 1.1458, "grad_norm": 0.5709810853004456, "learning_rate": 0.00019832122832479326, "epoch": 0.21201413427561838, "step": 300 }, { "loss": 1.1386, "grad_norm": 0.6111096143722534, "learning_rate": 0.0001981012621770344, "epoch": 0.21908127208480566, "step": 310 }, { "loss": 1.13, "grad_norm": 0.6752498745918274, "learning_rate": 0.00019786789584199524, "epoch": 0.22614840989399293, "step": 320 }, { "loss": 1.1317, "grad_norm": 0.590641975402832, "learning_rate": 0.00019762116119645818, "epoch": 0.2332155477031802, "step": 330 }, { "loss": 1.1375, "grad_norm": 0.5161224603652954, "learning_rate": 0.00019736109194325635, "epoch": 0.24028268551236748, "step": 340 }, { "loss": 1.1317, "grad_norm": 0.5282869338989258, "learning_rate": 0.00019708772360666957, "epoch": 0.24734982332155478, "step": 350 }, { "loss": 1.0828, "grad_norm": 0.6267129778862, "learning_rate": 0.00019680109352757227, "epoch": 0.254416961130742, "step": 360 }, { "loss": 1.1445, "grad_norm": 0.5746105313301086, "learning_rate": 0.0001965012408583327, "epoch": 0.26148409893992935, "step": 370 }, { "loss": 1.1146, "grad_norm": 0.6003304719924927, "learning_rate": 0.00019618820655746487, "epoch": 0.26855123674911663, "step": 380 }, { "loss": 1.1176, "grad_norm": 0.5761094689369202, "learning_rate": 0.0001958620333840339, "epoch": 0.2756183745583039, "step": 390 }, { "loss": 1.107, "grad_norm": 0.5851646661758423, "learning_rate": 0.00019552276589181522, "epoch": 0.2826855123674912, "step": 400 }, { "loss": 1.1305, "grad_norm": 0.603738009929657, "learning_rate": 0.00019517045042320892, "epoch": 0.28975265017667845, "step": 410 }, { "loss": 1.11, "grad_norm": 0.5267948508262634, "learning_rate": 0.00019480513510290934, "epoch": 0.2968197879858657, "step": 420 }, { "loss": 1.1154, "grad_norm": 0.569698691368103, "learning_rate": 0.00019442686983133168, "epoch": 0.303886925795053, "step": 430 }, { "loss": 1.0916, "grad_norm": 0.5906787514686584, "learning_rate": 0.0001940357062777956, "epoch": 0.31095406360424027, "step": 440 }, { "loss": 1.0965, "grad_norm": 0.563790500164032, "learning_rate": 0.0001936316978734676, "epoch": 0.31802120141342755, "step": 450 }, { "loss": 1.1157, "grad_norm": 0.5844557881355286, "learning_rate": 0.0001932148998040626, "epoch": 0.3250883392226148, "step": 460 }, { "loss": 1.0973, "grad_norm": 0.5148825645446777, "learning_rate": 0.00019278536900230563, "epoch": 0.3321554770318021, "step": 470 }, { "loss": 1.1128, "grad_norm": 0.5640899538993835, "learning_rate": 0.0001923431641401552, "epoch": 0.3392226148409894, "step": 480 }, { "loss": 1.1074, "grad_norm": 0.5463106036186218, "learning_rate": 0.00019188834562078902, "epoch": 0.3462897526501767, "step": 490 }, { "loss": 1.0837, "grad_norm": 0.5812387466430664, "learning_rate": 0.00019142097557035308, "epoch": 0.35335689045936397, "step": 500 }, { "loss": 1.0835, "grad_norm": 0.541191816329956, "learning_rate": 0.0001909411178294756, "epoch": 0.36042402826855124, "step": 510 }, { "loss": 1.0864, "grad_norm": 0.6021603941917419, "learning_rate": 0.0001904488379445466, "epoch": 0.3674911660777385, "step": 520 }, { "loss": 1.0724, "grad_norm": 0.6133444309234619, "learning_rate": 0.00018994420315876468, "epoch": 0.3745583038869258, "step": 530 }, { "loss": 1.0666, "grad_norm": 0.6168462038040161, "learning_rate": 0.0001894272824029518, "epoch": 0.38162544169611307, "step": 540 }, { "loss": 1.0812, "grad_norm": 0.5473210215568542, "learning_rate": 0.0001888981462861377, "epoch": 0.38869257950530034, "step": 550 }, { "loss": 1.0849, "grad_norm": 0.674217700958252, "learning_rate": 0.00018835686708591496, "epoch": 0.3957597173144876, "step": 560 }, { "eval_loss": 1.060619592666626, "eval_runtime": 43.4781, "eval_samples_per_second": 54.786, "eval_steps_per_second": 13.708, "epoch": 0.39858657243816253, "step": 564 }, { "loss": 1.0668, "grad_norm": 0.5787924528121948, "learning_rate": 0.00018780351873856627, "epoch": 0.4028268551236749, "step": 570 }, { "loss": 1.0726, "grad_norm": 0.564720869064331, "learning_rate": 0.00018723817682896515, "epoch": 0.4098939929328622, "step": 580 }, { "loss": 1.0729, "grad_norm": 0.5361018776893616, "learning_rate": 0.00018666091858025112, "epoch": 0.4169611307420495, "step": 590 }, { "loss": 1.0631, "grad_norm": 0.5667257905006409, "learning_rate": 0.0001860718228432817, "epoch": 0.42402826855123676, "step": 600 }, { "loss": 1.0603, "grad_norm": 0.5567010045051575, "learning_rate": 0.00018547097008586155, "epoch": 0.43109540636042404, "step": 610 }, { "loss": 1.0602, "grad_norm": 0.6078583002090454, "learning_rate": 0.00018485844238175095, "epoch": 0.4381625441696113, "step": 620 }, { "loss": 1.0553, "grad_norm": 0.5796458721160889, "learning_rate": 0.000184234323399455, "epoch": 0.4452296819787986, "step": 630 }, { "loss": 1.0742, "grad_norm": 0.5551220178604126, "learning_rate": 0.0001835986983907947, "epoch": 0.45229681978798586, "step": 640 }, { "loss": 1.0701, "grad_norm": 0.5546141862869263, "learning_rate": 0.00018295165417926207, "epoch": 0.45936395759717313, "step": 650 }, { "loss": 1.0495, "grad_norm": 0.752085268497467, "learning_rate": 0.00018229327914816052, "epoch": 0.4664310954063604, "step": 660 }, { "loss": 1.0539, "grad_norm": 0.514069676399231, "learning_rate": 0.00018162366322853191, "epoch": 0.4734982332155477, "step": 670 }, { "loss": 1.0476, "grad_norm": 0.545942485332489, "learning_rate": 0.00018094289788687245, "epoch": 0.48056537102473496, "step": 680 }, { "loss": 1.0412, "grad_norm": 0.5867109298706055, "learning_rate": 0.0001802510761126389, "epoch": 0.4876325088339223, "step": 690 }, { "loss": 1.0501, "grad_norm": 0.548149824142456, "learning_rate": 0.00017954829240554644, "epoch": 0.49469964664310956, "step": 700 }, { "loss": 1.0341, "grad_norm": 0.6045145988464355, "learning_rate": 0.00017883464276266064, "epoch": 0.5017667844522968, "step": 710 }, { "loss": 1.0359, "grad_norm": 0.5739872455596924, "learning_rate": 0.00017811022466528452, "epoch": 0.508833922261484, "step": 720 }, { "loss": 1.0478, "grad_norm": 0.6167759299278259, "learning_rate": 0.0001773751370656431, "epoch": 0.5159010600706714, "step": 730 }, { "loss": 1.077, "grad_norm": 0.5697989463806152, "learning_rate": 0.0001766294803733671, "epoch": 0.5229681978798587, "step": 740 }, { "loss": 1.0321, "grad_norm": 0.5935072302818298, "learning_rate": 0.0001758733564417773, "epoch": 0.5300353356890459, "step": 750 }, { "loss": 1.0477, "grad_norm": 0.571767270565033, "learning_rate": 0.00017510686855397176, "epoch": 0.5371024734982333, "step": 760 }, { "loss": 1.0524, "grad_norm": 0.6030092239379883, "learning_rate": 0.00017433012140871811, "epoch": 0.5441696113074205, "step": 770 }, { "loss": 1.0298, "grad_norm": 0.6298210024833679, "learning_rate": 0.00017354322110615188, "epoch": 0.5512367491166078, "step": 780 }, { "loss": 1.0295, "grad_norm": 0.5458575487136841, "learning_rate": 0.00017274627513328385, "epoch": 0.558303886925795, "step": 790 }, { "loss": 1.0273, "grad_norm": 0.5203152298927307, "learning_rate": 0.00017193939234931777, "epoch": 0.5653710247349824, "step": 800 }, { "loss": 1.0368, "grad_norm": 0.6097717881202698, "learning_rate": 0.00017112268297078077, "epoch": 0.5724381625441696, "step": 810 }, { "loss": 1.0248, "grad_norm": 0.5902014374732971, "learning_rate": 0.0001702962585564681, "epoch": 0.5795053003533569, "step": 820 }, { "loss": 1.0522, "grad_norm": 0.5153626799583435, "learning_rate": 0.00016946023199220487, "epoch": 0.5865724381625441, "step": 830 }, { "loss": 1.0479, "grad_norm": 0.5805534720420837, "learning_rate": 0.0001686147174754263, "epoch": 0.5936395759717314, "step": 840 }, { "eval_loss": 1.0175507068634033, "eval_runtime": 43.4082, "eval_samples_per_second": 54.874, "eval_steps_per_second": 13.73, "epoch": 0.5978798586572438, "step": 846 }, { "loss": 1.0148, "grad_norm": 0.5587823390960693, "learning_rate": 0.00016775983049957887, "epoch": 0.6007067137809188, "step": 850 }, { "loss": 1.0049, "grad_norm": 0.5274335741996765, "learning_rate": 0.0001668956878383445, "epoch": 0.607773851590106, "step": 860 }, { "loss": 1.0066, "grad_norm": 0.5678777694702148, "learning_rate": 0.0001660224075296896, "epoch": 0.6148409893992933, "step": 870 }, { "loss": 1.0076, "grad_norm": 0.5417252779006958, "learning_rate": 0.00016514010885974184, "epoch": 0.6219081272084805, "step": 880 }, { "loss": 1.026, "grad_norm": 0.5451430678367615, "learning_rate": 0.00016424891234649618, "epoch": 0.6289752650176679, "step": 890 }, { "loss": 1.007, "grad_norm": 0.5552809834480286, "learning_rate": 0.00016334893972335247, "epoch": 0.6360424028268551, "step": 900 }, { "loss": 1.0216, "grad_norm": 0.5455328226089478, "learning_rate": 0.00016244031392248748, "epoch": 0.6431095406360424, "step": 910 }, { "loss": 1.0191, "grad_norm": 0.5708609223365784, "learning_rate": 0.00016152315905806268, "epoch": 0.6501766784452296, "step": 920 }, { "loss": 1.029, "grad_norm": 0.5028846859931946, "learning_rate": 0.00016059760040927103, "epoch": 0.657243816254417, "step": 930 }, { "loss": 1.0394, "grad_norm": 0.5415972471237183, "learning_rate": 0.0001596637644032242, "epoch": 0.6643109540636042, "step": 940 }, { "loss": 1.0208, "grad_norm": 0.5780535936355591, "learning_rate": 0.00015872177859768333, "epoch": 0.6713780918727915, "step": 950 }, { "loss": 1.006, "grad_norm": 0.585171103477478, "learning_rate": 0.00015777177166363527, "epoch": 0.6784452296819788, "step": 960 }, { "loss": 0.9989, "grad_norm": 0.5906310677528381, "learning_rate": 0.00015681387336771656, "epoch": 0.6855123674911661, "step": 970 }, { "loss": 1.0085, "grad_norm": 0.5665215849876404, "learning_rate": 0.0001558482145544879, "epoch": 0.6925795053003534, "step": 980 }, { "loss": 1.0169, "grad_norm": 0.5841283798217773, "learning_rate": 0.0001548749271285616, "epoch": 0.6996466431095406, "step": 990 }, { "loss": 1.0107, "grad_norm": 0.508304238319397, "learning_rate": 0.0001538941440365837, "epoch": 0.7067137809187279, "step": 1000 }, { "loss": 1.0042, "grad_norm": 0.5774527192115784, "learning_rate": 0.00015290599924907433, "epoch": 0.7137809187279152, "step": 1010 }, { "loss": 1.0105, "grad_norm": 0.6468122005462646, "learning_rate": 0.00015191062774212773, "epoch": 0.7208480565371025, "step": 1020 }, { "loss": 1.0069, "grad_norm": 0.6112880110740662, "learning_rate": 0.0001509081654789753, "epoch": 0.7279151943462897, "step": 1030 }, { "loss": 0.9936, "grad_norm": 0.7022767066955566, "learning_rate": 0.00014989874939141351, "epoch": 0.734982332155477, "step": 1040 }, { "loss": 1.0231, "grad_norm": 0.5384315848350525, "learning_rate": 0.0001488825173610997, "epoch": 0.7420494699646644, "step": 1050 }, { "loss": 0.9907, "grad_norm": 0.5737187266349792, "learning_rate": 0.0001478596082007181, "epoch": 0.7491166077738516, "step": 1060 }, { "loss": 1.0167, "grad_norm": 0.5474862456321716, "learning_rate": 0.00014683016163501855, "epoch": 0.7561837455830389, "step": 1070 }, { "loss": 0.9965, "grad_norm": 0.580353319644928, "learning_rate": 0.0001457943182817308, "epoch": 0.7632508833922261, "step": 1080 }, { "loss": 1.02, "grad_norm": 0.5732952952384949, "learning_rate": 0.00014475221963235687, "epoch": 0.7703180212014135, "step": 1090 }, { "loss": 1.0026, "grad_norm": 0.5824176669120789, "learning_rate": 0.00014370400803284374, "epoch": 0.7773851590106007, "step": 1100 }, { "loss": 1.0063, "grad_norm": 0.5431606769561768, "learning_rate": 0.00014264982666413958, "epoch": 0.784452296819788, "step": 1110 }, { "loss": 1.0196, "grad_norm": 0.5714312791824341, "learning_rate": 0.00014158981952263608, "epoch": 0.7915194346289752, "step": 1120 }, { "eval_loss": 0.9868658185005188, "eval_runtime": 43.5605, "eval_samples_per_second": 54.683, "eval_steps_per_second": 13.682, "epoch": 0.7971731448763251, "step": 1128 }, { "loss": 1.0083, "grad_norm": 0.5832623839378357, "learning_rate": 0.000140524131400499, "epoch": 0.7985865724381626, "step": 1130 }, { "loss": 1.0142, "grad_norm": 0.5128731727600098, "learning_rate": 0.00013945290786589027, "epoch": 0.8056537102473498, "step": 1140 }, { "loss": 0.9984, "grad_norm": 0.5578513145446777, "learning_rate": 0.00013837629524308408, "epoch": 0.8127208480565371, "step": 1150 }, { "loss": 0.9849, "grad_norm": 0.6349474191665649, "learning_rate": 0.00013729444059247954, "epoch": 0.8197879858657244, "step": 1160 }, { "loss": 1.0082, "grad_norm": 0.5942397713661194, "learning_rate": 0.00013620749169051307, "epoch": 0.8268551236749117, "step": 1170 }, { "loss": 0.9698, "grad_norm": 0.5328270196914673, "learning_rate": 0.00013511559700947264, "epoch": 0.833922261484099, "step": 1180 }, { "loss": 1.0041, "grad_norm": 0.5367234945297241, "learning_rate": 0.00013401890569721725, "epoch": 0.8409893992932862, "step": 1190 }, { "loss": 0.9784, "grad_norm": 0.5053948163986206, "learning_rate": 0.00013291756755680388, "epoch": 0.8480565371024735, "step": 1200 }, { "loss": 0.9821, "grad_norm": 0.6247851252555847, "learning_rate": 0.00013181173302602528, "epoch": 0.8551236749116607, "step": 1210 }, { "loss": 0.9842, "grad_norm": 0.6302320957183838, "learning_rate": 0.0001307015531568606, "epoch": 0.8621908127208481, "step": 1220 }, { "loss": 0.9794, "grad_norm": 0.48732852935791016, "learning_rate": 0.00012958717959484254, "epoch": 0.8692579505300353, "step": 1230 }, { "loss": 1.0007, "grad_norm": 0.5459453463554382, "learning_rate": 0.0001284687645583432, "epoch": 0.8763250883392226, "step": 1240 }, { "loss": 0.978, "grad_norm": 0.6455541253089905, "learning_rate": 0.0001273464608177818, "epoch": 0.8833922261484098, "step": 1250 }, { "loss": 0.9907, "grad_norm": 0.5769440531730652, "learning_rate": 0.00012622042167475693, "epoch": 0.8904593639575972, "step": 1260 }, { "loss": 0.9888, "grad_norm": 0.5793958306312561, "learning_rate": 0.00012509080094110604, "epoch": 0.8975265017667845, "step": 1270 }, { "loss": 0.983, "grad_norm": 0.5198792815208435, "learning_rate": 0.00012395775291789568, "epoch": 0.9045936395759717, "step": 1280 }, { "loss": 1.0023, "grad_norm": 0.5861442685127258, "learning_rate": 0.00012282143237434478, "epoch": 0.911660777385159, "step": 1290 }, { "loss": 0.9725, "grad_norm": 0.594825029373169, "learning_rate": 0.00012168199452668341, "epoch": 0.9187279151943463, "step": 1300 }, { "loss": 0.987, "grad_norm": 0.5613613128662109, "learning_rate": 0.00012053959501695145, "epoch": 0.9257950530035336, "step": 1310 }, { "loss": 0.9891, "grad_norm": 0.5798191428184509, "learning_rate": 0.00011939438989173828, "epoch": 0.9328621908127208, "step": 1320 }, { "loss": 0.9891, "grad_norm": 0.5428863763809204, "learning_rate": 0.00011824653558086769, "epoch": 0.9399293286219081, "step": 1330 }, { "loss": 0.9865, "grad_norm": 0.5526650547981262, "learning_rate": 0.00011709618887603014, "epoch": 0.9469964664310954, "step": 1340 }, { "loss": 0.958, "grad_norm": 0.6232343316078186, "learning_rate": 0.00011594350690936581, "epoch": 0.9540636042402827, "step": 1350 }, { "loss": 1.0113, "grad_norm": 0.6425083875656128, "learning_rate": 0.00011478864713200113, "epoch": 0.9611307420494699, "step": 1360 }, { "loss": 0.9683, "grad_norm": 0.5667459964752197, "learning_rate": 0.00011363176729254146, "epoch": 0.9681978798586572, "step": 1370 }, { "loss": 0.9884, "grad_norm": 0.5597785115242004, "learning_rate": 0.00011247302541552359, "epoch": 0.9752650176678446, "step": 1380 }, { "loss": 0.9724, "grad_norm": 0.538309633731842, "learning_rate": 0.00011131257977983014, "epoch": 0.9823321554770318, "step": 1390 }, { "loss": 0.9734, "grad_norm": 0.5398442149162292, "learning_rate": 0.00011015058889706942, "epoch": 0.9893992932862191, "step": 1400 }, { "loss": 0.9643, "grad_norm": 0.5224325656890869, "learning_rate": 0.00010898721148992351, "epoch": 0.9964664310954063, "step": 1410 }, { "eval_loss": 0.9602861404418945, "eval_runtime": 43.2203, "eval_samples_per_second": 55.113, "eval_steps_per_second": 13.79, "epoch": 0.9964664310954063, "step": 1410 }, { "loss": 0.937, "grad_norm": 0.5699999928474426, "learning_rate": 0.00010782260647046742, "epoch": 1.0035335689045937, "step": 1420 }, { "loss": 0.8954, "grad_norm": 0.5502023100852966, "learning_rate": 0.00010665693291846244, "epoch": 1.010600706713781, "step": 1430 }, { "loss": 0.9035, "grad_norm": 0.5710829496383667, "learning_rate": 0.00010549035005962653, "epoch": 1.017667844522968, "step": 1440 }, { "loss": 0.9071, "grad_norm": 0.5481554865837097, "learning_rate": 0.00010432301724388485, "epoch": 1.0247349823321554, "step": 1450 }, { "loss": 0.9024, "grad_norm": 0.5516000986099243, "learning_rate": 0.0001031550939236033, "epoch": 1.0318021201413428, "step": 1460 }, { "loss": 0.912, "grad_norm": 0.5710824728012085, "learning_rate": 0.00010198673963180796, "epoch": 1.03886925795053, "step": 1470 }, { "loss": 0.883, "grad_norm": 0.5751714110374451, "learning_rate": 0.00010081811396039373, "epoch": 1.0459363957597174, "step": 1480 }, { "loss": 0.8968, "grad_norm": 0.6338666081428528, "learning_rate": 9.964937653832468e-05, "epoch": 1.0530035335689045, "step": 1490 }, { "loss": 0.9077, "grad_norm": 0.5074037313461304, "learning_rate": 9.848068700982955e-05, "epoch": 1.0600706713780919, "step": 1500 }, { "loss": 0.908, "grad_norm": 0.5829271674156189, "learning_rate": 9.731220501259501e-05, "epoch": 1.0671378091872792, "step": 1510 }, { "loss": 0.8943, "grad_norm": 0.5685243606567383, "learning_rate": 9.614409015595995e-05, "epoch": 1.0742049469964665, "step": 1520 }, { "loss": 0.8869, "grad_norm": 0.5944189429283142, "learning_rate": 9.497650199911341e-05, "epoch": 1.0812720848056536, "step": 1530 }, { "loss": 0.9062, "grad_norm": 0.5707447528839111, "learning_rate": 9.380960002929979e-05, "epoch": 1.088339222614841, "step": 1540 }, { "loss": 0.8912, "grad_norm": 0.5999617576599121, "learning_rate": 9.264354364003327e-05, "epoch": 1.0954063604240283, "step": 1550 }, { "loss": 0.9002, "grad_norm": 0.5612591505050659, "learning_rate": 9.147849210932571e-05, "epoch": 1.1024734982332156, "step": 1560 }, { "loss": 0.9025, "grad_norm": 0.6082556247711182, "learning_rate": 9.031460457792982e-05, "epoch": 1.1095406360424027, "step": 1570 }, { "loss": 0.9286, "grad_norm": 0.562528133392334, "learning_rate": 8.915204002760122e-05, "epoch": 1.11660777385159, "step": 1580 }, { "loss": 0.9022, "grad_norm": 0.5675996541976929, "learning_rate": 8.799095725938243e-05, "epoch": 1.1236749116607774, "step": 1590 }, { "loss": 0.8905, "grad_norm": 0.577520489692688, "learning_rate": 8.68315148719111e-05, "epoch": 1.1307420494699647, "step": 1600 }, { "loss": 0.9199, "grad_norm": 0.557808518409729, "learning_rate": 8.567387123975648e-05, "epoch": 1.137809187279152, "step": 1610 }, { "loss": 0.8856, "grad_norm": 0.6074324250221252, "learning_rate": 8.451818449178591e-05, "epoch": 1.1448763250883391, "step": 1620 }, { "loss": 0.8989, "grad_norm": 0.5722407102584839, "learning_rate": 8.336461248956522e-05, "epoch": 1.1519434628975265, "step": 1630 }, { "loss": 0.8872, "grad_norm": 0.5413146615028381, "learning_rate": 8.221331280579564e-05, "epoch": 1.1590106007067138, "step": 1640 }, { "loss": 0.8896, "grad_norm": 0.569531261920929, "learning_rate": 8.106444270278999e-05, "epoch": 1.1660777385159011, "step": 1650 }, { "loss": 0.905, "grad_norm": 0.5654504299163818, "learning_rate": 7.991815911099126e-05, "epoch": 1.1731448763250882, "step": 1660 }, { "loss": 0.9112, "grad_norm": 0.5890611410140991, "learning_rate": 7.877461860753697e-05, "epoch": 1.1802120141342756, "step": 1670 }, { "loss": 0.8988, "grad_norm": 0.5833044052124023, "learning_rate": 7.763397739487098e-05, "epoch": 1.187279151943463, "step": 1680 }, { "loss": 0.9063, "grad_norm": 0.6019202470779419, "learning_rate": 7.649639127940735e-05, "epoch": 1.1943462897526502, "step": 1690 }, { "eval_loss": 0.947350263595581, "eval_runtime": 43.7059, "eval_samples_per_second": 54.501, "eval_steps_per_second": 13.637, "epoch": 1.1957597173144876, "step": 1692 }, { "loss": 0.8976, "grad_norm": 0.5837628841400146, "learning_rate": 7.536201565024767e-05, "epoch": 1.2014134275618376, "step": 1700 }, { "loss": 0.8992, "grad_norm": 0.8743357062339783, "learning_rate": 7.423100545795565e-05, "epoch": 1.2084805653710247, "step": 1710 }, { "loss": 0.8933, "grad_norm": 0.6327987313270569, "learning_rate": 7.310351519339165e-05, "epoch": 1.215547703180212, "step": 1720 }, { "loss": 0.9088, "grad_norm": 0.5537572503089905, "learning_rate": 7.197969886660984e-05, "epoch": 1.2226148409893993, "step": 1730 }, { "loss": 0.8975, "grad_norm": 0.5983340740203857, "learning_rate": 7.085970998582112e-05, "epoch": 1.2296819787985867, "step": 1740 }, { "loss": 0.9015, "grad_norm": 0.5243608355522156, "learning_rate": 6.974370153642468e-05, "epoch": 1.2367491166077738, "step": 1750 }, { "loss": 0.8847, "grad_norm": 0.5976786017417908, "learning_rate": 6.863182596011087e-05, "epoch": 1.243816254416961, "step": 1760 }, { "loss": 0.8904, "grad_norm": 0.5367252826690674, "learning_rate": 6.752423513403824e-05, "epoch": 1.2508833922261484, "step": 1770 }, { "loss": 0.9066, "grad_norm": 0.5782431960105896, "learning_rate": 6.642108035008803e-05, "epoch": 1.2579505300353357, "step": 1780 }, { "loss": 0.8915, "grad_norm": 0.6352420449256897, "learning_rate": 6.53225122941981e-05, "epoch": 1.265017667844523, "step": 1790 }, { "loss": 0.8916, "grad_norm": 0.6211472749710083, "learning_rate": 6.422868102578018e-05, "epoch": 1.2720848056537102, "step": 1800 }, { "loss": 0.8769, "grad_norm": 0.5358094573020935, "learning_rate": 6.31397359572223e-05, "epoch": 1.2791519434628975, "step": 1810 }, { "loss": 0.8657, "grad_norm": 0.5707045197486877, "learning_rate": 6.205582583347974e-05, "epoch": 1.2862190812720848, "step": 1820 }, { "loss": 0.873, "grad_norm": 0.6060373187065125, "learning_rate": 6.097709871175723e-05, "epoch": 1.293286219081272, "step": 1830 }, { "loss": 0.8954, "grad_norm": 0.5479307174682617, "learning_rate": 5.990370194128479e-05, "epoch": 1.3003533568904593, "step": 1840 }, { "loss": 0.8849, "grad_norm": 0.5866280794143677, "learning_rate": 5.88357821431908e-05, "epoch": 1.3074204946996466, "step": 1850 }, { "loss": 0.882, "grad_norm": 0.6369838714599609, "learning_rate": 5.7773485190474044e-05, "epoch": 1.314487632508834, "step": 1860 }, { "loss": 0.8863, "grad_norm": 0.5785922408103943, "learning_rate": 5.671695618807802e-05, "epoch": 1.3215547703180213, "step": 1870 }, { "loss": 0.9007, "grad_norm": 0.6535210609436035, "learning_rate": 5.566633945307052e-05, "epoch": 1.3286219081272086, "step": 1880 }, { "loss": 0.8863, "grad_norm": 0.5599955320358276, "learning_rate": 5.4621778494930397e-05, "epoch": 1.3356890459363957, "step": 1890 }, { "loss": 0.8949, "grad_norm": 0.6138301491737366, "learning_rate": 5.358341599594483e-05, "epoch": 1.342756183745583, "step": 1900 }, { "loss": 0.8629, "grad_norm": 0.5357041358947754, "learning_rate": 5.255139379171967e-05, "epoch": 1.3498233215547704, "step": 1910 }, { "loss": 0.8977, "grad_norm": 0.5802581310272217, "learning_rate": 5.152585285180517e-05, "epoch": 1.3568904593639575, "step": 1920 }, { "loss": 0.8614, "grad_norm": 0.6308375597000122, "learning_rate": 5.050693326044036e-05, "epoch": 1.3639575971731448, "step": 1930 }, { "loss": 0.8656, "grad_norm": 0.6286219358444214, "learning_rate": 4.949477419741814e-05, "epoch": 1.3710247349823321, "step": 1940 }, { "loss": 0.8775, "grad_norm": 0.5799056887626648, "learning_rate": 4.848951391907377e-05, "epoch": 1.3780918727915195, "step": 1950 }, { "loss": 0.8773, "grad_norm": 0.5816202163696289, "learning_rate": 4.749128973940001e-05, "epoch": 1.3851590106007068, "step": 1960 }, { "loss": 0.9143, "grad_norm": 0.540955126285553, "learning_rate": 4.6500238011290295e-05, "epoch": 1.3922261484098941, "step": 1970 }, { "eval_loss": 0.931702196598053, "eval_runtime": 43.3565, "eval_samples_per_second": 54.94, "eval_steps_per_second": 13.747, "epoch": 1.3950530035335689, "step": 1974 }, { "loss": 0.8762, "grad_norm": 0.577583372592926, "learning_rate": 4.551649410791384e-05, "epoch": 1.3992932862190812, "step": 1980 }, { "loss": 0.8779, "grad_norm": 0.5985121130943298, "learning_rate": 4.454019240422412e-05, "epoch": 1.4063604240282686, "step": 1990 }, { "loss": 0.8721, "grad_norm": 0.6050156354904175, "learning_rate": 4.357146625860391e-05, "epoch": 1.4134275618374559, "step": 2000 }, { "loss": 0.9011, "grad_norm": 0.5412665605545044, "learning_rate": 4.261044799464915e-05, "epoch": 1.420494699646643, "step": 2010 }, { "loss": 0.8794, "grad_norm": 0.5395935773849487, "learning_rate": 4.165726888309402e-05, "epoch": 1.4275618374558303, "step": 2020 }, { "loss": 0.9012, "grad_norm": 0.5516586899757385, "learning_rate": 4.0712059123880155e-05, "epoch": 1.4346289752650176, "step": 2030 }, { "loss": 0.874, "grad_norm": 0.5666341781616211, "learning_rate": 3.977494782837182e-05, "epoch": 1.441696113074205, "step": 2040 }, { "loss": 0.8642, "grad_norm": 0.6185910701751709, "learning_rate": 3.884606300171979e-05, "epoch": 1.4487632508833923, "step": 2050 }, { "loss": 0.8902, "grad_norm": 0.5560716390609741, "learning_rate": 3.7925531525376623e-05, "epoch": 1.4558303886925796, "step": 2060 }, { "loss": 0.8892, "grad_norm": 0.6083916425704956, "learning_rate": 3.7013479139765115e-05, "epoch": 1.4628975265017667, "step": 2070 }, { "loss": 0.8664, "grad_norm": 0.5855932235717773, "learning_rate": 3.611003042710266e-05, "epoch": 1.469964664310954, "step": 2080 }, { "loss": 0.8864, "grad_norm": 0.5607093572616577, "learning_rate": 3.521530879438407e-05, "epoch": 1.4770318021201414, "step": 2090 }, { "loss": 0.8797, "grad_norm": 0.6089926958084106, "learning_rate": 3.432943645652453e-05, "epoch": 1.4840989399293285, "step": 2100 }, { "loss": 0.8788, "grad_norm": 0.5762563347816467, "learning_rate": 3.345253441966579e-05, "epoch": 1.4911660777385158, "step": 2110 }, { "loss": 0.8829, "grad_norm": 0.6184695959091187, "learning_rate": 3.258472246464717e-05, "epoch": 1.4982332155477032, "step": 2120 }, { "loss": 0.886, "grad_norm": 0.6246795058250427, "learning_rate": 3.172611913064402e-05, "epoch": 1.5053003533568905, "step": 2130 }, { "loss": 0.8844, "grad_norm": 0.5603175759315491, "learning_rate": 3.087684169897588e-05, "epoch": 1.5123674911660778, "step": 2140 }, { "loss": 0.8683, "grad_norm": 0.584713876247406, "learning_rate": 3.0037006177086346e-05, "epoch": 1.5194346289752652, "step": 2150 }, { "loss": 0.8423, "grad_norm": 0.588035523891449, "learning_rate": 2.920672728269692e-05, "epoch": 1.5265017667844523, "step": 2160 }, { "loss": 0.8753, "grad_norm": 0.7277367115020752, "learning_rate": 2.8386118428137254e-05, "epoch": 1.5335689045936396, "step": 2170 }, { "loss": 0.8689, "grad_norm": 0.7008647322654724, "learning_rate": 2.7575291704853323e-05, "epoch": 1.5406360424028267, "step": 2180 }, { "loss": 0.8794, "grad_norm": 0.6551691293716431, "learning_rate": 2.6774357868096432e-05, "epoch": 1.547703180212014, "step": 2190 }, { "loss": 0.8603, "grad_norm": 0.6580778360366821, "learning_rate": 2.5983426321794502e-05, "epoch": 1.5547703180212014, "step": 2200 }, { "loss": 0.8857, "grad_norm": 0.5846377611160278, "learning_rate": 2.5202605103607835e-05, "epoch": 1.5618374558303887, "step": 2210 }, { "loss": 0.8651, "grad_norm": 0.6134793162345886, "learning_rate": 2.443200087017192e-05, "epoch": 1.568904593639576, "step": 2220 }, { "loss": 0.8777, "grad_norm": 0.5883351564407349, "learning_rate": 2.3671718882528437e-05, "epoch": 1.5759717314487633, "step": 2230 }, { "loss": 0.8675, "grad_norm": 0.623563289642334, "learning_rate": 2.292186299174712e-05, "epoch": 1.5830388692579507, "step": 2240 }, { "loss": 0.8723, "grad_norm": 0.641573965549469, "learning_rate": 2.218253562474023e-05, "epoch": 1.5901060070671378, "step": 2250 }, { "eval_loss": 0.9208451509475708, "eval_runtime": 43.8208, "eval_samples_per_second": 54.358, "eval_steps_per_second": 13.601, "epoch": 1.5943462897526501, "step": 2256 }, { "loss": 0.8811, "grad_norm": 0.5773902535438538, "learning_rate": 2.1453837770271334e-05, "epoch": 1.5971731448763251, "step": 2260 }, { "loss": 0.8775, "grad_norm": 0.5559404492378235, "learning_rate": 2.0735868965160953e-05, "epoch": 1.6042402826855122, "step": 2270 }, { "loss": 0.8719, "grad_norm": 0.5846749544143677, "learning_rate": 2.0028727280690107e-05, "epoch": 1.6113074204946995, "step": 2280 }, { "loss": 0.8721, "grad_norm": 0.6338834166526794, "learning_rate": 1.9332509309204183e-05, "epoch": 1.6183745583038869, "step": 2290 }, { "loss": 0.8764, "grad_norm": 0.5955981016159058, "learning_rate": 1.8647310150919083e-05, "epoch": 1.6254416961130742, "step": 2300 }, { "loss": 0.8706, "grad_norm": 0.5912985801696777, "learning_rate": 1.797322340093067e-05, "epoch": 1.6325088339222615, "step": 2310 }, { "loss": 0.8508, "grad_norm": 0.649738073348999, "learning_rate": 1.7310341136430385e-05, "epoch": 1.6395759717314489, "step": 2320 }, { "loss": 0.8796, "grad_norm": 0.6230898499488831, "learning_rate": 1.6658753904127734e-05, "epoch": 1.6466431095406362, "step": 2330 }, { "loss": 0.8691, "grad_norm": 0.6647538542747498, "learning_rate": 1.6018550707882062e-05, "epoch": 1.6537102473498233, "step": 2340 }, { "loss": 0.8804, "grad_norm": 0.6123314499855042, "learning_rate": 1.538981899654508e-05, "epoch": 1.6607773851590106, "step": 2350 }, { "loss": 0.87, "grad_norm": 0.6355293393135071, "learning_rate": 1.477264465201572e-05, "epoch": 1.6678445229681977, "step": 2360 }, { "loss": 0.8661, "grad_norm": 0.5558314323425293, "learning_rate": 1.4167111977508973e-05, "epoch": 1.674911660777385, "step": 2370 }, { "loss": 0.8525, "grad_norm": 0.5967123508453369, "learning_rate": 1.3573303686040628e-05, "epoch": 1.6819787985865724, "step": 2380 }, { "loss": 0.8647, "grad_norm": 0.5779529213905334, "learning_rate": 1.2991300889128866e-05, "epoch": 1.6890459363957597, "step": 2390 }, { "loss": 0.8794, "grad_norm": 0.5755199790000916, "learning_rate": 1.2421183085714927e-05, "epoch": 1.696113074204947, "step": 2400 }, { "loss": 0.861, "grad_norm": 0.5797527432441711, "learning_rate": 1.1863028151303879e-05, "epoch": 1.7031802120141344, "step": 2410 }, { "loss": 0.8714, "grad_norm": 0.5963758826255798, "learning_rate": 1.13169123273271e-05, "epoch": 1.7102473498233217, "step": 2420 }, { "loss": 0.8771, "grad_norm": 0.5919273495674133, "learning_rate": 1.078291021072817e-05, "epoch": 1.7173144876325088, "step": 2430 }, { "loss": 0.8706, "grad_norm": 0.5632163882255554, "learning_rate": 1.0261094743773203e-05, "epoch": 1.7243816254416962, "step": 2440 }, { "loss": 0.8718, "grad_norm": 0.5939465761184692, "learning_rate": 9.751537204087258e-06, "epoch": 1.7314487632508833, "step": 2450 }, { "loss": 0.8648, "grad_norm": 0.5735843777656555, "learning_rate": 9.254307194918144e-06, "epoch": 1.7385159010600706, "step": 2460 }, { "loss": 0.8638, "grad_norm": 0.5408446192741394, "learning_rate": 8.769472635628905e-06, "epoch": 1.745583038869258, "step": 2470 }, { "loss": 0.8692, "grad_norm": 0.5887193083763123, "learning_rate": 8.297099752420446e-06, "epoch": 1.7526501766784452, "step": 2480 }, { "loss": 0.8852, "grad_norm": 0.5828359127044678, "learning_rate": 7.837253069285234e-06, "epoch": 1.7597173144876326, "step": 2490 }, { "loss": 0.8688, "grad_norm": 0.5949276685714722, "learning_rate": 7.389995399193595e-06, "epoch": 1.76678445229682, "step": 2500 }, { "loss": 0.8574, "grad_norm": 0.5560010075569153, "learning_rate": 6.9553878355138936e-06, "epoch": 1.773851590106007, "step": 2510 }, { "loss": 0.8739, "grad_norm": 0.6576620936393738, "learning_rate": 6.5334897436672535e-06, "epoch": 1.7809187279151943, "step": 2520 }, { "loss": 0.8664, "grad_norm": 0.6786220669746399, "learning_rate": 6.124358753018689e-06, "epoch": 1.7879858657243817, "step": 2530 }, { "eval_loss": 0.9149619936943054, "eval_runtime": 46.9648, "eval_samples_per_second": 50.719, "eval_steps_per_second": 12.69, "epoch": 1.7936395759717314, "step": 2538 }, { "loss": 0.8505, "grad_norm": 0.5556807518005371, "learning_rate": 5.7280507490050985e-06, "epoch": 1.7950530035335688, "step": 2540 }, { "loss": 0.8581, "grad_norm": 0.6154960989952087, "learning_rate": 5.3446198655015765e-06, "epoch": 1.802120141342756, "step": 2550 }, { "loss": 0.8708, "grad_norm": 0.6271000504493713, "learning_rate": 4.974118477426992e-06, "epoch": 1.8091872791519434, "step": 2560 }, { "loss": 0.8619, "grad_norm": 0.718324601650238, "learning_rate": 4.616597193589833e-06, "epoch": 1.8162544169611308, "step": 2570 }, { "loss": 0.8619, "grad_norm": 0.5940812230110168, "learning_rate": 4.272104849775216e-06, "epoch": 1.823321554770318, "step": 2580 }, { "loss": 0.8621, "grad_norm": 0.539251983165741, "learning_rate": 3.940688502074186e-06, "epoch": 1.8303886925795054, "step": 2590 }, { "loss": 0.8574, "grad_norm": 0.6987637281417847, "learning_rate": 3.622393420456016e-06, "epoch": 1.8374558303886925, "step": 2600 }, { "loss": 0.8659, "grad_norm": 0.5573965311050415, "learning_rate": 3.3172630825846095e-06, "epoch": 1.8445229681978799, "step": 2610 }, { "loss": 0.8674, "grad_norm": 0.5907488465309143, "learning_rate": 3.025339167879615e-06, "epoch": 1.851590106007067, "step": 2620 }, { "loss": 0.8551, "grad_norm": 0.6165907382965088, "learning_rate": 2.7466615518231486e-06, "epoch": 1.8586572438162543, "step": 2630 }, { "loss": 0.8617, "grad_norm": 0.5819846391677856, "learning_rate": 2.4812683005130843e-06, "epoch": 1.8657243816254416, "step": 2640 }, { "loss": 0.8544, "grad_norm": 0.5709139108657837, "learning_rate": 2.229195665463324e-06, "epoch": 1.872791519434629, "step": 2650 }, { "loss": 0.8475, "grad_norm": 0.687758207321167, "learning_rate": 1.990478078652047e-06, "epoch": 1.8798586572438163, "step": 2660 }, { "loss": 0.8617, "grad_norm": 0.5966079831123352, "learning_rate": 1.7651481478184296e-06, "epoch": 1.8869257950530036, "step": 2670 }, { "loss": 0.8681, "grad_norm": 0.5431429743766785, "learning_rate": 1.553236652008605e-06, "epoch": 1.893992932862191, "step": 2680 }, { "loss": 0.8618, "grad_norm": 0.6142215728759766, "learning_rate": 1.3547725373713405e-06, "epoch": 1.901060070671378, "step": 2690 }, { "loss": 0.8654, "grad_norm": 0.5876252055168152, "learning_rate": 1.169782913204176e-06, "epoch": 1.9081272084805654, "step": 2700 }, { "loss": 0.8492, "grad_norm": 0.5388440489768982, "learning_rate": 9.98293048250376e-07, "epoch": 1.9151943462897525, "step": 2710 }, { "loss": 0.8666, "grad_norm": 0.6419453620910645, "learning_rate": 8.403263672473793e-07, "epoch": 1.9222614840989398, "step": 2720 }, { "loss": 0.8654, "grad_norm": 0.5551348328590393, "learning_rate": 6.959044477270138e-07, "epoch": 1.9293286219081272, "step": 2730 }, { "loss": 0.8681, "grad_norm": 0.5783069133758545, "learning_rate": 5.650470170681876e-07, "epoch": 1.9363957597173145, "step": 2740 }, { "loss": 0.8661, "grad_norm": 0.579828143119812, "learning_rate": 4.477719498021782e-07, "epoch": 1.9434628975265018, "step": 2750 }, { "loss": 0.8518, "grad_norm": 0.6109597682952881, "learning_rate": 3.440952651710072e-07, "epoch": 1.9505300353356891, "step": 2760 }, { "loss": 0.8649, "grad_norm": 0.5834357738494873, "learning_rate": 2.540311249393912e-07, "epoch": 1.9575971731448765, "step": 2770 }, { "loss": 0.8595, "grad_norm": 0.5844215750694275, "learning_rate": 1.7759183146021096e-07, "epoch": 1.9646643109540636, "step": 2780 }, { "loss": 0.8602, "grad_norm": 0.6017207503318787, "learning_rate": 1.1478782599411153e-07, "epoch": 1.971731448763251, "step": 2790 }, { "loss": 0.8555, "grad_norm": 0.60943603515625, "learning_rate": 6.562768728327618e-08, "epoch": 1.978798586572438, "step": 2800 }, { "loss": 0.86, "grad_norm": 0.5658397078514099, "learning_rate": 3.0118130379575005e-08, "epoch": 1.9858657243816253, "step": 2810 }, { "loss": 0.8591, "grad_norm": 0.5758052468299866, "learning_rate": 8.2640057273764e-09, "epoch": 1.9929328621908127, "step": 2820 }, { "eval_loss": 0.9138538241386414, "eval_runtime": 43.3799, "eval_samples_per_second": 54.91, "eval_steps_per_second": 13.739, "epoch": 1.9929328621908127, "step": 2820 }, { "loss": 0.8452, "grad_norm": 1.4162325859069824, "learning_rate": 6.829850092149315e-11, "epoch": 2.0, "step": 2830 }, { "train_runtime": 3925.8146, "train_samples_per_second": 23.055, "train_steps_per_second": 0.721, "total_flos": 4.463337713824443e+17, "train_loss": 0.9908652207876684, "epoch": 2.0, "step": 2830 } ]