| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.4465031016252844, |
| "eval_steps": 500, |
| "global_step": 3410, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0013093932598981946, |
| "grad_norm": 0.9729704260826111, |
| "learning_rate": 0.00018, |
| "loss": 2.9613, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.002618786519796389, |
| "grad_norm": 0.4988560676574707, |
| "learning_rate": 0.00019976402726796014, |
| "loss": 2.2501, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.003928179779694584, |
| "grad_norm": 0.3629654049873352, |
| "learning_rate": 0.0001995018353434714, |
| "loss": 1.9558, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.005237573039592778, |
| "grad_norm": 0.42317306995391846, |
| "learning_rate": 0.0001992396434189827, |
| "loss": 1.8904, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.006546966299490973, |
| "grad_norm": 0.4342662990093231, |
| "learning_rate": 0.00019897745149449398, |
| "loss": 1.9487, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.007856359559389167, |
| "grad_norm": 0.4164058268070221, |
| "learning_rate": 0.00019871525957000524, |
| "loss": 1.845, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.009165752819287363, |
| "grad_norm": 0.38950663805007935, |
| "learning_rate": 0.0001984530676455165, |
| "loss": 1.8264, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.010475146079185557, |
| "grad_norm": 0.42093154788017273, |
| "learning_rate": 0.00019819087572102778, |
| "loss": 1.8418, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.011784539339083753, |
| "grad_norm": 0.4716477394104004, |
| "learning_rate": 0.00019792868379653908, |
| "loss": 1.8346, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.013093932598981946, |
| "grad_norm": 0.4358816146850586, |
| "learning_rate": 0.00019766649187205035, |
| "loss": 1.8271, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.014403325858880142, |
| "grad_norm": 0.45478910207748413, |
| "learning_rate": 0.00019740429994756162, |
| "loss": 1.7506, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.015712719118778334, |
| "grad_norm": 0.4366815388202667, |
| "learning_rate": 0.00019714210802307289, |
| "loss": 1.7854, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.01702211237867653, |
| "grad_norm": 0.45096880197525024, |
| "learning_rate": 0.00019687991609858418, |
| "loss": 1.779, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.018331505638574726, |
| "grad_norm": 0.4566694498062134, |
| "learning_rate": 0.00019661772417409545, |
| "loss": 1.7509, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.01964089889847292, |
| "grad_norm": 0.4729042649269104, |
| "learning_rate": 0.00019635553224960672, |
| "loss": 1.7271, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.020950292158371114, |
| "grad_norm": 0.46566858887672424, |
| "learning_rate": 0.000196093340325118, |
| "loss": 1.714, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.02225968541826931, |
| "grad_norm": 0.45467349886894226, |
| "learning_rate": 0.00019583114840062926, |
| "loss": 1.702, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.023569078678167505, |
| "grad_norm": 0.434721440076828, |
| "learning_rate": 0.00019556895647614055, |
| "loss": 1.7162, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.024878471938065697, |
| "grad_norm": 0.5182896852493286, |
| "learning_rate": 0.00019530676455165182, |
| "loss": 1.688, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.026187865197963893, |
| "grad_norm": 0.5060753226280212, |
| "learning_rate": 0.0001950445726271631, |
| "loss": 1.6955, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.02749725845786209, |
| "grad_norm": 0.46147406101226807, |
| "learning_rate": 0.00019478238070267436, |
| "loss": 1.681, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.028806651717760284, |
| "grad_norm": 0.4517662823200226, |
| "learning_rate": 0.00019452018877818563, |
| "loss": 1.6936, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.030116044977658477, |
| "grad_norm": 0.44920527935028076, |
| "learning_rate": 0.00019425799685369693, |
| "loss": 1.6633, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.03142543823755667, |
| "grad_norm": 0.5066579580307007, |
| "learning_rate": 0.0001939958049292082, |
| "loss": 1.6872, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.03273483149745487, |
| "grad_norm": 0.5238184928894043, |
| "learning_rate": 0.00019373361300471946, |
| "loss": 1.6255, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.03404422475735306, |
| "grad_norm": 0.4943958520889282, |
| "learning_rate": 0.00019347142108023073, |
| "loss": 1.6499, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.03535361801725126, |
| "grad_norm": 0.48346492648124695, |
| "learning_rate": 0.00019320922915574203, |
| "loss": 1.672, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.03666301127714945, |
| "grad_norm": 0.4401436746120453, |
| "learning_rate": 0.0001929470372312533, |
| "loss": 1.6863, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.037972404537047644, |
| "grad_norm": 0.4602312743663788, |
| "learning_rate": 0.00019268484530676457, |
| "loss": 1.646, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.03928179779694584, |
| "grad_norm": 0.4927528202533722, |
| "learning_rate": 0.00019242265338227584, |
| "loss": 1.6252, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.040591191056844035, |
| "grad_norm": 0.5075507760047913, |
| "learning_rate": 0.0001921604614577871, |
| "loss": 1.6218, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.04190058431674223, |
| "grad_norm": 0.5239428877830505, |
| "learning_rate": 0.0001918982695332984, |
| "loss": 1.6354, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.043209977576640426, |
| "grad_norm": 0.5954804420471191, |
| "learning_rate": 0.00019163607760880967, |
| "loss": 1.7022, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.04451937083653862, |
| "grad_norm": 0.5364096760749817, |
| "learning_rate": 0.00019137388568432094, |
| "loss": 1.5981, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.04582876409643681, |
| "grad_norm": 0.55096435546875, |
| "learning_rate": 0.0001911116937598322, |
| "loss": 1.6211, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.04713815735633501, |
| "grad_norm": 0.5193445682525635, |
| "learning_rate": 0.00019084950183534348, |
| "loss": 1.6195, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.0484475506162332, |
| "grad_norm": 0.528788685798645, |
| "learning_rate": 0.00019058730991085477, |
| "loss": 1.6076, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.049756943876131395, |
| "grad_norm": 0.5360815525054932, |
| "learning_rate": 0.00019032511798636604, |
| "loss": 1.5912, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.051066337136029594, |
| "grad_norm": 0.5031074285507202, |
| "learning_rate": 0.0001900629260618773, |
| "loss": 1.6157, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.052375730395927786, |
| "grad_norm": 0.5149925351142883, |
| "learning_rate": 0.00018980073413738858, |
| "loss": 1.579, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.053685123655825985, |
| "grad_norm": 0.5419250726699829, |
| "learning_rate": 0.00018953854221289985, |
| "loss": 1.6242, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.05499451691572418, |
| "grad_norm": 0.5513054728507996, |
| "learning_rate": 0.00018927635028841112, |
| "loss": 1.5948, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.05630391017562237, |
| "grad_norm": 0.5670781135559082, |
| "learning_rate": 0.0001890141583639224, |
| "loss": 1.5314, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.05761330343552057, |
| "grad_norm": 0.5327165722846985, |
| "learning_rate": 0.00018875196643943366, |
| "loss": 1.5716, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.05892269669541876, |
| "grad_norm": 0.5244112610816956, |
| "learning_rate": 0.00018848977451494493, |
| "loss": 1.5347, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.06023208995531695, |
| "grad_norm": 0.5349589586257935, |
| "learning_rate": 0.00018822758259045622, |
| "loss": 1.564, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.06154148321521515, |
| "grad_norm": 0.5296887755393982, |
| "learning_rate": 0.0001879653906659675, |
| "loss": 1.5779, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.06285087647511334, |
| "grad_norm": 0.5426337718963623, |
| "learning_rate": 0.00018770319874147876, |
| "loss": 1.5112, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.06416026973501154, |
| "grad_norm": 0.5532763004302979, |
| "learning_rate": 0.00018744100681699003, |
| "loss": 1.5458, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.06546966299490974, |
| "grad_norm": 0.5318668484687805, |
| "learning_rate": 0.00018717881489250133, |
| "loss": 1.5597, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.06677905625480793, |
| "grad_norm": 0.6084654331207275, |
| "learning_rate": 0.0001869166229680126, |
| "loss": 1.5485, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.06808844951470612, |
| "grad_norm": 0.5626131296157837, |
| "learning_rate": 0.00018665443104352386, |
| "loss": 1.5217, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.06939784277460431, |
| "grad_norm": 0.528758704662323, |
| "learning_rate": 0.00018639223911903513, |
| "loss": 1.5343, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.07070723603450252, |
| "grad_norm": 0.5894292593002319, |
| "learning_rate": 0.0001861300471945464, |
| "loss": 1.5604, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.07201662929440071, |
| "grad_norm": 0.5676683187484741, |
| "learning_rate": 0.0001858678552700577, |
| "loss": 1.5216, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.0733260225542989, |
| "grad_norm": 0.6381473541259766, |
| "learning_rate": 0.00018560566334556897, |
| "loss": 1.4334, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.0746354158141971, |
| "grad_norm": 0.6644160151481628, |
| "learning_rate": 0.00018534347142108024, |
| "loss": 1.4832, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.07594480907409529, |
| "grad_norm": 0.5856960415840149, |
| "learning_rate": 0.0001850812794965915, |
| "loss": 1.5118, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.07725420233399348, |
| "grad_norm": 0.5892801880836487, |
| "learning_rate": 0.00018481908757210277, |
| "loss": 1.5028, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.07856359559389169, |
| "grad_norm": 0.5674527883529663, |
| "learning_rate": 0.00018455689564761407, |
| "loss": 1.5125, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.07987298885378988, |
| "grad_norm": 0.6059868335723877, |
| "learning_rate": 0.00018429470372312534, |
| "loss": 1.4543, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.08118238211368807, |
| "grad_norm": 0.6255605816841125, |
| "learning_rate": 0.0001840325117986366, |
| "loss": 1.4851, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.08249177537358626, |
| "grad_norm": 0.5904423594474792, |
| "learning_rate": 0.00018377031987414788, |
| "loss": 1.4154, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.08380116863348445, |
| "grad_norm": 0.6035749912261963, |
| "learning_rate": 0.00018350812794965917, |
| "loss": 1.4276, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.08511056189338265, |
| "grad_norm": 0.597172737121582, |
| "learning_rate": 0.00018324593602517044, |
| "loss": 1.4736, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.08641995515328085, |
| "grad_norm": 0.6352164149284363, |
| "learning_rate": 0.0001829837441006817, |
| "loss": 1.4975, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.08772934841317905, |
| "grad_norm": 0.5500873327255249, |
| "learning_rate": 0.00018272155217619298, |
| "loss": 1.4578, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.08903874167307724, |
| "grad_norm": 0.6423613429069519, |
| "learning_rate": 0.00018245936025170425, |
| "loss": 1.3926, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.09034813493297543, |
| "grad_norm": 0.665908694267273, |
| "learning_rate": 0.00018219716832721555, |
| "loss": 1.4548, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.09165752819287362, |
| "grad_norm": 0.6354024410247803, |
| "learning_rate": 0.00018193497640272682, |
| "loss": 1.5, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.09296692145277183, |
| "grad_norm": 0.6588740348815918, |
| "learning_rate": 0.00018167278447823808, |
| "loss": 1.3609, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.09427631471267002, |
| "grad_norm": 0.6754702925682068, |
| "learning_rate": 0.00018141059255374935, |
| "loss": 1.3432, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.09558570797256821, |
| "grad_norm": 0.6337271332740784, |
| "learning_rate": 0.00018114840062926062, |
| "loss": 1.4439, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.0968951012324664, |
| "grad_norm": 0.6592088937759399, |
| "learning_rate": 0.00018088620870477192, |
| "loss": 1.3949, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.0982044944923646, |
| "grad_norm": 0.6700498461723328, |
| "learning_rate": 0.0001806240167802832, |
| "loss": 1.4046, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.09951388775226279, |
| "grad_norm": 0.708410382270813, |
| "learning_rate": 0.00018036182485579446, |
| "loss": 1.3021, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.100823281012161, |
| "grad_norm": 0.6718457937240601, |
| "learning_rate": 0.00018009963293130573, |
| "loss": 1.3769, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.10213267427205919, |
| "grad_norm": 0.661522388458252, |
| "learning_rate": 0.00017983744100681702, |
| "loss": 1.434, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.10344206753195738, |
| "grad_norm": 0.6615481376647949, |
| "learning_rate": 0.0001795752490823283, |
| "loss": 1.3839, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.10475146079185557, |
| "grad_norm": 0.696959376335144, |
| "learning_rate": 0.00017931305715783956, |
| "loss": 1.3634, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.10606085405175376, |
| "grad_norm": 0.7320592403411865, |
| "learning_rate": 0.00017905086523335083, |
| "loss": 1.2737, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.10737024731165197, |
| "grad_norm": 0.7200619578361511, |
| "learning_rate": 0.0001787886733088621, |
| "loss": 1.3732, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.10867964057155016, |
| "grad_norm": 0.6982961297035217, |
| "learning_rate": 0.00017852648138437337, |
| "loss": 1.3019, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.10998903383144835, |
| "grad_norm": 0.7427386045455933, |
| "learning_rate": 0.00017826428945988464, |
| "loss": 1.3398, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.11129842709134655, |
| "grad_norm": 0.7897806763648987, |
| "learning_rate": 0.0001780020975353959, |
| "loss": 1.3216, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.11260782035124474, |
| "grad_norm": 0.7520805597305298, |
| "learning_rate": 0.00017773990561090717, |
| "loss": 1.2875, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.11391721361114293, |
| "grad_norm": 0.7332555055618286, |
| "learning_rate": 0.00017747771368641844, |
| "loss": 1.272, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.11522660687104114, |
| "grad_norm": 0.7135840654373169, |
| "learning_rate": 0.00017721552176192974, |
| "loss": 1.3185, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.11653600013093933, |
| "grad_norm": 0.6898264288902283, |
| "learning_rate": 0.000176953329837441, |
| "loss": 1.3089, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.11784539339083752, |
| "grad_norm": 0.9488328099250793, |
| "learning_rate": 0.00017669113791295228, |
| "loss": 1.2258, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.11915478665073571, |
| "grad_norm": 0.7257933616638184, |
| "learning_rate": 0.00017642894598846355, |
| "loss": 1.3284, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.1204641799106339, |
| "grad_norm": 0.7688736915588379, |
| "learning_rate": 0.00017616675406397484, |
| "loss": 1.2878, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.1217735731705321, |
| "grad_norm": 0.8328510522842407, |
| "learning_rate": 0.0001759045621394861, |
| "loss": 1.2346, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.1230829664304303, |
| "grad_norm": 0.8448120951652527, |
| "learning_rate": 0.00017564237021499738, |
| "loss": 1.2926, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.1243923596903285, |
| "grad_norm": 0.8510689735412598, |
| "learning_rate": 0.00017538017829050865, |
| "loss": 1.2109, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.12570175295022668, |
| "grad_norm": 0.866874098777771, |
| "learning_rate": 0.00017511798636601992, |
| "loss": 1.3091, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.12701114621012488, |
| "grad_norm": 0.9010233879089355, |
| "learning_rate": 0.00017485579444153122, |
| "loss": 1.2273, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.1283205394700231, |
| "grad_norm": 0.9316047430038452, |
| "learning_rate": 0.00017459360251704248, |
| "loss": 1.2611, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.12962993272992127, |
| "grad_norm": 0.9005467295646667, |
| "learning_rate": 0.00017433141059255375, |
| "loss": 1.1747, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.13093932598981947, |
| "grad_norm": 0.8843415975570679, |
| "learning_rate": 0.00017406921866806502, |
| "loss": 1.1915, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.13224871924971765, |
| "grad_norm": 0.8090497851371765, |
| "learning_rate": 0.0001738070267435763, |
| "loss": 1.2452, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.13355811250961586, |
| "grad_norm": 1.2498819828033447, |
| "learning_rate": 0.0001735448348190876, |
| "loss": 1.276, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.13486750576951406, |
| "grad_norm": 0.7861034870147705, |
| "learning_rate": 0.00017328264289459886, |
| "loss": 1.1989, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.13617689902941224, |
| "grad_norm": 0.9525002837181091, |
| "learning_rate": 0.00017302045097011013, |
| "loss": 1.1338, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.13748629228931045, |
| "grad_norm": 0.8066142201423645, |
| "learning_rate": 0.0001727582590456214, |
| "loss": 1.1421, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.13879568554920862, |
| "grad_norm": 0.8200965523719788, |
| "learning_rate": 0.0001724960671211327, |
| "loss": 1.1596, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.14010507880910683, |
| "grad_norm": 0.9981400370597839, |
| "learning_rate": 0.00017223387519664396, |
| "loss": 1.0562, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.14141447206900504, |
| "grad_norm": 0.9273063540458679, |
| "learning_rate": 0.00017197168327215523, |
| "loss": 1.1275, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.14272386532890322, |
| "grad_norm": 0.8812237977981567, |
| "learning_rate": 0.0001717094913476665, |
| "loss": 1.0406, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.14403325858880142, |
| "grad_norm": 0.8970304727554321, |
| "learning_rate": 0.00017144729942317777, |
| "loss": 1.1263, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.1453426518486996, |
| "grad_norm": 0.9097404479980469, |
| "learning_rate": 0.00017118510749868906, |
| "loss": 1.1956, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.1466520451085978, |
| "grad_norm": 1.0246269702911377, |
| "learning_rate": 0.00017092291557420033, |
| "loss": 1.0717, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.14796143836849598, |
| "grad_norm": 1.1149781942367554, |
| "learning_rate": 0.0001706607236497116, |
| "loss": 1.076, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.1492708316283942, |
| "grad_norm": 1.1981500387191772, |
| "learning_rate": 0.00017039853172522287, |
| "loss": 1.142, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.1505802248882924, |
| "grad_norm": 0.9477318525314331, |
| "learning_rate": 0.00017013633980073414, |
| "loss": 1.0799, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.15188961814819057, |
| "grad_norm": 1.0102957487106323, |
| "learning_rate": 0.00016987414787624544, |
| "loss": 1.0531, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.15319901140808878, |
| "grad_norm": 1.1728227138519287, |
| "learning_rate": 0.0001696119559517567, |
| "loss": 1.0903, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.15450840466798696, |
| "grad_norm": 1.0086623430252075, |
| "learning_rate": 0.00016934976402726797, |
| "loss": 1.0677, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.15581779792788517, |
| "grad_norm": 0.8586070537567139, |
| "learning_rate": 0.00016908757210277924, |
| "loss": 1.1022, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.15712719118778337, |
| "grad_norm": 1.2628968954086304, |
| "learning_rate": 0.00016882538017829054, |
| "loss": 1.0575, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.15843658444768155, |
| "grad_norm": 0.9629563689231873, |
| "learning_rate": 0.0001685631882538018, |
| "loss": 1.0844, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.15974597770757976, |
| "grad_norm": 1.0898447036743164, |
| "learning_rate": 0.00016830099632931308, |
| "loss": 1.0654, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.16105537096747793, |
| "grad_norm": 1.13120698928833, |
| "learning_rate": 0.00016803880440482435, |
| "loss": 1.0686, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.16236476422737614, |
| "grad_norm": 1.0732567310333252, |
| "learning_rate": 0.00016777661248033561, |
| "loss": 1.084, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.16367415748727435, |
| "grad_norm": 1.0681878328323364, |
| "learning_rate": 0.00016751442055584688, |
| "loss": 0.9979, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.16498355074717252, |
| "grad_norm": 0.9773361086845398, |
| "learning_rate": 0.00016725222863135815, |
| "loss": 1.0841, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.16629294400707073, |
| "grad_norm": 1.0342450141906738, |
| "learning_rate": 0.00016699003670686942, |
| "loss": 1.0176, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.1676023372669689, |
| "grad_norm": 1.0580531358718872, |
| "learning_rate": 0.0001667278447823807, |
| "loss": 0.9858, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.16891173052686712, |
| "grad_norm": 0.9744387865066528, |
| "learning_rate": 0.000166465652857892, |
| "loss": 0.9282, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.1702211237867653, |
| "grad_norm": 0.9636452198028564, |
| "learning_rate": 0.00016620346093340326, |
| "loss": 0.9414, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.1715305170466635, |
| "grad_norm": 1.1029468774795532, |
| "learning_rate": 0.00016594126900891453, |
| "loss": 0.8812, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.1728399103065617, |
| "grad_norm": 1.2941449880599976, |
| "learning_rate": 0.0001656790770844258, |
| "loss": 0.9823, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.17414930356645988, |
| "grad_norm": 1.627166509628296, |
| "learning_rate": 0.00016541688515993706, |
| "loss": 0.9585, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.1754586968263581, |
| "grad_norm": 1.091630458831787, |
| "learning_rate": 0.00016515469323544836, |
| "loss": 0.9516, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.17676809008625627, |
| "grad_norm": 1.1108227968215942, |
| "learning_rate": 0.00016489250131095963, |
| "loss": 0.8998, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.17807748334615447, |
| "grad_norm": 1.0883326530456543, |
| "learning_rate": 0.0001646303093864709, |
| "loss": 0.916, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.17938687660605268, |
| "grad_norm": 1.2917275428771973, |
| "learning_rate": 0.00016436811746198217, |
| "loss": 0.9112, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.18069626986595086, |
| "grad_norm": 1.1828432083129883, |
| "learning_rate": 0.00016410592553749344, |
| "loss": 0.9721, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.18200566312584907, |
| "grad_norm": 1.3447389602661133, |
| "learning_rate": 0.00016384373361300473, |
| "loss": 0.9198, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.18331505638574724, |
| "grad_norm": 1.0735760927200317, |
| "learning_rate": 0.000163581541688516, |
| "loss": 0.8634, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.18462444964564545, |
| "grad_norm": 1.0454446077346802, |
| "learning_rate": 0.00016331934976402727, |
| "loss": 0.9151, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.18593384290554366, |
| "grad_norm": 1.2230719327926636, |
| "learning_rate": 0.00016305715783953854, |
| "loss": 0.9202, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.18724323616544183, |
| "grad_norm": 1.1030149459838867, |
| "learning_rate": 0.00016279496591504984, |
| "loss": 0.9068, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.18855262942534004, |
| "grad_norm": 1.4471871852874756, |
| "learning_rate": 0.0001625327739905611, |
| "loss": 0.8682, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.18986202268523822, |
| "grad_norm": 1.2458796501159668, |
| "learning_rate": 0.00016227058206607237, |
| "loss": 0.8247, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.19117141594513642, |
| "grad_norm": 1.1849644184112549, |
| "learning_rate": 0.00016200839014158364, |
| "loss": 0.8987, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.19248080920503463, |
| "grad_norm": 1.2985557317733765, |
| "learning_rate": 0.0001617461982170949, |
| "loss": 0.8006, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.1937902024649328, |
| "grad_norm": 1.7127928733825684, |
| "learning_rate": 0.0001614840062926062, |
| "loss": 0.8191, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.19509959572483102, |
| "grad_norm": 1.440895915031433, |
| "learning_rate": 0.00016122181436811748, |
| "loss": 0.8129, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.1964089889847292, |
| "grad_norm": 1.252194881439209, |
| "learning_rate": 0.00016095962244362875, |
| "loss": 0.8803, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.1977183822446274, |
| "grad_norm": 1.138358235359192, |
| "learning_rate": 0.00016069743051914001, |
| "loss": 0.8744, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.19902777550452558, |
| "grad_norm": 1.080971598625183, |
| "learning_rate": 0.00016043523859465128, |
| "loss": 0.8693, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.20033716876442378, |
| "grad_norm": 1.1612547636032104, |
| "learning_rate": 0.00016017304667016258, |
| "loss": 0.7991, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.201646562024322, |
| "grad_norm": 1.1773971319198608, |
| "learning_rate": 0.00015991085474567385, |
| "loss": 0.912, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.20295595528422017, |
| "grad_norm": 1.1353998184204102, |
| "learning_rate": 0.00015964866282118512, |
| "loss": 0.7986, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.20426534854411837, |
| "grad_norm": 1.6848335266113281, |
| "learning_rate": 0.0001593864708966964, |
| "loss": 0.6932, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.20557474180401655, |
| "grad_norm": 1.4043173789978027, |
| "learning_rate": 0.00015912427897220768, |
| "loss": 0.8529, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.20688413506391476, |
| "grad_norm": 1.2601439952850342, |
| "learning_rate": 0.00015886208704771895, |
| "loss": 0.8173, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.20819352832381297, |
| "grad_norm": 1.2090034484863281, |
| "learning_rate": 0.00015859989512323022, |
| "loss": 0.7451, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.20950292158371114, |
| "grad_norm": 1.3334815502166748, |
| "learning_rate": 0.0001583377031987415, |
| "loss": 0.775, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.21081231484360935, |
| "grad_norm": 1.1993087530136108, |
| "learning_rate": 0.00015807551127425276, |
| "loss": 0.7733, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.21212170810350753, |
| "grad_norm": 1.51642906665802, |
| "learning_rate": 0.00015781331934976406, |
| "loss": 0.6907, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.21343110136340573, |
| "grad_norm": 1.3714466094970703, |
| "learning_rate": 0.00015755112742527532, |
| "loss": 0.7016, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.21474049462330394, |
| "grad_norm": 1.2519642114639282, |
| "learning_rate": 0.0001572889355007866, |
| "loss": 0.7648, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.21604988788320212, |
| "grad_norm": 1.3851202726364136, |
| "learning_rate": 0.00015702674357629786, |
| "loss": 0.7069, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.21735928114310032, |
| "grad_norm": 1.334105134010315, |
| "learning_rate": 0.00015676455165180913, |
| "loss": 0.7338, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.2186686744029985, |
| "grad_norm": 1.3785145282745361, |
| "learning_rate": 0.0001565023597273204, |
| "loss": 0.6299, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.2199780676628967, |
| "grad_norm": 1.4771215915679932, |
| "learning_rate": 0.00015624016780283167, |
| "loss": 0.6828, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.2212874609227949, |
| "grad_norm": 1.3885449171066284, |
| "learning_rate": 0.00015597797587834294, |
| "loss": 0.7141, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.2225968541826931, |
| "grad_norm": 1.2664909362792969, |
| "learning_rate": 0.00015571578395385423, |
| "loss": 0.7667, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.2239062474425913, |
| "grad_norm": 1.2576826810836792, |
| "learning_rate": 0.0001554535920293655, |
| "loss": 0.7395, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.22521564070248948, |
| "grad_norm": 1.284826636314392, |
| "learning_rate": 0.00015519140010487677, |
| "loss": 0.6832, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.22652503396238768, |
| "grad_norm": 1.272933006286621, |
| "learning_rate": 0.00015492920818038804, |
| "loss": 0.6892, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.22783442722228586, |
| "grad_norm": 1.3465379476547241, |
| "learning_rate": 0.0001546670162558993, |
| "loss": 0.6449, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.22914382048218407, |
| "grad_norm": 1.2862318754196167, |
| "learning_rate": 0.00015440482433141058, |
| "loss": 0.6883, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.23045321374208227, |
| "grad_norm": 1.2469042539596558, |
| "learning_rate": 0.00015414263240692188, |
| "loss": 0.7593, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.23176260700198045, |
| "grad_norm": 1.5080034732818604, |
| "learning_rate": 0.00015388044048243315, |
| "loss": 0.7009, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.23307200026187866, |
| "grad_norm": 0.9788569211959839, |
| "learning_rate": 0.00015361824855794441, |
| "loss": 0.602, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.23438139352177684, |
| "grad_norm": 1.3450673818588257, |
| "learning_rate": 0.00015335605663345568, |
| "loss": 0.6238, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.23569078678167504, |
| "grad_norm": 1.4177800416946411, |
| "learning_rate": 0.00015309386470896695, |
| "loss": 0.6768, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.23700018004157325, |
| "grad_norm": 1.3528062105178833, |
| "learning_rate": 0.00015283167278447825, |
| "loss": 0.6404, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.23830957330147143, |
| "grad_norm": 1.2898012399673462, |
| "learning_rate": 0.00015256948085998952, |
| "loss": 0.6606, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.23961896656136963, |
| "grad_norm": 1.311298131942749, |
| "learning_rate": 0.0001523072889355008, |
| "loss": 0.662, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.2409283598212678, |
| "grad_norm": 1.6476584672927856, |
| "learning_rate": 0.00015204509701101206, |
| "loss": 0.671, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.24223775308116602, |
| "grad_norm": 1.36719810962677, |
| "learning_rate": 0.00015178290508652335, |
| "loss": 0.7097, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.2435471463410642, |
| "grad_norm": 1.3647184371948242, |
| "learning_rate": 0.00015152071316203462, |
| "loss": 0.6604, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.2448565396009624, |
| "grad_norm": 1.2265934944152832, |
| "learning_rate": 0.0001512585212375459, |
| "loss": 0.6272, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.2461659328608606, |
| "grad_norm": 1.4882850646972656, |
| "learning_rate": 0.00015099632931305716, |
| "loss": 0.7007, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.2474753261207588, |
| "grad_norm": 1.408470869064331, |
| "learning_rate": 0.00015073413738856843, |
| "loss": 0.6526, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.248784719380657, |
| "grad_norm": 1.3388913869857788, |
| "learning_rate": 0.00015047194546407972, |
| "loss": 0.6891, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.2500941126405552, |
| "grad_norm": 1.3725926876068115, |
| "learning_rate": 0.000150209753539591, |
| "loss": 0.5763, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.25140350590045335, |
| "grad_norm": 1.40208899974823, |
| "learning_rate": 0.00014994756161510226, |
| "loss": 0.5637, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.25271289916035156, |
| "grad_norm": 1.8308840990066528, |
| "learning_rate": 0.00014968536969061353, |
| "loss": 0.6899, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.25402229242024976, |
| "grad_norm": 1.4921183586120605, |
| "learning_rate": 0.0001494231777661248, |
| "loss": 0.5764, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.25533168568014797, |
| "grad_norm": 1.5387523174285889, |
| "learning_rate": 0.0001491609858416361, |
| "loss": 0.5229, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.2566410789400462, |
| "grad_norm": 1.3345798254013062, |
| "learning_rate": 0.00014889879391714737, |
| "loss": 0.5949, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.2579504721999443, |
| "grad_norm": 1.682065486907959, |
| "learning_rate": 0.00014863660199265863, |
| "loss": 0.5619, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.25925986545984253, |
| "grad_norm": 1.480276346206665, |
| "learning_rate": 0.0001483744100681699, |
| "loss": 0.5473, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.26056925871974074, |
| "grad_norm": 1.3453810214996338, |
| "learning_rate": 0.0001481122181436812, |
| "loss": 0.5603, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.26187865197963894, |
| "grad_norm": 1.4118777513504028, |
| "learning_rate": 0.00014785002621919247, |
| "loss": 0.5543, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.26318804523953715, |
| "grad_norm": 1.2959351539611816, |
| "learning_rate": 0.00014758783429470374, |
| "loss": 0.4962, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.2644974384994353, |
| "grad_norm": 1.3605815172195435, |
| "learning_rate": 0.000147325642370215, |
| "loss": 0.5699, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.2658068317593335, |
| "grad_norm": 2.086613416671753, |
| "learning_rate": 0.00014706345044572628, |
| "loss": 0.565, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.2671162250192317, |
| "grad_norm": 1.2892887592315674, |
| "learning_rate": 0.00014680125852123757, |
| "loss": 0.6062, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.2684256182791299, |
| "grad_norm": 1.5760036706924438, |
| "learning_rate": 0.00014653906659674884, |
| "loss": 0.5642, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.2697350115390281, |
| "grad_norm": 1.21380615234375, |
| "learning_rate": 0.0001462768746722601, |
| "loss": 0.5514, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.2710444047989263, |
| "grad_norm": 1.4393121004104614, |
| "learning_rate": 0.00014601468274777138, |
| "loss": 0.5572, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.2723537980588245, |
| "grad_norm": 1.2972021102905273, |
| "learning_rate": 0.00014575249082328265, |
| "loss": 0.535, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.2736631913187227, |
| "grad_norm": 1.0208637714385986, |
| "learning_rate": 0.00014549029889879392, |
| "loss": 0.5835, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.2749725845786209, |
| "grad_norm": 1.4418736696243286, |
| "learning_rate": 0.00014522810697430521, |
| "loss": 0.4829, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.2762819778385191, |
| "grad_norm": 1.4326051473617554, |
| "learning_rate": 0.00014496591504981648, |
| "loss": 0.4711, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.27759137109841725, |
| "grad_norm": 1.497841715812683, |
| "learning_rate": 0.00014470372312532775, |
| "loss": 0.4935, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.27890076435831546, |
| "grad_norm": 1.5082463026046753, |
| "learning_rate": 0.00014444153120083902, |
| "loss": 0.4979, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.28021015761821366, |
| "grad_norm": 1.2458934783935547, |
| "learning_rate": 0.0001441793392763503, |
| "loss": 0.5644, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.28151955087811187, |
| "grad_norm": 1.730130910873413, |
| "learning_rate": 0.00014391714735186156, |
| "loss": 0.4749, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.2828289441380101, |
| "grad_norm": 1.2587112188339233, |
| "learning_rate": 0.00014365495542737283, |
| "loss": 0.5175, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.2841383373979082, |
| "grad_norm": 1.431119441986084, |
| "learning_rate": 0.0001433927635028841, |
| "loss": 0.5597, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.28544773065780643, |
| "grad_norm": 1.5383937358856201, |
| "learning_rate": 0.0001431305715783954, |
| "loss": 0.5153, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.28675712391770464, |
| "grad_norm": 1.4311727285385132, |
| "learning_rate": 0.00014286837965390666, |
| "loss": 0.5452, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.28806651717760284, |
| "grad_norm": 1.2555975914001465, |
| "learning_rate": 0.00014260618772941793, |
| "loss": 0.4937, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.28937591043750105, |
| "grad_norm": 1.3781330585479736, |
| "learning_rate": 0.0001423439958049292, |
| "loss": 0.4537, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.2906853036973992, |
| "grad_norm": 1.4810888767242432, |
| "learning_rate": 0.00014208180388044047, |
| "loss": 0.396, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.2919946969572974, |
| "grad_norm": 1.6619911193847656, |
| "learning_rate": 0.00014181961195595177, |
| "loss": 0.4756, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.2933040902171956, |
| "grad_norm": 1.3403065204620361, |
| "learning_rate": 0.00014155742003146303, |
| "loss": 0.5157, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.2946134834770938, |
| "grad_norm": 1.4188278913497925, |
| "learning_rate": 0.0001412952281069743, |
| "loss": 0.5237, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.29592287673699197, |
| "grad_norm": 1.852266550064087, |
| "learning_rate": 0.00014103303618248557, |
| "loss": 0.4558, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.2972322699968902, |
| "grad_norm": 1.3092072010040283, |
| "learning_rate": 0.00014077084425799687, |
| "loss": 0.4437, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.2985416632567884, |
| "grad_norm": 1.4190593957901, |
| "learning_rate": 0.00014050865233350814, |
| "loss": 0.4717, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.2998510565166866, |
| "grad_norm": 1.4562608003616333, |
| "learning_rate": 0.0001402464604090194, |
| "loss": 0.4744, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.3011604497765848, |
| "grad_norm": 1.4576420783996582, |
| "learning_rate": 0.00013998426848453068, |
| "loss": 0.4429, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.30246984303648294, |
| "grad_norm": 1.867145299911499, |
| "learning_rate": 0.00013972207656004194, |
| "loss": 0.4881, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.30377923629638115, |
| "grad_norm": 1.3077807426452637, |
| "learning_rate": 0.00013945988463555324, |
| "loss": 0.4067, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.30508862955627936, |
| "grad_norm": 1.3587473630905151, |
| "learning_rate": 0.0001391976927110645, |
| "loss": 0.4428, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.30639802281617756, |
| "grad_norm": 1.6012579202651978, |
| "learning_rate": 0.00013893550078657578, |
| "loss": 0.4572, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.30770741607607577, |
| "grad_norm": 1.2226955890655518, |
| "learning_rate": 0.00013867330886208705, |
| "loss": 0.4117, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.3090168093359739, |
| "grad_norm": 1.4615281820297241, |
| "learning_rate": 0.00013841111693759834, |
| "loss": 0.4561, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.3103262025958721, |
| "grad_norm": 1.401014804840088, |
| "learning_rate": 0.0001381489250131096, |
| "loss": 0.441, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.31163559585577033, |
| "grad_norm": 1.4875798225402832, |
| "learning_rate": 0.00013788673308862088, |
| "loss": 0.3991, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.31294498911566854, |
| "grad_norm": 1.1867239475250244, |
| "learning_rate": 0.00013762454116413215, |
| "loss": 0.4223, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.31425438237556674, |
| "grad_norm": 1.3172953128814697, |
| "learning_rate": 0.00013736234923964342, |
| "loss": 0.4388, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.3155637756354649, |
| "grad_norm": 1.4044665098190308, |
| "learning_rate": 0.00013710015731515472, |
| "loss": 0.4102, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.3168731688953631, |
| "grad_norm": 1.5709283351898193, |
| "learning_rate": 0.00013683796539066599, |
| "loss": 0.4837, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.3181825621552613, |
| "grad_norm": 1.2237786054611206, |
| "learning_rate": 0.00013657577346617725, |
| "loss": 0.4452, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.3194919554151595, |
| "grad_norm": 1.8869267702102661, |
| "learning_rate": 0.00013631358154168852, |
| "loss": 0.4077, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.3208013486750577, |
| "grad_norm": 1.226117491722107, |
| "learning_rate": 0.0001360513896171998, |
| "loss": 0.4109, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.32211074193495587, |
| "grad_norm": 1.6273385286331177, |
| "learning_rate": 0.0001357891976927111, |
| "loss": 0.3596, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.3234201351948541, |
| "grad_norm": 1.4535574913024902, |
| "learning_rate": 0.00013552700576822236, |
| "loss": 0.3996, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.3247295284547523, |
| "grad_norm": 1.6052360534667969, |
| "learning_rate": 0.00013526481384373363, |
| "loss": 0.4082, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.3260389217146505, |
| "grad_norm": 1.9104530811309814, |
| "learning_rate": 0.0001350026219192449, |
| "loss": 0.4089, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.3273483149745487, |
| "grad_norm": 1.6006613969802856, |
| "learning_rate": 0.0001347404299947562, |
| "loss": 0.3848, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.32865770823444684, |
| "grad_norm": 1.4406352043151855, |
| "learning_rate": 0.00013447823807026746, |
| "loss": 0.3926, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.32996710149434505, |
| "grad_norm": 1.3455756902694702, |
| "learning_rate": 0.00013421604614577873, |
| "loss": 0.4203, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.33127649475424326, |
| "grad_norm": 1.7718679904937744, |
| "learning_rate": 0.00013395385422129, |
| "loss": 0.3765, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.33258588801414146, |
| "grad_norm": 1.410130500793457, |
| "learning_rate": 0.00013369166229680127, |
| "loss": 0.3646, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.33389528127403967, |
| "grad_norm": 1.6361408233642578, |
| "learning_rate": 0.00013342947037231254, |
| "loss": 0.3917, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.3352046745339378, |
| "grad_norm": 1.7627660036087036, |
| "learning_rate": 0.0001331672784478238, |
| "loss": 0.367, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.336514067793836, |
| "grad_norm": 1.2431906461715698, |
| "learning_rate": 0.00013290508652333508, |
| "loss": 0.3708, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.33782346105373423, |
| "grad_norm": 1.4763669967651367, |
| "learning_rate": 0.00013264289459884634, |
| "loss": 0.377, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.33913285431363244, |
| "grad_norm": 2.1701712608337402, |
| "learning_rate": 0.00013238070267435761, |
| "loss": 0.344, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.3404422475735306, |
| "grad_norm": 1.4388126134872437, |
| "learning_rate": 0.0001321185107498689, |
| "loss": 0.3556, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.3417516408334288, |
| "grad_norm": 1.2981114387512207, |
| "learning_rate": 0.00013185631882538018, |
| "loss": 0.3272, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.343061034093327, |
| "grad_norm": 1.539335012435913, |
| "learning_rate": 0.00013159412690089145, |
| "loss": 0.4132, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.3443704273532252, |
| "grad_norm": 1.9272770881652832, |
| "learning_rate": 0.00013133193497640272, |
| "loss": 0.4121, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.3456798206131234, |
| "grad_norm": 1.4415314197540283, |
| "learning_rate": 0.000131069743051914, |
| "loss": 0.3595, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.34698921387302156, |
| "grad_norm": 1.3155860900878906, |
| "learning_rate": 0.00013080755112742528, |
| "loss": 0.3611, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.34829860713291977, |
| "grad_norm": 1.507858157157898, |
| "learning_rate": 0.00013054535920293655, |
| "loss": 0.3813, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.349608000392818, |
| "grad_norm": 1.5444693565368652, |
| "learning_rate": 0.00013028316727844782, |
| "loss": 0.3527, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.3509173936527162, |
| "grad_norm": 1.4008456468582153, |
| "learning_rate": 0.0001300209753539591, |
| "loss": 0.3573, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.3522267869126144, |
| "grad_norm": 1.6443661451339722, |
| "learning_rate": 0.00012975878342947039, |
| "loss": 0.3885, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.35353618017251254, |
| "grad_norm": 1.513431429862976, |
| "learning_rate": 0.00012949659150498165, |
| "loss": 0.3332, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.35484557343241074, |
| "grad_norm": 1.6663899421691895, |
| "learning_rate": 0.00012923439958049292, |
| "loss": 0.3769, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.35615496669230895, |
| "grad_norm": 1.2655925750732422, |
| "learning_rate": 0.0001289722076560042, |
| "loss": 0.4177, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.35746435995220716, |
| "grad_norm": 1.324833869934082, |
| "learning_rate": 0.00012871001573151546, |
| "loss": 0.3501, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.35877375321210536, |
| "grad_norm": 1.4842655658721924, |
| "learning_rate": 0.00012844782380702676, |
| "loss": 0.3223, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.3600831464720035, |
| "grad_norm": 1.4087761640548706, |
| "learning_rate": 0.00012818563188253803, |
| "loss": 0.3308, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.3613925397319017, |
| "grad_norm": 1.7493972778320312, |
| "learning_rate": 0.0001279234399580493, |
| "loss": 0.3655, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.3627019329917999, |
| "grad_norm": 1.4829336404800415, |
| "learning_rate": 0.00012766124803356056, |
| "loss": 0.3674, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.36401132625169813, |
| "grad_norm": 1.39944589138031, |
| "learning_rate": 0.00012739905610907186, |
| "loss": 0.3285, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.36532071951159634, |
| "grad_norm": 1.5995631217956543, |
| "learning_rate": 0.00012713686418458313, |
| "loss": 0.3431, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.3666301127714945, |
| "grad_norm": 1.0113691091537476, |
| "learning_rate": 0.0001268746722600944, |
| "loss": 0.3389, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.3679395060313927, |
| "grad_norm": 1.6544948816299438, |
| "learning_rate": 0.00012661248033560567, |
| "loss": 0.323, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.3692488992912909, |
| "grad_norm": 1.8022606372833252, |
| "learning_rate": 0.00012635028841111694, |
| "loss": 0.3777, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.3705582925511891, |
| "grad_norm": 1.6005665063858032, |
| "learning_rate": 0.00012608809648662823, |
| "loss": 0.3482, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.3718676858110873, |
| "grad_norm": 1.2550064325332642, |
| "learning_rate": 0.0001258259045621395, |
| "loss": 0.3288, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.37317707907098546, |
| "grad_norm": 2.43110728263855, |
| "learning_rate": 0.00012556371263765077, |
| "loss": 0.3511, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.37448647233088367, |
| "grad_norm": 1.5041906833648682, |
| "learning_rate": 0.00012530152071316204, |
| "loss": 0.3578, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.3757958655907819, |
| "grad_norm": 1.6031140089035034, |
| "learning_rate": 0.0001250393287886733, |
| "loss": 0.3213, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.3771052588506801, |
| "grad_norm": 1.025795817375183, |
| "learning_rate": 0.0001247771368641846, |
| "loss": 0.3352, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.3784146521105783, |
| "grad_norm": 1.934812068939209, |
| "learning_rate": 0.00012451494493969587, |
| "loss": 0.3365, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.37972404537047644, |
| "grad_norm": 1.0730398893356323, |
| "learning_rate": 0.00012425275301520714, |
| "loss": 0.3365, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.38103343863037464, |
| "grad_norm": 1.3496712446212769, |
| "learning_rate": 0.0001239905610907184, |
| "loss": 0.3548, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.38234283189027285, |
| "grad_norm": 1.3053911924362183, |
| "learning_rate": 0.0001237283691662297, |
| "loss": 0.3563, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.38365222515017106, |
| "grad_norm": 1.3640882968902588, |
| "learning_rate": 0.00012346617724174098, |
| "loss": 0.365, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.38496161841006926, |
| "grad_norm": 1.3266191482543945, |
| "learning_rate": 0.00012320398531725225, |
| "loss": 0.2981, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.3862710116699674, |
| "grad_norm": 1.32815682888031, |
| "learning_rate": 0.00012294179339276352, |
| "loss": 0.3544, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.3875804049298656, |
| "grad_norm": 1.4236459732055664, |
| "learning_rate": 0.00012267960146827479, |
| "loss": 0.3095, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.3888897981897638, |
| "grad_norm": 1.1536756753921509, |
| "learning_rate": 0.00012241740954378605, |
| "loss": 0.3125, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.39019919144966203, |
| "grad_norm": 1.4237791299819946, |
| "learning_rate": 0.00012215521761929732, |
| "loss": 0.3207, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.3915085847095602, |
| "grad_norm": 1.4023237228393555, |
| "learning_rate": 0.0001218930256948086, |
| "loss": 0.3714, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.3928179779694584, |
| "grad_norm": 1.3556010723114014, |
| "learning_rate": 0.00012163083377031987, |
| "loss": 0.3313, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.3941273712293566, |
| "grad_norm": 1.2301980257034302, |
| "learning_rate": 0.00012136864184583114, |
| "loss": 0.3062, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.3954367644892548, |
| "grad_norm": 1.3532170057296753, |
| "learning_rate": 0.00012110644992134244, |
| "loss": 0.2946, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.396746157749153, |
| "grad_norm": 1.2680764198303223, |
| "learning_rate": 0.00012084425799685371, |
| "loss": 0.3005, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.39805555100905116, |
| "grad_norm": 1.5346810817718506, |
| "learning_rate": 0.00012058206607236498, |
| "loss": 0.3363, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.39936494426894936, |
| "grad_norm": 1.423195242881775, |
| "learning_rate": 0.00012031987414787625, |
| "loss": 0.3294, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.40067433752884757, |
| "grad_norm": 1.599571704864502, |
| "learning_rate": 0.00012005768222338753, |
| "loss": 0.3469, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.4019837307887458, |
| "grad_norm": 1.2103453874588013, |
| "learning_rate": 0.0001197954902988988, |
| "loss": 0.2827, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.403293124048644, |
| "grad_norm": 1.3197276592254639, |
| "learning_rate": 0.00011953329837441007, |
| "loss": 0.3194, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.40460251730854213, |
| "grad_norm": 1.291038990020752, |
| "learning_rate": 0.00011927110644992135, |
| "loss": 0.2798, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.40591191056844034, |
| "grad_norm": 1.1556978225708008, |
| "learning_rate": 0.00011900891452543262, |
| "loss": 0.3318, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.40722130382833854, |
| "grad_norm": 1.3520278930664062, |
| "learning_rate": 0.0001187467226009439, |
| "loss": 0.3222, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.40853069708823675, |
| "grad_norm": 1.0671277046203613, |
| "learning_rate": 0.00011848453067645517, |
| "loss": 0.268, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.40984009034813496, |
| "grad_norm": 1.442131757736206, |
| "learning_rate": 0.00011822233875196644, |
| "loss": 0.3028, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.4111494836080331, |
| "grad_norm": 1.5673497915267944, |
| "learning_rate": 0.00011796014682747771, |
| "loss": 0.31, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.4124588768679313, |
| "grad_norm": 1.2009717226028442, |
| "learning_rate": 0.00011769795490298898, |
| "loss": 0.2986, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.4137682701278295, |
| "grad_norm": 1.2754930257797241, |
| "learning_rate": 0.00011743576297850027, |
| "loss": 0.3352, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.4150776633877277, |
| "grad_norm": 1.6189430952072144, |
| "learning_rate": 0.00011717357105401154, |
| "loss": 0.3804, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.41638705664762593, |
| "grad_norm": 1.6117827892303467, |
| "learning_rate": 0.00011691137912952281, |
| "loss": 0.3239, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.4176964499075241, |
| "grad_norm": 1.7495907545089722, |
| "learning_rate": 0.00011664918720503408, |
| "loss": 0.3145, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.4190058431674223, |
| "grad_norm": 1.2301905155181885, |
| "learning_rate": 0.00011638699528054538, |
| "loss": 0.2776, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.4203152364273205, |
| "grad_norm": 1.3571341037750244, |
| "learning_rate": 0.00011612480335605665, |
| "loss": 0.3019, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.4216246296872187, |
| "grad_norm": 0.9271483421325684, |
| "learning_rate": 0.00011586261143156792, |
| "loss": 0.2929, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.4229340229471169, |
| "grad_norm": 1.294146180152893, |
| "learning_rate": 0.00011560041950707918, |
| "loss": 0.3095, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.42424341620701506, |
| "grad_norm": 1.5177209377288818, |
| "learning_rate": 0.00011533822758259045, |
| "loss": 0.2714, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.42555280946691326, |
| "grad_norm": 1.1218962669372559, |
| "learning_rate": 0.00011507603565810175, |
| "loss": 0.282, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.42686220272681147, |
| "grad_norm": 1.2807728052139282, |
| "learning_rate": 0.00011481384373361302, |
| "loss": 0.3461, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.4281715959867097, |
| "grad_norm": 1.1680692434310913, |
| "learning_rate": 0.00011455165180912429, |
| "loss": 0.2842, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.4294809892466079, |
| "grad_norm": 1.6534638404846191, |
| "learning_rate": 0.00011428945988463556, |
| "loss": 0.2774, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.43079038250650603, |
| "grad_norm": 1.2321938276290894, |
| "learning_rate": 0.00011402726796014683, |
| "loss": 0.2841, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.43209977576640424, |
| "grad_norm": 1.6666522026062012, |
| "learning_rate": 0.00011376507603565811, |
| "loss": 0.2993, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.43340916902630244, |
| "grad_norm": 1.8330938816070557, |
| "learning_rate": 0.00011350288411116938, |
| "loss": 0.2834, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.43471856228620065, |
| "grad_norm": 1.570809245109558, |
| "learning_rate": 0.00011324069218668065, |
| "loss": 0.2885, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.4360279555460988, |
| "grad_norm": 1.4093183279037476, |
| "learning_rate": 0.00011297850026219192, |
| "loss": 0.2872, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.437337348805997, |
| "grad_norm": 0.8298211097717285, |
| "learning_rate": 0.00011271630833770321, |
| "loss": 0.2884, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.4386467420658952, |
| "grad_norm": 1.1143261194229126, |
| "learning_rate": 0.00011245411641321448, |
| "loss": 0.279, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.4399561353257934, |
| "grad_norm": 1.1568537950515747, |
| "learning_rate": 0.00011219192448872575, |
| "loss": 0.2724, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.4412655285856916, |
| "grad_norm": 0.8700618147850037, |
| "learning_rate": 0.00011192973256423702, |
| "loss": 0.2563, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.4425749218455898, |
| "grad_norm": 0.974319577217102, |
| "learning_rate": 0.00011166754063974829, |
| "loss": 0.2864, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.443884315105488, |
| "grad_norm": 0.9288910031318665, |
| "learning_rate": 0.00011140534871525958, |
| "loss": 0.2717, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.4451937083653862, |
| "grad_norm": 1.0942648649215698, |
| "learning_rate": 0.00011114315679077085, |
| "loss": 0.2625, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.4465031016252844, |
| "grad_norm": 1.3224159479141235, |
| "learning_rate": 0.00011088096486628212, |
| "loss": 0.2719, |
| "step": 3410 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 7638, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 10, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.8131850187780976e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|