| { | |
| "best_global_step": 626, | |
| "best_metric": 4.402504920959473, | |
| "best_model_checkpoint": "/home/deployer/laion/Orpheus-3B-Continued-2E-V4-WithGen/checkpoint-626", | |
| "epoch": 0.12624531117654175, | |
| "eval_steps": 313, | |
| "global_step": 4382, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00020166982616060984, | |
| "grad_norm": 0.2099609375, | |
| "learning_rate": 1.8e-05, | |
| "loss": 4.9769, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0004033396523212197, | |
| "grad_norm": 0.1552734375, | |
| "learning_rate": 3.8e-05, | |
| "loss": 4.8382, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0006050094784818296, | |
| "grad_norm": 0.07666015625, | |
| "learning_rate": 5.8e-05, | |
| "loss": 4.6796, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0008066793046424393, | |
| "grad_norm": 0.06787109375, | |
| "learning_rate": 7.800000000000001e-05, | |
| "loss": 4.6557, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0010083491308030493, | |
| "grad_norm": 0.058837890625, | |
| "learning_rate": 9.8e-05, | |
| "loss": 4.6592, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0012100189569636591, | |
| "grad_norm": 0.06982421875, | |
| "learning_rate": 0.000118, | |
| "loss": 4.612, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.001411688783124269, | |
| "grad_norm": 0.06396484375, | |
| "learning_rate": 0.000138, | |
| "loss": 4.5519, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0016133586092848787, | |
| "grad_norm": 0.06787109375, | |
| "learning_rate": 0.00015800000000000002, | |
| "loss": 4.5649, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.0018150284354454887, | |
| "grad_norm": 0.07177734375, | |
| "learning_rate": 0.00017800000000000002, | |
| "loss": 4.5665, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0020166982616060987, | |
| "grad_norm": 0.0615234375, | |
| "learning_rate": 0.00019800000000000002, | |
| "loss": 4.5854, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0022183680877667085, | |
| "grad_norm": 0.06298828125, | |
| "learning_rate": 0.00019999998367737306, | |
| "loss": 4.6197, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0024200379139273183, | |
| "grad_norm": 0.0693359375, | |
| "learning_rate": 0.00019999992725348425, | |
| "loss": 4.5047, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.002621707740087928, | |
| "grad_norm": 0.0732421875, | |
| "learning_rate": 0.00019999983052684242, | |
| "loss": 4.5378, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.002823377566248538, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 0.0001999996934974865, | |
| "loss": 4.5702, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.0030250473924091476, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.00019999951616547182, | |
| "loss": 4.5161, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0032267172185697574, | |
| "grad_norm": 0.07177734375, | |
| "learning_rate": 0.00019999929853086975, | |
| "loss": 4.5427, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.0034283870447303676, | |
| "grad_norm": 0.064453125, | |
| "learning_rate": 0.00019999904059376803, | |
| "loss": 4.5205, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0036300568708909774, | |
| "grad_norm": 0.06298828125, | |
| "learning_rate": 0.00019999874235427067, | |
| "loss": 4.5382, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.003831726697051587, | |
| "grad_norm": 0.0693359375, | |
| "learning_rate": 0.0001999984038124978, | |
| "loss": 4.5279, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.004033396523212197, | |
| "grad_norm": 0.06640625, | |
| "learning_rate": 0.0001999980249685859, | |
| "loss": 4.5047, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.004235066349372807, | |
| "grad_norm": 0.06494140625, | |
| "learning_rate": 0.00019999760582268763, | |
| "loss": 4.5041, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.004436736175533417, | |
| "grad_norm": 0.06689453125, | |
| "learning_rate": 0.00019999714637497192, | |
| "loss": 4.5513, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.004638406001694026, | |
| "grad_norm": 0.0654296875, | |
| "learning_rate": 0.00019999664662562398, | |
| "loss": 4.5115, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.0048400758278546365, | |
| "grad_norm": 0.06396484375, | |
| "learning_rate": 0.0001999961065748452, | |
| "loss": 4.5027, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.005041745654015246, | |
| "grad_norm": 0.07373046875, | |
| "learning_rate": 0.00019999552622285317, | |
| "loss": 4.448, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.005243415480175856, | |
| "grad_norm": 0.06689453125, | |
| "learning_rate": 0.0001999949055698819, | |
| "loss": 4.5337, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.005445085306336466, | |
| "grad_norm": 0.0654296875, | |
| "learning_rate": 0.00019999424461618145, | |
| "loss": 4.5358, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.005646755132497076, | |
| "grad_norm": 0.06103515625, | |
| "learning_rate": 0.00019999354336201828, | |
| "loss": 4.5168, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.005848424958657686, | |
| "grad_norm": 0.0634765625, | |
| "learning_rate": 0.0001999928018076749, | |
| "loss": 4.4455, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.006050094784818295, | |
| "grad_norm": 0.06591796875, | |
| "learning_rate": 0.00019999201995345026, | |
| "loss": 4.4747, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.006251764610978905, | |
| "grad_norm": 0.064453125, | |
| "learning_rate": 0.00019999119779965947, | |
| "loss": 4.4948, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.006312265558827088, | |
| "eval_loss": 4.493932723999023, | |
| "eval_runtime": 8.8208, | |
| "eval_samples_per_second": 22.674, | |
| "eval_steps_per_second": 1.474, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.00028233775662485377, | |
| "grad_norm": 0.046875, | |
| "learning_rate": 0.00019996118655688004, | |
| "loss": 4.4607, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.0006856774089460735, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 0.00019995756127956854, | |
| "loss": 4.4336, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.0010890170612672932, | |
| "grad_norm": 0.046142578125, | |
| "learning_rate": 0.00019995377420631467, | |
| "loss": 4.4016, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.001492356713588513, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 0.00019994982534324835, | |
| "loss": 4.2976, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.0018956963659097325, | |
| "grad_norm": 0.05224609375, | |
| "learning_rate": 0.00019994571469676142, | |
| "loss": 4.2416, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.0022990360182309523, | |
| "grad_norm": 0.052734375, | |
| "learning_rate": 0.00019994144227350756, | |
| "loss": 4.1895, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.002702375670552172, | |
| "grad_norm": 0.058349609375, | |
| "learning_rate": 0.00019993700808040233, | |
| "loss": 4.1082, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.003105715322873392, | |
| "grad_norm": 0.057861328125, | |
| "learning_rate": 0.0001999324121246231, | |
| "loss": 4.148, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.0035090549751946114, | |
| "grad_norm": 0.06494140625, | |
| "learning_rate": 0.00019992765441360905, | |
| "loss": 4.1184, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.003912394627515831, | |
| "grad_norm": 0.068359375, | |
| "learning_rate": 0.00019992273495506133, | |
| "loss": 4.1018, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.004315734279837051, | |
| "grad_norm": 0.06005859375, | |
| "learning_rate": 0.00019991765375694276, | |
| "loss": 4.0995, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.0047190739321582706, | |
| "grad_norm": 0.0634765625, | |
| "learning_rate": 0.00019991241082747795, | |
| "loss": 4.1194, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.00512241358447949, | |
| "grad_norm": 0.0634765625, | |
| "learning_rate": 0.00019990700617515344, | |
| "loss": 4.0612, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.00552575323680071, | |
| "grad_norm": 0.06298828125, | |
| "learning_rate": 0.00019990143980871738, | |
| "loss": 4.1001, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.005929092889121929, | |
| "grad_norm": 0.0634765625, | |
| "learning_rate": 0.00019989571173717975, | |
| "loss": 4.075, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.00633243254144315, | |
| "grad_norm": 0.07275390625, | |
| "learning_rate": 0.00019988982196981233, | |
| "loss": 4.1117, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.006735772193764369, | |
| "grad_norm": 0.0517578125, | |
| "learning_rate": 0.00019988377051614854, | |
| "loss": 4.5104, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.007139111846085589, | |
| "grad_norm": 0.047607421875, | |
| "learning_rate": 0.00019987755738598356, | |
| "loss": 4.5086, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.007542451498406808, | |
| "grad_norm": 0.047607421875, | |
| "learning_rate": 0.00019987118258937416, | |
| "loss": 4.4517, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.007945791150728028, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 0.000199864646136639, | |
| "loss": 4.419, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.008349130803049248, | |
| "grad_norm": 0.046142578125, | |
| "learning_rate": 0.00019985794803835825, | |
| "loss": 4.4749, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.008752470455370467, | |
| "grad_norm": 0.0478515625, | |
| "learning_rate": 0.00019985108830537372, | |
| "loss": 4.4646, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.009155810107691688, | |
| "grad_norm": 0.04345703125, | |
| "learning_rate": 0.00019984406694878895, | |
| "loss": 4.4207, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.009559149760012906, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 0.00019983688397996898, | |
| "loss": 4.4308, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.009962489412334127, | |
| "grad_norm": 0.04345703125, | |
| "learning_rate": 0.00019982953941054054, | |
| "loss": 4.4311, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.010365829064655347, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 0.00019982203325239186, | |
| "loss": 4.4623, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.010769168716976566, | |
| "grad_norm": 0.04833984375, | |
| "learning_rate": 0.00019981436551767275, | |
| "loss": 4.4461, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.011172508369297786, | |
| "grad_norm": 0.046630859375, | |
| "learning_rate": 0.00019980653621879462, | |
| "loss": 4.4197, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.011575848021619005, | |
| "grad_norm": 0.047119140625, | |
| "learning_rate": 0.00019979854536843027, | |
| "loss": 4.4135, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.011979187673940225, | |
| "grad_norm": 0.0498046875, | |
| "learning_rate": 0.0001997903929795141, | |
| "loss": 4.4105, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.012382527326261444, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 0.00019978207906524192, | |
| "loss": 4.4107, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.012624531117654176, | |
| "eval_loss": 4.402504920959473, | |
| "eval_runtime": 3.1085, | |
| "eval_samples_per_second": 64.339, | |
| "eval_steps_per_second": 8.042, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.00016133586092848787, | |
| "grad_norm": 0.047119140625, | |
| "learning_rate": 0.000199773603639071, | |
| "loss": 4.2779, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.0005646755132497075, | |
| "grad_norm": 0.0517578125, | |
| "learning_rate": 0.0001997649667147201, | |
| "loss": 4.2592, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.0009680151655709273, | |
| "grad_norm": 0.053466796875, | |
| "learning_rate": 0.00019975616830616937, | |
| "loss": 4.2228, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.001371354817892147, | |
| "grad_norm": 0.055908203125, | |
| "learning_rate": 0.00019974720842766023, | |
| "loss": 4.122, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.0017746944702133668, | |
| "grad_norm": 0.057373046875, | |
| "learning_rate": 0.00019973808709369565, | |
| "loss": 4.0099, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.0021780341225345863, | |
| "grad_norm": 0.0703125, | |
| "learning_rate": 0.00019972880431903977, | |
| "loss": 3.9065, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.002581373774855806, | |
| "grad_norm": 0.06982421875, | |
| "learning_rate": 0.00019971936011871816, | |
| "loss": 3.7953, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.002984713427177026, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.00019970975450801762, | |
| "loss": 3.7788, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.0033880530794982455, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.00019969998750248626, | |
| "loss": 3.7368, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.003791392731819465, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001996900591179334, | |
| "loss": 3.7229, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.004194732384140685, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001996799693704296, | |
| "loss": 3.7192, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.004598072036461905, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.00019966971827630654, | |
| "loss": 3.7457, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.005001411688783124, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.00019965930585215714, | |
| "loss": 3.7031, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.005404751341104344, | |
| "grad_norm": 0.10595703125, | |
| "learning_rate": 0.00019964873211483547, | |
| "loss": 3.7335, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.005808090993425563, | |
| "grad_norm": 0.10986328125, | |
| "learning_rate": 0.00019963799708145664, | |
| "loss": 3.6902, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.006211430645746784, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 0.00019962710076939686, | |
| "loss": 3.7408, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.006614770298068003, | |
| "grad_norm": 0.062255859375, | |
| "learning_rate": 0.00019961604319629342, | |
| "loss": 4.204, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.007018109950389223, | |
| "grad_norm": 0.053955078125, | |
| "learning_rate": 0.00019960482438004462, | |
| "loss": 4.364, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.007421449602710442, | |
| "grad_norm": 0.053955078125, | |
| "learning_rate": 0.00019959344433880978, | |
| "loss": 4.3305, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.007824789255031663, | |
| "grad_norm": 0.04833984375, | |
| "learning_rate": 0.0001995819030910091, | |
| "loss": 4.2992, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.008228128907352882, | |
| "grad_norm": 0.049072265625, | |
| "learning_rate": 0.00019957020065532386, | |
| "loss": 4.3347, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.008631468559674102, | |
| "grad_norm": 0.04931640625, | |
| "learning_rate": 0.0001995583370506961, | |
| "loss": 4.321, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.00903480821199532, | |
| "grad_norm": 0.0517578125, | |
| "learning_rate": 0.00019954631229632884, | |
| "loss": 4.3122, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.009438147864316541, | |
| "grad_norm": 0.05322265625, | |
| "learning_rate": 0.00019953412641168588, | |
| "loss": 4.2918, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.00984148751663776, | |
| "grad_norm": 0.052490234375, | |
| "learning_rate": 0.00019952177941649185, | |
| "loss": 4.2793, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.01024482716895898, | |
| "grad_norm": 0.048828125, | |
| "learning_rate": 0.00019950927133073222, | |
| "loss": 4.3363, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.0106481668212802, | |
| "grad_norm": 0.05126953125, | |
| "learning_rate": 0.00019949660217465307, | |
| "loss": 4.311, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.01105150647360142, | |
| "grad_norm": 0.05126953125, | |
| "learning_rate": 0.00019948377196876138, | |
| "loss": 4.2843, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.01145484612592264, | |
| "grad_norm": 0.053466796875, | |
| "learning_rate": 0.00019947078073382466, | |
| "loss": 4.2645, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.011858185778243858, | |
| "grad_norm": 0.05029296875, | |
| "learning_rate": 0.00019945762849087113, | |
| "loss": 4.2695, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.012261525430565079, | |
| "grad_norm": 0.054931640625, | |
| "learning_rate": 0.00019944431526118964, | |
| "loss": 4.279, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.012624531117654176, | |
| "eval_loss": 4.440088272094727, | |
| "eval_runtime": 3.1398, | |
| "eval_samples_per_second": 63.698, | |
| "eval_steps_per_second": 7.962, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 4.033396523212197e-05, | |
| "grad_norm": 0.058837890625, | |
| "learning_rate": 0.0, | |
| "loss": 4.0975, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.0004436736175533417, | |
| "grad_norm": 0.07421875, | |
| "learning_rate": 2e-05, | |
| "loss": 4.1075, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.0008470132698745614, | |
| "grad_norm": 0.0703125, | |
| "learning_rate": 4e-05, | |
| "loss": 4.015, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.001250352922195781, | |
| "grad_norm": 0.07373046875, | |
| "learning_rate": 6e-05, | |
| "loss": 3.8564, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.0016536925745170008, | |
| "grad_norm": 0.1279296875, | |
| "learning_rate": 8e-05, | |
| "loss": 3.5457, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.0020570322268382204, | |
| "grad_norm": 0.1982421875, | |
| "learning_rate": 0.0001, | |
| "loss": 3.2341, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.00246037187915944, | |
| "grad_norm": 0.234375, | |
| "learning_rate": 0.00012, | |
| "loss": 3.0956, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.00286371153148066, | |
| "grad_norm": 0.27734375, | |
| "learning_rate": 0.00014, | |
| "loss": 2.9331, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.0032670511838018795, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 0.00016, | |
| "loss": 2.8938, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.003670390836123099, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 0.00018, | |
| "loss": 2.8145, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.004073730488444319, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 0.0002, | |
| "loss": 2.9022, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.004477070140765539, | |
| "grad_norm": 0.279296875, | |
| "learning_rate": 0.0001999999190676822, | |
| "loss": 2.9572, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.004880409793086758, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 0.00019999967627085973, | |
| "loss": 2.9381, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.005283749445407978, | |
| "grad_norm": 0.2470703125, | |
| "learning_rate": 0.00019999927160992563, | |
| "loss": 2.9392, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.005687089097729197, | |
| "grad_norm": 0.267578125, | |
| "learning_rate": 0.00019999870508553488, | |
| "loss": 2.8675, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.006090428750050418, | |
| "grad_norm": 0.279296875, | |
| "learning_rate": 0.00019999797669860455, | |
| "loss": 2.9042, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.006493768402371637, | |
| "grad_norm": 0.162109375, | |
| "learning_rate": 0.00019999708645031353, | |
| "loss": 3.4063, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.006897108054692857, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.00019999603434210292, | |
| "loss": 4.137, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.0073004477070140765, | |
| "grad_norm": 0.0732421875, | |
| "learning_rate": 0.00019999482037567565, | |
| "loss": 4.1305, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.007703787359335296, | |
| "grad_norm": 0.06591796875, | |
| "learning_rate": 0.00019999344455299674, | |
| "loss": 4.0303, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.008107127011656516, | |
| "grad_norm": 0.0634765625, | |
| "learning_rate": 0.0001999919068762931, | |
| "loss": 4.0717, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.008510466663977735, | |
| "grad_norm": 0.059326171875, | |
| "learning_rate": 0.00019999020734805373, | |
| "loss": 4.0664, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.008913806316298956, | |
| "grad_norm": 0.060302734375, | |
| "learning_rate": 0.0001999883459710296, | |
| "loss": 4.0298, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.009317145968620174, | |
| "grad_norm": 0.057861328125, | |
| "learning_rate": 0.00019998632274823358, | |
| "loss": 4.0348, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.009720485620941395, | |
| "grad_norm": 0.06201171875, | |
| "learning_rate": 0.00019998413768294052, | |
| "loss": 4.0192, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.010123825273262615, | |
| "grad_norm": 0.064453125, | |
| "learning_rate": 0.0001999817907786873, | |
| "loss": 4.033, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.010527164925583834, | |
| "grad_norm": 0.0654296875, | |
| "learning_rate": 0.00019997928203927275, | |
| "loss": 4.0413, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.010930504577905054, | |
| "grad_norm": 0.06884765625, | |
| "learning_rate": 0.00019997661146875758, | |
| "loss": 4.0011, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.011333844230226273, | |
| "grad_norm": 0.068359375, | |
| "learning_rate": 0.00019997377907146459, | |
| "loss": 3.9817, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.011737183882547493, | |
| "grad_norm": 0.06884765625, | |
| "learning_rate": 0.0001999707848519783, | |
| "loss": 4.0007, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.012140523534868712, | |
| "grad_norm": 0.0654296875, | |
| "learning_rate": 0.0001999676288151454, | |
| "loss": 4.0265, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.012543863187189933, | |
| "grad_norm": 0.060302734375, | |
| "learning_rate": 0.00019996431096607438, | |
| "loss": 4.172, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.012624531117654176, | |
| "eval_loss": 4.49122428894043, | |
| "eval_runtime": 3.1912, | |
| "eval_samples_per_second": 62.672, | |
| "eval_steps_per_second": 7.834, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.00032267172185697574, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 1.4000000000000001e-06, | |
| "loss": 4.1169, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.0007260113741781954, | |
| "grad_norm": 0.1220703125, | |
| "learning_rate": 3.4000000000000005e-06, | |
| "loss": 4.0418, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.001129351026499415, | |
| "grad_norm": 0.1943359375, | |
| "learning_rate": 5.400000000000001e-06, | |
| "loss": 3.9211, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.0015326906788206349, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 7.4e-06, | |
| "loss": 3.6888, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.0019360303311418546, | |
| "grad_norm": 0.375, | |
| "learning_rate": 9.4e-06, | |
| "loss": 3.4768, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.0023393699834630744, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 1.14e-05, | |
| "loss": 3.242, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.002742709635784294, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 1.3400000000000002e-05, | |
| "loss": 2.9401, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.0031460492881055136, | |
| "grad_norm": 0.265625, | |
| "learning_rate": 1.54e-05, | |
| "loss": 2.7321, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.0035493889404267336, | |
| "grad_norm": 0.23828125, | |
| "learning_rate": 1.7400000000000003e-05, | |
| "loss": 2.413, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.003952728592747953, | |
| "grad_norm": 0.2353515625, | |
| "learning_rate": 1.94e-05, | |
| "loss": 2.2395, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.004356068245069173, | |
| "grad_norm": 0.2353515625, | |
| "learning_rate": 1.9999999012581816e-05, | |
| "loss": 2.2062, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.004759407897390392, | |
| "grad_norm": 0.232421875, | |
| "learning_rate": 1.999999417624832e-05, | |
| "loss": 2.1174, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.005162747549711612, | |
| "grad_norm": 0.2265625, | |
| "learning_rate": 1.999998530963894e-05, | |
| "loss": 1.9498, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.005566087202032832, | |
| "grad_norm": 0.23046875, | |
| "learning_rate": 1.999997241275724e-05, | |
| "loss": 1.8648, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.005969426854354052, | |
| "grad_norm": 0.2392578125, | |
| "learning_rate": 1.9999955485608426e-05, | |
| "loss": 1.8192, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.006372766506675271, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 1.999993452819932e-05, | |
| "loss": 2.0548, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.006776106158996491, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 1.999990954053836e-05, | |
| "loss": 3.5804, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.0071794458113177105, | |
| "grad_norm": 0.21484375, | |
| "learning_rate": 1.9999880522635625e-05, | |
| "loss": 3.8265, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.00758278546363893, | |
| "grad_norm": 0.130859375, | |
| "learning_rate": 1.999984747450281e-05, | |
| "loss": 3.7124, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.00798612511596015, | |
| "grad_norm": 0.1142578125, | |
| "learning_rate": 1.9999810396153232e-05, | |
| "loss": 3.7061, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.00838946476828137, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 1.9999769287601834e-05, | |
| "loss": 3.68, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.008792804420602589, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 1.9999724148865183e-05, | |
| "loss": 3.6099, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.00919614407292381, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 1.9999674979961473e-05, | |
| "loss": 3.613, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.00959948372524503, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 1.999962178091052e-05, | |
| "loss": 3.5696, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.010002823377566248, | |
| "grad_norm": 0.10986328125, | |
| "learning_rate": 1.9999564551733764e-05, | |
| "loss": 3.5333, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.010406163029887469, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 1.9999503292454275e-05, | |
| "loss": 3.5119, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.010809502682208687, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 1.9999438003096733e-05, | |
| "loss": 3.403, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.011212842334529908, | |
| "grad_norm": 0.10546875, | |
| "learning_rate": 1.9999368683687457e-05, | |
| "loss": 3.3882, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.011616181986851127, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 1.999929533425439e-05, | |
| "loss": 3.3576, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.012019521639172347, | |
| "grad_norm": 0.107421875, | |
| "learning_rate": 1.999921795482708e-05, | |
| "loss": 3.4566, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.012422861291493567, | |
| "grad_norm": 0.158203125, | |
| "learning_rate": 1.9999136545436727e-05, | |
| "loss": 3.7767, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.012826200943814786, | |
| "grad_norm": 0.2177734375, | |
| "learning_rate": 1.999905110611613e-05, | |
| "loss": 4.5098, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.013229540596136007, | |
| "grad_norm": 0.1552734375, | |
| "learning_rate": 1.9998961636899736e-05, | |
| "loss": 4.6336, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.013632880248457225, | |
| "grad_norm": 0.12255859375, | |
| "learning_rate": 1.999886813782359e-05, | |
| "loss": 4.5697, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.014036219900778446, | |
| "grad_norm": 0.10498046875, | |
| "learning_rate": 1.999877060892538e-05, | |
| "loss": 4.5598, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.014439559553099664, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 1.9998669050244416e-05, | |
| "loss": 4.5326, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.014842899205420885, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 1.999856346182163e-05, | |
| "loss": 4.5219, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.015246238857742105, | |
| "grad_norm": 0.07177734375, | |
| "learning_rate": 1.999845384369957e-05, | |
| "loss": 4.5013, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.015649578510063326, | |
| "grad_norm": 0.07080078125, | |
| "learning_rate": 1.9998340195922418e-05, | |
| "loss": 4.4859, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.016052918162384543, | |
| "grad_norm": 0.061767578125, | |
| "learning_rate": 1.999822251853598e-05, | |
| "loss": 4.5328, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.016456257814705763, | |
| "grad_norm": 0.05859375, | |
| "learning_rate": 1.9998100811587686e-05, | |
| "loss": 4.5024, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.016859597467026984, | |
| "grad_norm": 0.06201171875, | |
| "learning_rate": 1.9997975075126573e-05, | |
| "loss": 4.5178, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.017262937119348204, | |
| "grad_norm": 0.05908203125, | |
| "learning_rate": 1.9997845309203333e-05, | |
| "loss": 4.4892, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.017666276771669424, | |
| "grad_norm": 0.055908203125, | |
| "learning_rate": 1.9997711513870257e-05, | |
| "loss": 4.4645, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.01806961642399064, | |
| "grad_norm": 0.0546875, | |
| "learning_rate": 1.9997573689181272e-05, | |
| "loss": 4.4891, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.018472956076311862, | |
| "grad_norm": 0.054443359375, | |
| "learning_rate": 1.999743183519192e-05, | |
| "loss": 4.4604, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.018876295728633082, | |
| "grad_norm": 0.05224609375, | |
| "learning_rate": 1.9997285951959372e-05, | |
| "loss": 4.4935, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.019279635380954303, | |
| "grad_norm": 0.052978515625, | |
| "learning_rate": 1.999713603954243e-05, | |
| "loss": 4.4723, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.01968297503327552, | |
| "grad_norm": 0.052734375, | |
| "learning_rate": 1.9996982098001508e-05, | |
| "loss": 4.4923, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.02008631468559674, | |
| "grad_norm": 0.050537109375, | |
| "learning_rate": 1.9996824127398648e-05, | |
| "loss": 4.4402, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.02048965433791796, | |
| "grad_norm": 0.05078125, | |
| "learning_rate": 1.999666212779752e-05, | |
| "loss": 4.4716, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.02089299399023918, | |
| "grad_norm": 0.0478515625, | |
| "learning_rate": 1.999649609926341e-05, | |
| "loss": 4.4952, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.0212963336425604, | |
| "grad_norm": 0.052490234375, | |
| "learning_rate": 1.9996326041863236e-05, | |
| "loss": 4.4755, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.02169967329488162, | |
| "grad_norm": 0.04931640625, | |
| "learning_rate": 1.9996151955665535e-05, | |
| "loss": 4.4645, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.02210301294720284, | |
| "grad_norm": 0.05029296875, | |
| "learning_rate": 1.9995973840740467e-05, | |
| "loss": 4.474, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.02250635259952406, | |
| "grad_norm": 0.05029296875, | |
| "learning_rate": 1.999579169715982e-05, | |
| "loss": 4.4614, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.02290969225184528, | |
| "grad_norm": 0.049560546875, | |
| "learning_rate": 1.9995605524996996e-05, | |
| "loss": 4.4622, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.0233130319041665, | |
| "grad_norm": 0.052490234375, | |
| "learning_rate": 1.9995415324327038e-05, | |
| "loss": 4.4785, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.023716371556487717, | |
| "grad_norm": 0.048828125, | |
| "learning_rate": 1.9995221095226597e-05, | |
| "loss": 4.5104, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.024119711208808937, | |
| "grad_norm": 0.048095703125, | |
| "learning_rate": 1.999502283777395e-05, | |
| "loss": 4.4759, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.024523050861130158, | |
| "grad_norm": 0.0458984375, | |
| "learning_rate": 1.9994820552049002e-05, | |
| "loss": 4.4359, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.02492639051345138, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9994614238133282e-05, | |
| "loss": 4.4895, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.0253297301657726, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9994403896109942e-05, | |
| "loss": 4.4755, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.025733069818093816, | |
| "grad_norm": 0.045654296875, | |
| "learning_rate": 1.9994189526063746e-05, | |
| "loss": 4.4661, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.026136409470415036, | |
| "grad_norm": 0.046630859375, | |
| "learning_rate": 1.99939711280811e-05, | |
| "loss": 4.4775, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.026539749122736257, | |
| "grad_norm": 0.04931640625, | |
| "learning_rate": 1.999374870225003e-05, | |
| "loss": 4.4954, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.026943088775057477, | |
| "grad_norm": 0.046142578125, | |
| "learning_rate": 1.9993522248660163e-05, | |
| "loss": 4.4905, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.027346428427378694, | |
| "grad_norm": 0.049072265625, | |
| "learning_rate": 1.9993291767402776e-05, | |
| "loss": 4.4545, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.027749768079699914, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9993057258570762e-05, | |
| "loss": 4.4737, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.028153107732021135, | |
| "grad_norm": 0.046630859375, | |
| "learning_rate": 1.9992818722258626e-05, | |
| "loss": 4.4908, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.028556447384342355, | |
| "grad_norm": 0.047607421875, | |
| "learning_rate": 1.9992576158562515e-05, | |
| "loss": 4.4893, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.028959787036663576, | |
| "grad_norm": 0.04736328125, | |
| "learning_rate": 1.999232956758018e-05, | |
| "loss": 4.4452, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.029363126688984793, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.999207894941101e-05, | |
| "loss": 4.4901, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.029766466341306013, | |
| "grad_norm": 0.0478515625, | |
| "learning_rate": 1.9991824304156006e-05, | |
| "loss": 4.419, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.030169805993627234, | |
| "grad_norm": 0.048828125, | |
| "learning_rate": 1.99915656319178e-05, | |
| "loss": 4.4521, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.030573145645948454, | |
| "grad_norm": 0.046875, | |
| "learning_rate": 1.999130293280065e-05, | |
| "loss": 4.4561, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.030976485298269674, | |
| "grad_norm": 0.0595703125, | |
| "learning_rate": 1.9991036206910417e-05, | |
| "loss": 4.4544, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.03137982495059089, | |
| "grad_norm": 0.048095703125, | |
| "learning_rate": 1.999076545435461e-05, | |
| "loss": 4.4713, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.03178316460291211, | |
| "grad_norm": 0.047119140625, | |
| "learning_rate": 1.999049067524235e-05, | |
| "loss": 4.4408, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.03218650425523333, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9990211869684374e-05, | |
| "loss": 4.4686, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.03258984390755455, | |
| "grad_norm": 0.047119140625, | |
| "learning_rate": 1.998992903779305e-05, | |
| "loss": 4.4673, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.03299318355987577, | |
| "grad_norm": 0.046630859375, | |
| "learning_rate": 1.9989642179682374e-05, | |
| "loss": 4.4302, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.033396523212196993, | |
| "grad_norm": 0.046875, | |
| "learning_rate": 1.998935129546795e-05, | |
| "loss": 4.4844, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.033799862864518214, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.9989056385267015e-05, | |
| "loss": 4.4739, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.03420320251683943, | |
| "grad_norm": 0.047119140625, | |
| "learning_rate": 1.9988757449198428e-05, | |
| "loss": 4.499, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.03460654216916065, | |
| "grad_norm": 0.046875, | |
| "learning_rate": 1.9988454487382667e-05, | |
| "loss": 4.4517, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.03500988182148187, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.9988147499941832e-05, | |
| "loss": 4.4608, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.03541322147380309, | |
| "grad_norm": 0.0458984375, | |
| "learning_rate": 1.998783648699965e-05, | |
| "loss": 4.4515, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.03581656112612431, | |
| "grad_norm": 0.04443359375, | |
| "learning_rate": 1.9987521448681465e-05, | |
| "loss": 4.4646, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.03621990077844553, | |
| "grad_norm": 0.05908203125, | |
| "learning_rate": 1.9987202385114252e-05, | |
| "loss": 4.4586, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.03662324043076675, | |
| "grad_norm": 0.04736328125, | |
| "learning_rate": 1.99868792964266e-05, | |
| "loss": 4.4699, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.03702658008308797, | |
| "grad_norm": 0.044189453125, | |
| "learning_rate": 1.9986552182748715e-05, | |
| "loss": 4.4678, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.03742991973540919, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9986221044212442e-05, | |
| "loss": 4.4581, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.037833259387730404, | |
| "grad_norm": 0.045654296875, | |
| "learning_rate": 1.998588588095124e-05, | |
| "loss": 4.4273, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.038236599040051625, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.9985546693100186e-05, | |
| "loss": 4.4602, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.038639938692372845, | |
| "grad_norm": 0.04931640625, | |
| "learning_rate": 1.9985203480795977e-05, | |
| "loss": 4.4766, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.039043278344694066, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9984856244176948e-05, | |
| "loss": 4.4579, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.039446617997015286, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 1.998450498338303e-05, | |
| "loss": 4.4661, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.039849957649336507, | |
| "grad_norm": 0.043701171875, | |
| "learning_rate": 1.9984149698555808e-05, | |
| "loss": 4.4415, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.04025329730165773, | |
| "grad_norm": 0.046875, | |
| "learning_rate": 1.998379038983846e-05, | |
| "loss": 4.4461, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.04065663695397895, | |
| "grad_norm": 0.04443359375, | |
| "learning_rate": 1.9983427057375802e-05, | |
| "loss": 4.4627, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.04105997660630017, | |
| "grad_norm": 0.0458984375, | |
| "learning_rate": 1.9983059701314267e-05, | |
| "loss": 4.4399, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.04146331625862139, | |
| "grad_norm": 0.045654296875, | |
| "learning_rate": 1.9982688321801906e-05, | |
| "loss": 4.4673, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.0418666559109426, | |
| "grad_norm": 0.046142578125, | |
| "learning_rate": 1.99823129189884e-05, | |
| "loss": 4.478, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.04226999556326382, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9981933493025044e-05, | |
| "loss": 4.4433, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.04267333521558504, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9981550044064756e-05, | |
| "loss": 4.4535, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.04307667486790626, | |
| "grad_norm": 0.046142578125, | |
| "learning_rate": 1.998116257226208e-05, | |
| "loss": 4.4665, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.043480014520227483, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9980771077773177e-05, | |
| "loss": 4.4419, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.043883354172548704, | |
| "grad_norm": 0.046875, | |
| "learning_rate": 1.9980375560755833e-05, | |
| "loss": 4.4724, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.044286693824869924, | |
| "grad_norm": 0.044921875, | |
| "learning_rate": 1.997997602136944e-05, | |
| "loss": 4.4485, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.044690033477191145, | |
| "grad_norm": 0.044921875, | |
| "learning_rate": 1.997957245977504e-05, | |
| "loss": 4.4484, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.045093373129512365, | |
| "grad_norm": 0.044921875, | |
| "learning_rate": 1.997916487613527e-05, | |
| "loss": 4.4594, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.04549671278183358, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 1.9978753270614403e-05, | |
| "loss": 4.4836, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.0459000524341548, | |
| "grad_norm": 0.044921875, | |
| "learning_rate": 1.997833764337832e-05, | |
| "loss": 4.4767, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.04630339208647602, | |
| "grad_norm": 0.04736328125, | |
| "learning_rate": 1.9977917994594537e-05, | |
| "loss": 4.4358, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.04670673173879724, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.997749432443218e-05, | |
| "loss": 4.4552, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.04711007139111846, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.9977066633062002e-05, | |
| "loss": 4.4383, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.04751341104343968, | |
| "grad_norm": 0.045654296875, | |
| "learning_rate": 1.9976634920656374e-05, | |
| "loss": 4.4605, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.0479167506957609, | |
| "grad_norm": 0.042724609375, | |
| "learning_rate": 1.9976199187389286e-05, | |
| "loss": 4.4496, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.04832009034808212, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 1.997575943343635e-05, | |
| "loss": 4.454, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.04872343000040334, | |
| "grad_norm": 0.044189453125, | |
| "learning_rate": 1.997531565897481e-05, | |
| "loss": 4.4212, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.04912676965272456, | |
| "grad_norm": 0.043212890625, | |
| "learning_rate": 1.9974867864183508e-05, | |
| "loss": 4.4441, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.049530109305045776, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.997441604924292e-05, | |
| "loss": 4.4085, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.049933448957366997, | |
| "grad_norm": 0.044189453125, | |
| "learning_rate": 1.997396021433514e-05, | |
| "loss": 4.4616, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.05033678860968822, | |
| "grad_norm": 0.04833984375, | |
| "learning_rate": 1.9973500359643885e-05, | |
| "loss": 4.4544, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.05074012826200944, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.9973036485354485e-05, | |
| "loss": 4.453, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.05114346791433066, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 1.99725685916539e-05, | |
| "loss": 4.4722, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.05154680756665188, | |
| "grad_norm": 0.04296875, | |
| "learning_rate": 1.99720966787307e-05, | |
| "loss": 4.4332, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.0519501472189731, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 1.9971620746775077e-05, | |
| "loss": 4.4757, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.05235348687129432, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 1.997114079597885e-05, | |
| "loss": 4.4264, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.05275682652361554, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 1.997065682653545e-05, | |
| "loss": 4.4372, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.05316016617593675, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.997016883863993e-05, | |
| "loss": 4.4566, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.05356350582825797, | |
| "grad_norm": 0.04345703125, | |
| "learning_rate": 1.9969676832488965e-05, | |
| "loss": 4.4309, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.053966845480579194, | |
| "grad_norm": 0.046142578125, | |
| "learning_rate": 1.9969180808280845e-05, | |
| "loss": 4.4621, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.054370185132900414, | |
| "grad_norm": 0.04443359375, | |
| "learning_rate": 1.9968680766215477e-05, | |
| "loss": 4.4465, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.054773524785221635, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9968176706494403e-05, | |
| "loss": 4.4239, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.055176864437542855, | |
| "grad_norm": 0.042724609375, | |
| "learning_rate": 1.996766862932076e-05, | |
| "loss": 4.4567, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.055580204089864076, | |
| "grad_norm": 0.046142578125, | |
| "learning_rate": 1.996715653489933e-05, | |
| "loss": 4.423, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.055983543742185296, | |
| "grad_norm": 0.04248046875, | |
| "learning_rate": 1.9966640423436492e-05, | |
| "loss": 4.3849, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.056386883394506516, | |
| "grad_norm": 0.0458984375, | |
| "learning_rate": 1.9966120295140258e-05, | |
| "loss": 4.4788, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.05679022304682774, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.996559615022025e-05, | |
| "loss": 4.4313, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.05719356269914895, | |
| "grad_norm": 0.044921875, | |
| "learning_rate": 1.996506798888772e-05, | |
| "loss": 4.4493, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.05759690235147017, | |
| "grad_norm": 0.047607421875, | |
| "learning_rate": 1.9964535811355524e-05, | |
| "loss": 4.4438, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.05800024200379139, | |
| "grad_norm": 0.04296875, | |
| "learning_rate": 1.996399961783815e-05, | |
| "loss": 4.413, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.05840358165611261, | |
| "grad_norm": 0.043701171875, | |
| "learning_rate": 1.9963459408551693e-05, | |
| "loss": 4.4538, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.05880692130843383, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.996291518371388e-05, | |
| "loss": 4.466, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.05921026096075505, | |
| "grad_norm": 0.045654296875, | |
| "learning_rate": 1.9962366943544045e-05, | |
| "loss": 4.4963, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.05961360061307627, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9961814688263138e-05, | |
| "loss": 4.46, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.06001694026539749, | |
| "grad_norm": 0.04345703125, | |
| "learning_rate": 1.9961258418093745e-05, | |
| "loss": 4.4481, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.060420279917718714, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.9960698133260053e-05, | |
| "loss": 4.442, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.06082361957003993, | |
| "grad_norm": 0.052490234375, | |
| "learning_rate": 1.9960133833987866e-05, | |
| "loss": 4.4473, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.06122695922236115, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 1.9959565520504625e-05, | |
| "loss": 4.4286, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.06163029887468237, | |
| "grad_norm": 0.042724609375, | |
| "learning_rate": 1.9958993193039365e-05, | |
| "loss": 4.4616, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.06203363852700359, | |
| "grad_norm": 0.04296875, | |
| "learning_rate": 1.9958416851822755e-05, | |
| "loss": 4.4409, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.06243697817932481, | |
| "grad_norm": 0.042236328125, | |
| "learning_rate": 1.9957836497087074e-05, | |
| "loss": 4.4416, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.06284031783164602, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9957252129066227e-05, | |
| "loss": 4.4552, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.06324365748396725, | |
| "grad_norm": 0.046875, | |
| "learning_rate": 1.9956663747995724e-05, | |
| "loss": 4.4536, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.06364699713628846, | |
| "grad_norm": 0.045654296875, | |
| "learning_rate": 1.99560713541127e-05, | |
| "loss": 4.4544, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.06405033678860969, | |
| "grad_norm": 0.0419921875, | |
| "learning_rate": 1.9955474947655912e-05, | |
| "loss": 4.4288, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.0644536764409309, | |
| "grad_norm": 0.041748046875, | |
| "learning_rate": 1.995487452886572e-05, | |
| "loss": 4.4397, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.06485701609325213, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.995427009798411e-05, | |
| "loss": 4.4909, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.06526035574557335, | |
| "grad_norm": 0.043701171875, | |
| "learning_rate": 1.9953661655254695e-05, | |
| "loss": 4.4528, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.06566369539789457, | |
| "grad_norm": 0.045654296875, | |
| "learning_rate": 1.9953049200922684e-05, | |
| "loss": 4.4308, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.06606703505021579, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9952432735234918e-05, | |
| "loss": 4.4585, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.066470374702537, | |
| "grad_norm": 0.046630859375, | |
| "learning_rate": 1.9951812258439846e-05, | |
| "loss": 4.4663, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.06687371435485823, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.995118777078754e-05, | |
| "loss": 4.4549, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.06727705400717944, | |
| "grad_norm": 0.04931640625, | |
| "learning_rate": 1.9950559272529686e-05, | |
| "loss": 4.4434, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.06768039365950067, | |
| "grad_norm": 0.04443359375, | |
| "learning_rate": 1.9949926763919586e-05, | |
| "loss": 4.4086, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.06808373331182188, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9949290245212157e-05, | |
| "loss": 4.4456, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.06848707296414311, | |
| "grad_norm": 0.042724609375, | |
| "learning_rate": 1.9948649716663936e-05, | |
| "loss": 4.4395, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.06889041261646432, | |
| "grad_norm": 0.048095703125, | |
| "learning_rate": 1.994800517853307e-05, | |
| "loss": 4.4357, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.06929375226878555, | |
| "grad_norm": 0.041015625, | |
| "learning_rate": 1.9947356631079337e-05, | |
| "loss": 4.4129, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.06969709192110676, | |
| "grad_norm": 0.042724609375, | |
| "learning_rate": 1.9946704074564105e-05, | |
| "loss": 4.4587, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.07010043157342799, | |
| "grad_norm": 0.04248046875, | |
| "learning_rate": 1.994604750925038e-05, | |
| "loss": 4.4844, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.0705037712257492, | |
| "grad_norm": 0.044921875, | |
| "learning_rate": 1.9945386935402775e-05, | |
| "loss": 4.4578, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.07090711087807042, | |
| "grad_norm": 0.04443359375, | |
| "learning_rate": 1.9944722353287518e-05, | |
| "loss": 4.4306, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.07131045053039164, | |
| "grad_norm": 0.04248046875, | |
| "learning_rate": 1.994405376317246e-05, | |
| "loss": 4.4542, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.07171379018271286, | |
| "grad_norm": 0.044921875, | |
| "learning_rate": 1.9943381165327053e-05, | |
| "loss": 4.4166, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.07211712983503409, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9942704560022378e-05, | |
| "loss": 4.4745, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.0725204694873553, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9942023947531122e-05, | |
| "loss": 4.4555, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.07292380913967653, | |
| "grad_norm": 0.047119140625, | |
| "learning_rate": 1.99413393281276e-05, | |
| "loss": 4.4133, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.07332714879199774, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 1.9940650702087718e-05, | |
| "loss": 4.4555, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.07373048844431897, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.9939958069689026e-05, | |
| "loss": 4.4011, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.07413382809664018, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9939261431210664e-05, | |
| "loss": 4.4595, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.0745371677489614, | |
| "grad_norm": 0.04296875, | |
| "learning_rate": 1.9938560786933398e-05, | |
| "loss": 4.452, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.07494050740128262, | |
| "grad_norm": 0.04345703125, | |
| "learning_rate": 1.9937856137139612e-05, | |
| "loss": 4.4497, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.07534384705360384, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.9937147482113296e-05, | |
| "loss": 4.4514, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.07574718670592506, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.993643482214006e-05, | |
| "loss": 4.4674, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.07615052635824628, | |
| "grad_norm": 0.0419921875, | |
| "learning_rate": 1.9935718157507124e-05, | |
| "loss": 4.4503, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.0765538660105675, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.9934997488503325e-05, | |
| "loss": 4.4512, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.07695720566288872, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.993427281541911e-05, | |
| "loss": 4.4441, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.07736054531520994, | |
| "grad_norm": 0.044189453125, | |
| "learning_rate": 1.9933544138546542e-05, | |
| "loss": 4.4542, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.07776388496753116, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.9932811458179305e-05, | |
| "loss": 4.4436, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.07816722461985237, | |
| "grad_norm": 0.047607421875, | |
| "learning_rate": 1.993207477461268e-05, | |
| "loss": 4.4158, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.0785705642721736, | |
| "grad_norm": 0.04443359375, | |
| "learning_rate": 1.993133408814358e-05, | |
| "loss": 4.4518, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.07897390392449481, | |
| "grad_norm": 0.044921875, | |
| "learning_rate": 1.9930589399070515e-05, | |
| "loss": 4.4289, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.07937724357681604, | |
| "grad_norm": 0.045654296875, | |
| "learning_rate": 1.9929840707693618e-05, | |
| "loss": 4.4452, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.07978058322913725, | |
| "grad_norm": 0.042724609375, | |
| "learning_rate": 1.9929088014314636e-05, | |
| "loss": 4.462, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.08018392288145848, | |
| "grad_norm": 0.04345703125, | |
| "learning_rate": 1.992833131923692e-05, | |
| "loss": 4.4282, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.0805872625337797, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.9927570622765443e-05, | |
| "loss": 4.4584, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.08099060218610092, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9926805925206784e-05, | |
| "loss": 4.455, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.08139394183842213, | |
| "grad_norm": 0.043212890625, | |
| "learning_rate": 1.992603722686914e-05, | |
| "loss": 4.4233, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.08179728149074335, | |
| "grad_norm": 0.043701171875, | |
| "learning_rate": 1.9925264528062317e-05, | |
| "loss": 4.4435, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.08220062114306458, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9924487829097733e-05, | |
| "loss": 4.4385, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.08260396079538579, | |
| "grad_norm": 0.04248046875, | |
| "learning_rate": 1.9923707130288415e-05, | |
| "loss": 4.4204, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.08300730044770702, | |
| "grad_norm": 0.043701171875, | |
| "learning_rate": 1.9922922431949017e-05, | |
| "loss": 4.4202, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.08341064010002823, | |
| "grad_norm": 0.04296875, | |
| "learning_rate": 1.9922133734395787e-05, | |
| "loss": 4.394, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.08381397975234946, | |
| "grad_norm": 0.04443359375, | |
| "learning_rate": 1.9921341037946592e-05, | |
| "loss": 4.4216, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.08421731940467067, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.9920544342920913e-05, | |
| "loss": 4.4231, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.0846206590569919, | |
| "grad_norm": 0.0458984375, | |
| "learning_rate": 1.991974364963984e-05, | |
| "loss": 4.4847, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.08502399870931311, | |
| "grad_norm": 0.04248046875, | |
| "learning_rate": 1.9918938958426075e-05, | |
| "loss": 4.4063, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.08542733836163434, | |
| "grad_norm": 0.0458984375, | |
| "learning_rate": 1.9918130269603926e-05, | |
| "loss": 4.4668, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.08583067801395555, | |
| "grad_norm": 0.0458984375, | |
| "learning_rate": 1.991731758349933e-05, | |
| "loss": 4.4454, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.08623401766627677, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9916500900439806e-05, | |
| "loss": 4.4527, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.086637357318598, | |
| "grad_norm": 0.042724609375, | |
| "learning_rate": 1.991568022075451e-05, | |
| "loss": 4.4057, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.0870406969709192, | |
| "grad_norm": 0.044189453125, | |
| "learning_rate": 1.9914855544774195e-05, | |
| "loss": 4.4503, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.08744403662324043, | |
| "grad_norm": 0.043212890625, | |
| "learning_rate": 1.991402687283123e-05, | |
| "loss": 4.4968, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.08784737627556165, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9913194205259595e-05, | |
| "loss": 4.4642, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.08825071592788288, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.9912357542394873e-05, | |
| "loss": 4.4283, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.08865405558020409, | |
| "grad_norm": 0.0458984375, | |
| "learning_rate": 1.9911516884574262e-05, | |
| "loss": 4.4776, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.08905739523252532, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9910672232136578e-05, | |
| "loss": 4.4578, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.08946073488484653, | |
| "grad_norm": 0.04443359375, | |
| "learning_rate": 1.990982358542223e-05, | |
| "loss": 4.4718, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.08986407453716774, | |
| "grad_norm": 0.043212890625, | |
| "learning_rate": 1.9908970944773255e-05, | |
| "loss": 4.4575, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.09026741418948897, | |
| "grad_norm": 0.04296875, | |
| "learning_rate": 1.9908114310533285e-05, | |
| "loss": 4.4147, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.09067075384181018, | |
| "grad_norm": 0.048095703125, | |
| "learning_rate": 1.990725368304757e-05, | |
| "loss": 4.4363, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.09107409349413141, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.990638906266297e-05, | |
| "loss": 4.4575, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.09147743314645262, | |
| "grad_norm": 0.043212890625, | |
| "learning_rate": 1.990552044972794e-05, | |
| "loss": 4.4545, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.09188077279877385, | |
| "grad_norm": 0.044189453125, | |
| "learning_rate": 1.9904647844592572e-05, | |
| "loss": 4.4439, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.09228411245109507, | |
| "grad_norm": 0.043701171875, | |
| "learning_rate": 1.9903771247608535e-05, | |
| "loss": 4.4192, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.09268745210341629, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.9902890659129125e-05, | |
| "loss": 4.4251, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.0930907917557375, | |
| "grad_norm": 0.041259765625, | |
| "learning_rate": 1.990200607950925e-05, | |
| "loss": 4.4339, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.09349413140805872, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9901117509105417e-05, | |
| "loss": 4.4635, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.09389747106037995, | |
| "grad_norm": 0.044921875, | |
| "learning_rate": 1.990022494827574e-05, | |
| "loss": 4.4255, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.09430081071270116, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.9899328397379955e-05, | |
| "loss": 4.4412, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.09470415036502239, | |
| "grad_norm": 0.042724609375, | |
| "learning_rate": 1.989842785677939e-05, | |
| "loss": 4.437, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.0951074900173436, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9897523326836987e-05, | |
| "loss": 4.4136, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.09551082966966483, | |
| "grad_norm": 0.047607421875, | |
| "learning_rate": 1.98966148079173e-05, | |
| "loss": 4.4353, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.09591416932198604, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.989570230038649e-05, | |
| "loss": 4.4331, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.09631750897430727, | |
| "grad_norm": 0.043212890625, | |
| "learning_rate": 1.989478580461232e-05, | |
| "loss": 4.4311, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.09672084862662848, | |
| "grad_norm": 0.04443359375, | |
| "learning_rate": 1.9893865320964162e-05, | |
| "loss": 4.3961, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.0971241882789497, | |
| "grad_norm": 0.044921875, | |
| "learning_rate": 1.9892940849812997e-05, | |
| "loss": 4.4645, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.09752752793127092, | |
| "grad_norm": 0.046142578125, | |
| "learning_rate": 1.9892012391531413e-05, | |
| "loss": 4.495, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.09793086758359214, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.989107994649361e-05, | |
| "loss": 4.4074, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.09833420723591337, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 1.989014351507538e-05, | |
| "loss": 4.441, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.09873754688823458, | |
| "grad_norm": 0.045654296875, | |
| "learning_rate": 1.988920309765413e-05, | |
| "loss": 4.446, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.0991408865405558, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9888258694608886e-05, | |
| "loss": 4.4378, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.09954422619287702, | |
| "grad_norm": 0.044189453125, | |
| "learning_rate": 1.988731030632026e-05, | |
| "loss": 4.4068, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.09994756584519825, | |
| "grad_norm": 0.0458984375, | |
| "learning_rate": 1.988635793317048e-05, | |
| "loss": 4.4587, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.10035090549751946, | |
| "grad_norm": 0.042236328125, | |
| "learning_rate": 1.9885401575543384e-05, | |
| "loss": 4.4586, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.10075424514984069, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.98844412338244e-05, | |
| "loss": 4.4342, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.1011575848021619, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 1.9883476908400587e-05, | |
| "loss": 4.4647, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.10156092445448311, | |
| "grad_norm": 0.04345703125, | |
| "learning_rate": 1.9882508599660583e-05, | |
| "loss": 4.4362, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.10196426410680434, | |
| "grad_norm": 0.04248046875, | |
| "learning_rate": 1.9881536307994645e-05, | |
| "loss": 4.4402, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.10236760375912556, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.9880560033794637e-05, | |
| "loss": 4.4238, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.10277094341144678, | |
| "grad_norm": 0.04443359375, | |
| "learning_rate": 1.9879579777454027e-05, | |
| "loss": 4.4556, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.103174283063768, | |
| "grad_norm": 0.049560546875, | |
| "learning_rate": 1.987859553936788e-05, | |
| "loss": 4.4287, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.10357762271608922, | |
| "grad_norm": 0.045654296875, | |
| "learning_rate": 1.9877607319932872e-05, | |
| "loss": 4.421, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.10398096236841044, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 1.9876615119547286e-05, | |
| "loss": 4.4607, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.10438430202073166, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 1.9875618938611008e-05, | |
| "loss": 4.4433, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.10478764167305288, | |
| "grad_norm": 0.04443359375, | |
| "learning_rate": 1.987461877752552e-05, | |
| "loss": 4.4713, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.10519098132537409, | |
| "grad_norm": 0.043701171875, | |
| "learning_rate": 1.9873614636693918e-05, | |
| "loss": 4.4631, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.10559432097769532, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9872606516520898e-05, | |
| "loss": 4.3911, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.10599766063001653, | |
| "grad_norm": 0.0458984375, | |
| "learning_rate": 1.9871594417412763e-05, | |
| "loss": 4.451, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.10640100028233776, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9870578339777416e-05, | |
| "loss": 4.4256, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.10680433993465897, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9869558284024363e-05, | |
| "loss": 4.4336, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.1072076795869802, | |
| "grad_norm": 0.044921875, | |
| "learning_rate": 1.9868534250564713e-05, | |
| "loss": 4.464, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.10761101923930141, | |
| "grad_norm": 0.042236328125, | |
| "learning_rate": 1.9867506239811188e-05, | |
| "loss": 4.4258, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.10801435889162264, | |
| "grad_norm": 0.04736328125, | |
| "learning_rate": 1.9866474252178096e-05, | |
| "loss": 4.4037, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.10841769854394386, | |
| "grad_norm": 0.04345703125, | |
| "learning_rate": 1.9865438288081366e-05, | |
| "loss": 4.383, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.10882103819626507, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.986439834793851e-05, | |
| "loss": 4.4667, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.1092243778485863, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.986335443216866e-05, | |
| "loss": 4.4176, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.10962771750090751, | |
| "grad_norm": 0.044189453125, | |
| "learning_rate": 1.9862306541192536e-05, | |
| "loss": 4.4293, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.11003105715322874, | |
| "grad_norm": 0.047119140625, | |
| "learning_rate": 1.9861254675432478e-05, | |
| "loss": 4.4302, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.11043439680554995, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9860198835312408e-05, | |
| "loss": 4.4271, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.11083773645787118, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.985913902125786e-05, | |
| "loss": 4.4409, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.11124107611019239, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9858075233695974e-05, | |
| "loss": 4.4272, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.11164441576251362, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.9857007473055482e-05, | |
| "loss": 4.426, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.11204775541483483, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.9855935739766724e-05, | |
| "loss": 4.4782, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.11245109506715605, | |
| "grad_norm": 0.047119140625, | |
| "learning_rate": 1.9854860034261635e-05, | |
| "loss": 4.4529, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.11285443471947727, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.9853780356973757e-05, | |
| "loss": 4.413, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.11325777437179849, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9852696708338224e-05, | |
| "loss": 4.4179, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.11366111402411971, | |
| "grad_norm": 0.04443359375, | |
| "learning_rate": 1.9851609088791783e-05, | |
| "loss": 4.4679, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.11406445367644093, | |
| "grad_norm": 0.04345703125, | |
| "learning_rate": 1.9850517498772775e-05, | |
| "loss": 4.4556, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.11446779332876215, | |
| "grad_norm": 0.043701171875, | |
| "learning_rate": 1.9849421938721137e-05, | |
| "loss": 4.4282, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.11487113298108337, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.9848322409078412e-05, | |
| "loss": 4.4264, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.1152744726334046, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9847218910287743e-05, | |
| "loss": 4.4565, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.11567781228572581, | |
| "grad_norm": 0.04736328125, | |
| "learning_rate": 1.9846111442793866e-05, | |
| "loss": 4.4284, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.11608115193804702, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.984500000704313e-05, | |
| "loss": 4.4254, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.11648449159036825, | |
| "grad_norm": 0.0458984375, | |
| "learning_rate": 1.9843884603483464e-05, | |
| "loss": 4.4437, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.11688783124268946, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.9842765232564415e-05, | |
| "loss": 4.4201, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.11729117089501069, | |
| "grad_norm": 0.044921875, | |
| "learning_rate": 1.9841641894737113e-05, | |
| "loss": 4.442, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.1176945105473319, | |
| "grad_norm": 0.044189453125, | |
| "learning_rate": 1.98405145904543e-05, | |
| "loss": 4.4023, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.11809785019965313, | |
| "grad_norm": 0.044921875, | |
| "learning_rate": 1.9839383320170308e-05, | |
| "loss": 4.4158, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.11850118985197435, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.9838248084341077e-05, | |
| "loss": 4.4407, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.11890452950429557, | |
| "grad_norm": 0.044189453125, | |
| "learning_rate": 1.9837108883424128e-05, | |
| "loss": 4.4053, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.11930786915661679, | |
| "grad_norm": 0.044189453125, | |
| "learning_rate": 1.98359657178786e-05, | |
| "loss": 4.453, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.11971120880893801, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.9834818588165216e-05, | |
| "loss": 4.428, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.12011454846125923, | |
| "grad_norm": 0.046875, | |
| "learning_rate": 1.98336674947463e-05, | |
| "loss": 4.4208, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.12051788811358044, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.9832512438085776e-05, | |
| "loss": 4.4668, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.12092122776590167, | |
| "grad_norm": 0.04345703125, | |
| "learning_rate": 1.9831353418649168e-05, | |
| "loss": 4.4151, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.12132456741822288, | |
| "grad_norm": 0.0478515625, | |
| "learning_rate": 1.9830190436903587e-05, | |
| "loss": 4.4326, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.12172790707054411, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.982902349331775e-05, | |
| "loss": 4.4254, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.12213124672286532, | |
| "grad_norm": 0.043212890625, | |
| "learning_rate": 1.9827852588361966e-05, | |
| "loss": 4.46, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.12253458637518655, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.982667772250815e-05, | |
| "loss": 4.4317, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.12293792602750776, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.9825498896229793e-05, | |
| "loss": 4.4348, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.12334126567982899, | |
| "grad_norm": 0.043701171875, | |
| "learning_rate": 1.9824316110002e-05, | |
| "loss": 4.4246, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.1237446053321502, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.9823129364301474e-05, | |
| "loss": 4.4576, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.12414794498447142, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.9821938659606496e-05, | |
| "loss": 4.4288, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.12455128463679264, | |
| "grad_norm": 0.0439453125, | |
| "learning_rate": 1.9820743996396957e-05, | |
| "loss": 4.4153, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.12495462428911386, | |
| "grad_norm": 0.046142578125, | |
| "learning_rate": 1.981954537515434e-05, | |
| "loss": 4.4412, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.12535796394143509, | |
| "grad_norm": 0.0458984375, | |
| "learning_rate": 1.9818342796361723e-05, | |
| "loss": 4.4165, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.1257613035937563, | |
| "grad_norm": 0.046630859375, | |
| "learning_rate": 1.981713626050378e-05, | |
| "loss": 4.4294, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.1261646432460775, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.9815925768066776e-05, | |
| "loss": 4.4297, | |
| "step": 4380 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 49586, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 313, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.014857922095612e+19, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
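The state above is the standard `trainer_state.json` layout: a `log_history` array whose entries each record `epoch`, `grad_norm`, `learning_rate`, `loss`, and `step`, followed by run-level settings such as `logging_steps`, `save_steps`, and `max_steps`. A minimal sketch of how one might load this file and plot the logged training curve is shown below; the file path is an assumption (adjust it to wherever the checkpoint directory lives), and this is an illustrative reading of the fields recorded here, not part of the original run.

```python
import json

import matplotlib.pyplot as plt

# Assumed path to the Trainer state shown above; adjust as needed.
STATE_PATH = "trainer_state.json"

with open(STATE_PATH) as f:
    state = json.load(f)

# Keep only the periodic training logs (entries that carry a "loss" value).
train_logs = [e for e in state["log_history"] if "loss" in e]

steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]
lrs = [e["learning_rate"] for e in train_logs]

fig, (ax_loss, ax_lr) = plt.subplots(1, 2, figsize=(10, 4))
ax_loss.plot(steps, losses)
ax_loss.set_xlabel("step")
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_xlabel("step")
ax_lr.set_ylabel("learning rate")
fig.tight_layout()
plt.show()
```

With the values logged here (steps 10 through 4380 at `logging_steps: 10`), this yields a loss curve hovering around 4.4 and a learning rate that has warmed up to roughly 2e-4 scale divided by ten, i.e. about 1.98e-05 to 1.99e-05 over the span covered in this section.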